/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
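
/* As an illustrative sketch (not part of the pass itself): strength
   reduction rewrites a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the address use is &a[i] = &a + i * sizeof (*a), into roughly

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   so that the multiplication hidden in the array indexing is replaced by
   a simple pointer increment; induction variable elimination can then
   remove i, whose only remaining use was the exit test.  */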
#include "coretypes.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
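
/* Illustrative note (describing the convention, not new mechanism): costs
   are compared primarily by COST, with COMPLEXITY acting only as a
   tie-breaker between alternatives of equal runtime cost, and INFTY
   marking alternatives that cannot be used at all.  */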
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
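
/* A rough sketch (illustrative) of where the increment lands for a loop
   whose exit test is at the end of the body:

     body:
       ... use ...               <- IP_BEFORE_USE / IP_AFTER_USE
       ivtmp' = ivtmp + step;    <- IP_NORMAL, just before the exit test
       if (cond) goto body;
     latch:                      <- IP_END, at the end of the latch block

   IP_ORIGINAL leaves the increment where the original biv was already
   being incremented.  */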
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}
/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
600 dump_uses (FILE *file
, struct ivopts_data
*data
)
605 for (i
= 0; i
< n_iv_uses (data
); i
++)
607 use
= iv_use (data
, i
);
609 dump_use (file
, use
);
610 fprintf (file
, "\n");
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT if after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT if after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
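
/* For instance (illustrative): for EXPR = &a[0] the result is &a; for a
   pointer hidden behind a conversion the function recurses on the
   converted value; and for a non-pointer expression it returns
   NULL_TREE.  */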
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
1031 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
1033 struct version_info
*info
= name_info (data
, iv
);
1035 gcc_assert (!info
->iv
);
1037 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1038 info
->iv
= alloc_iv (base
, step
);
1039 info
->iv
->ssa_name
= iv
;
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Return the first non-invariant ssa var found in EXPR.  */

tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, because that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
1602 initial ones. Returns false when the value of the index cannot be determined.
1603 Callback for for_each_index. */
1605 struct ifs_ivopts_data
1607 struct ivopts_data
*ivopts_data
;
1613 idx_find_step (tree base
, tree
*idx
, void *data
)
1615 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1617 tree step
, iv_base
, iv_step
, lbound
, off
;
1618 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1620 /* If base is a component ref, require that the offset of the reference
1622 if (TREE_CODE (base
) == COMPONENT_REF
)
1624 off
= component_ref_field_offset (base
);
1625 return expr_invariant_in_loop_p (loop
, off
);
1628 /* If base is array, first check whether we will be able to move the
1629 reference out of the loop (in order to take its address in strength
1630 reduction). In order for this to work we need both lower bound
1631 and step to be loop invariants. */
1632 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1634 /* Moreover, for a range, the size needs to be invariant as well. */
1635 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1636 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1639 step
= array_ref_element_size (base
);
1640 lbound
= array_ref_low_bound (base
);
1642 if (!expr_invariant_in_loop_p (loop
, step
)
1643 || !expr_invariant_in_loop_p (loop
, lbound
))
1647 if (TREE_CODE (*idx
) != SSA_NAME
)
1650 iv
= get_iv (dta
->ivopts_data
, *idx
);
1654 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1655 *&x[0], which is not folded and does not trigger the
1656 ARRAY_REF path below. */
1659 if (integer_zerop (iv
->step
))
1662 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1664 step
= array_ref_element_size (base
);
1666 /* We only handle addresses whose step is an integer constant. */
1667 if (TREE_CODE (step
) != INTEGER_CST
)
1671 /* The step for pointer arithmetics already is 1 byte. */
1672 step
= size_one_node
;
1676 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1677 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1680 /* The index might wrap. */
1684 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1685 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */
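
/* For instance (illustrative): for TOP = n * 6 - n * 2 and BOT = n this
   stores 4 to *MUL and returns true, while for TOP = n + 1 and BOT = n
   it returns false, since n + 1 is not a constant multiple of n.  */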
static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
1961 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1964 use_operand_p use_p
;
1967 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1969 op
= USE_FROM_PTR (use_p
);
1970 record_invariant (data
, op
, false);
1974 /* Finds interesting uses of induction variables in the statement STMT. */
1977 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1980 tree op
, *lhs
, *rhs
;
1982 use_operand_p use_p
;
1983 enum tree_code code
;
1985 find_invariants_stmt (data
, stmt
);
1987 if (gimple_code (stmt
) == GIMPLE_COND
)
1989 find_interesting_uses_cond (data
, stmt
);
1993 if (is_gimple_assign (stmt
))
1995 lhs
= gimple_assign_lhs_ptr (stmt
);
1996 rhs
= gimple_assign_rhs1_ptr (stmt
);
1998 if (TREE_CODE (*lhs
) == SSA_NAME
)
2000 /* If the statement defines an induction variable, the uses are not
2001 interesting by themselves. */
2003 iv
= get_iv (data
, *lhs
);
2005 if (iv
&& !integer_zerop (iv
->step
))
2009 code
= gimple_assign_rhs_code (stmt
);
2010 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2011 && (REFERENCE_CLASS_P (*rhs
)
2012 || is_gimple_val (*rhs
)))
2014 if (REFERENCE_CLASS_P (*rhs
))
2015 find_interesting_uses_address (data
, stmt
, rhs
);
2017 find_interesting_uses_op (data
, *rhs
);
2019 if (REFERENCE_CLASS_P (*lhs
))
2020 find_interesting_uses_address (data
, stmt
, lhs
);
2023 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2025 find_interesting_uses_cond (data
, stmt
);
2029 /* TODO -- we should also handle address uses of type
2031 memory = call (whatever);
2038 if (gimple_code (stmt
) == GIMPLE_PHI
2039 && gimple_bb (stmt
) == data
->current_loop
->header
)
2041 iv
= get_iv (data
, PHI_RESULT (stmt
));
2043 if (iv
&& !integer_zerop (iv
->step
))
2047 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2049 op
= USE_FROM_PTR (use_p
);
2051 if (TREE_CODE (op
) != SSA_NAME
)
2054 iv
= get_iv (data
, op
);
2058 find_interesting_uses_op (data
, op
);
2062 /* Finds interesting uses of induction variables outside of loops
2063 on loop exit edge EXIT. */
2066 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2072 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2075 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2076 if (!virtual_operand_p (def
))
2077 find_interesting_uses_op (data
, def
);
2081 /* Finds uses of the induction variables that are interesting. */
2084 find_interesting_uses (struct ivopts_data
*data
)
2087 gimple_stmt_iterator bsi
;
2088 basic_block
*body
= get_loop_body (data
->current_loop
);
2090 struct version_info
*info
;
2093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2094 fprintf (dump_file
, "Uses:\n\n");
2096 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2101 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2102 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2103 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2104 find_interesting_uses_outside (data
, e
);
2106 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2107 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2108 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2109 if (!is_gimple_debug (gsi_stmt (bsi
)))
2110 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2113 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2117 fprintf (dump_file
, "\n");
2119 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2121 info
= ver_info (data
, i
);
2124 fprintf (dump_file
, " ");
2125 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2126 fprintf (dump_file
, " is invariant (%d)%s\n",
2127 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2131 fprintf (dump_file
, "\n");
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */
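
/* For instance (illustrative): for EXPR = &a[4], with 4-byte array
   elements, the returned core is &a (converted to the type of EXPR)
   and *OFFSET is set to 16.  */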
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2476 The purpose is to avoid splitting latch edge with a biv increment, thus
2477 creating a jump, possibly confusing other optimization passes and leaving
2478 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2479 is not available (so we do not have a better alternative), or if the latch
2480 edge is already nonempty. */
2483 allow_ip_end_pos_p (struct loop
*loop
)
2485 if (!ip_normal_pos (loop
))
2488 if (!empty_block_p (ip_end_pos (loop
)))
2494 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2495 Important field is set to IMPORTANT. */
2498 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2499 bool important
, struct iv_use
*use
)
2501 basic_block use_bb
= gimple_bb (use
->stmt
);
2502 machine_mode mem_mode
;
2503 unsigned HOST_WIDE_INT cstepi
;
2505 /* If we insert the increment in any position other than the standard
2506 ones, we must ensure that it is incremented once per iteration.
2507 It must not be in an inner nested loop, or one side of an if
2509 if (use_bb
->loop_father
!= data
->current_loop
2510 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2511 || stmt_could_throw_p (use
->stmt
)
2512 || !cst_and_fits_in_hwi (step
))
2515 cstepi
= int_cst_value (step
);
2517 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2518 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2519 || USE_STORE_PRE_INCREMENT (mem_mode
))
2520 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2521 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2522 || USE_STORE_PRE_DECREMENT (mem_mode
))
2523 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2525 enum tree_code code
= MINUS_EXPR
;
2527 tree new_step
= step
;
2529 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2531 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2532 code
= POINTER_PLUS_EXPR
;
2535 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2536 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2537 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2540 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2541 || USE_STORE_POST_INCREMENT (mem_mode
))
2542 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2543 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2544 || USE_STORE_POST_DECREMENT (mem_mode
))
2545 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2547 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
                   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
        (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
                   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
         it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
        {
          cand = add_candidate_1 (data,
                                  iv->base, iv->step, true, IP_ORIGINAL, NULL,
                                  SSA_NAME_DEF_STMT (def));
          cand->var_before = iv->ssa_name;
          cand->var_after = def;
        }
      else
        gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = bitmap_count_bits (use->related_cands);

          /* Round up to the power of two, so that taking the modulo by it
             is fast.  */
          size = s ? (1 << ceil_log2 (s)) : 1;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
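
/* Note: rounding the map size up to a power of two above means the lookup
   code can reduce a candidate id into the table with a bitwise AND instead
   of a division; e.g. with s == 5 related candidates the map gets 8 slots
   and a slot index is computed as cand->id & 7.  */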
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}
/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}
/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
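
/* The ordering implied above is lexicographic: the runtime cost is compared
   first and complexity only breaks ties, so e.g. a pair with cost 4 and
   complexity 2 is considered cheaper than one with cost 5 and
   complexity 0.  */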
/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
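
/* The cost map therefore acts as a small open-addressing hash table keyed
   by candidate id: the probe starts at cand->id modulo n_map_members,
   scans forward to the end of the table, then wraps around once, and the
   first empty slot is taken.  Since only related candidates are recorded
   and the table was sized to hold all of them, a free slot always exists
   (hence the gcc_unreachable above).  The lookup below mirrors this probe
   sequence and treats an empty slot as "not present".  */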
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
        return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
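
/* The cost of a computation is thus measured empirically rather than
   estimated symbolically: the expression is expanded into a scratch RTL
   sequence (using fake registers prepared by prepare_decl_rtl), and the
   target-reported costs of the generated insns are summed up.  */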
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
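
/* The decomposition built above follows from rewriting

     use = ubase + ratio * (var - cbase)

   as

     use = ubase - ratio * cbase + ratio * var,

   where ratio = ustep / cstep as computed by constant_multiple_of; keeping
   ubase - ratio * cbase together in one affine combination lets it fold as
   far as possible.  */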
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
         the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
                  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
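
/* For instance, with a setup cost of 10 and avg_loop_niter returning 5,
   the amortized per-iteration cost charged when optimizing for speed is
   10 / 5 = 2, whereas when optimizing for size the full constant 10 is
   kept.  */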
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (scaled, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as)
              || memory_address_addr_space_p (mode, scaled, as))
            bitmap_set_bit (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
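
/* The probing above is performed only once per (mode, address space) pair;
   the result is cached in an sbitmap indexed by ratio + MAX_RATIO, so e.g.
   bit 0 answers for ratio -MAX_RATIO and bit 2 * MAX_RATIO for ratio
   MAX_RATIO.  */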
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx_insn *seq;
      rtx addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((unsigned HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
          /* For some strict-alignment targets, the offset must be naturally
             aligned.  Try an aligned offset if mem_mode is not QImode.  */
          off = mem_mode != QImode
                ? ((unsigned HOST_WIDE_INT) 1 << i)
                    - GET_MODE_SIZE (mem_mode)
                : 0;
          if (off > 0)
            {
              XEXP (addr, 1) = gen_int_mode (off, address_mode);
              if (memory_address_addr_space_p (mem_mode, addr, as))
                break;
            }
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }
      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
          || USE_STORE_PRE_DECREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_predec[mem_mode])
            data->ainc_costs[AINC_PRE_DEC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_POST_DECREMENT (mem_mode)
          || USE_STORE_POST_DECREMENT (mem_mode))
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postdec[mem_mode])
            data->ainc_costs[AINC_POST_DEC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
          || USE_STORE_PRE_INCREMENT (mem_mode))
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_preinc[mem_mode])
            data->ainc_costs[AINC_PRE_INC]
              = address_cost (addr, mem_mode, as, speed);
        }
      if (USE_LOAD_POST_INCREMENT (mem_mode)
          || USE_STORE_POST_INCREMENT (mem_mode))
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);

          if (has_postinc[mem_mode])
            data->ainc_costs[AINC_POST_INC]
              = address_cost (addr, mem_mode, as, speed);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }
      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }

          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      address_cost_data_list[data_index] = data;
    }
  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else
    {
      if (has_postinc[mem_mode] && autoinc_offset == 0
          && msize == cstep)
        autoinc_type = AINC_POST_INC;
      else if (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
        autoinc_type = AINC_POST_DEC;
      else if (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
        autoinc_type = AINC_PRE_INC;
      else if (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep)
        autoinc_type = AINC_PRE_DEC;

      if (autoinc_type != AINC_NONE)
        autoinc = true;
    }

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  if (autoinc)
    acost = data->ainc_costs[autoinc_type];
  else
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
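
/* Each of the 16 precomputed cost entries corresponds to one subset of
   { symbol, var, offset, ratio }: decoding e.g. i == 13 == 0b1101 gives
   sym_p == 1, var_p == 0, off_p == 1, rat_p == 1, i.e. the cost of an
   address of the shape sym + cst + rat * index.  */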
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
                   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  if (!(m >= 0 && m < maxm))
    return false;

  mult_in_op1 = operand_equal_p (op1, mult, 0);

  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
             ? shiftadd_cost (speed, mode, m)
             : (mult_in_op1
                ? shiftsub1_cost (speed, mode, m)
                : shiftsub0_cost (speed, mode, m)));

  res = new_cost (MIN (as_cost, sa_cost), 0);
  res = add_costs (res, mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
        {
          tree mult = NULL_TREE;
          comp_cost sa_cost;
          if (TREE_CODE (op1) == MULT_EXPR)
            mult = op1;
          else if (TREE_CODE (op0) == MULT_EXPR)
            mult = op0;

          if (mult != NULL_TREE
              && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
              && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
                                    speed, &sa_cost))
            return sa_cost;
        }
      break;

    CASE_CONVERT:
      {
        tree inner_mode, outer_mode;
        outer_mode = TREE_TYPE (expr);
        inner_mode = TREE_TYPE (op0);
        cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
                                       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
                                             mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
                                             mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && tree_fits_shwi_p (ind)
                  && tree_to_shwi (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  machine_mode mem_mode = (address_p
                           ? TYPE_MODE (TREE_TYPE (*use->op_p))
                           : VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
          && cand->iv->base_object
          && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
          && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, mem_mode,
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
        get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        mem_mode,
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
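
/* To summarize the strategies above: a constant CBASE is folded into the
   offset, ratio == 1 allows costing the plain difference UBASE - CBASE,
   and for an address use the multiplication can sometimes be folded into
   the addressing mode; only if none of these applies is CBASE forced into
   a separate variable.  When even the precision check fails, the fallback
   path simply expands the full computation and measures it.  */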
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  else
    steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
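
/* For example, for an 8-bit unsigned iv with step 4, num_ending_zeros is 2
   and the period is (256 >> 2) - 1 = 63: the iv runs through 64 distinct
   values, so up to 63 increments can be made before a value repeats.  */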
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
                                  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
                                  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
        return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
                                      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
4596 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4597 comparison with CAND. NITER describes the number of iterations of
4598 the loops. If successful, the comparison in COMP_P is altered accordingly.
4600 We aim to handle the following situation:
4616 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4617 We aim to optimize this to
4625 while (p < p_0 - a + b);
4627 This preserves the correctness, since the pointer arithmetics does not
4628 overflow. More precisely:
4630 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4631 overflow in computing it or the values of p.
4632 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4633 overflow. To prove this, we use the fact that p_0 = base + a. */
static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR
	  && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR
	  && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
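
/* Worked instance (illustrative only): for the loop in the comment above,
   niter analysis yields NITER = b - a - 1 with MAY_BE_ZERO = a + 1 > b.
   The GT_EXPR branch extracts A = a and B = b, and the affine arithmetic
   confirms that B - A - NITER = 1.  Since the step is positive, the new
   comparison operator is LT_EXPR, producing the test p < p_0 - a + b.  */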
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
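
/* Example of the period check (illustrative, not from the original
   sources): an unsigned char candidate with step 1 has period 255, so it
   may only replace an exit test whose condition is true for at most 255
   iterations; otherwise the candidate would wrap before the computed bound
   is hit and the rewritten comparison would be wrong.  */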
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
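
/* Illustration of the decision above (not from the original sources): for
   an exit test i < n and a pointer candidate p, ELIM_COST prices computing
   the invariant bound of the rewritten test p < base + n once before the
   loop, while EXPRESS_COST prices recomputing the original condition from
   p in the loop plus the original bound n.  The cheaper variant is
   recorded, with ties going to elimination.  */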
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;

      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);

	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
	    continue;

	  if (uid < gimple_uid (cand->incremented_at)
	      && (closest_before == NULL
		  || uid > gimple_uid (closest_before->stmt)))
	    closest_before = use;

	  if (uid > gimple_uid (cand->incremented_at)
	      && (closest_after == NULL
		  || uid < gimple_uid (closest_after->stmt)))
	    closest_after = use;
	}

      if (closest_before != NULL
	  && autoinc_possible_for_pair (data, closest_before, cand))
	cand->ainc_use = closest_before;
      else if (closest_after != NULL
	       && autoinc_possible_for_pair (data, closest_after, cand))
	cand->ainc_use = closest_after;
    }
}
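
/* Illustration (not part of the original sources): in a body such as

     ... = *p;
     p = p + 1;

   the memory access immediately precedes the increment of the original
   candidate p, so it becomes the candidate's AINC_USE and targets with
   post-increment addressing modes can merge the two statements.  */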
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d",
			 use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a save.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
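
/* Cost sketch (illustrative only): a candidate with an invariant base and
   a step that fits a machine add costs about

     cost_step + adjust_setup_cost (data, cost_base.cost)

   where the setup part is damped for loops expected to roll many times,
   plus one unit if the candidate is not an original user-visible iv and
   one more if its increment would go into an otherwise empty latch.  */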
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}
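
/* Rough behavior (illustrative, not from the original sources): while
   SIZE + DATA->regs_used stays within the registers the target has
   available, the estimate grows about linearly in SIZE; past that point
   projected spill costs dominate, so each additional induction variable
   becomes much more expensive.  */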
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n",
	       target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n",
	       target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n",
	       target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs
					    + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Adds the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= use->id);
  ivs->upto++;
  ivs->bad_uses++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      cp = get_use_iv_cost (data, use, cand);
      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  cp = get_use_iv_cost (data, use, cand);
	  if (cheaper_cost_pair (cp, best_cp))
	    best_cp = cp;
	}
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
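
/* The delta lists above enable the speculative probe idiom used by the
   search below: apply a tentative change, measure, then undo it, as in

     iv_ca_delta_commit (data, ivs, delta, true);
     cost = iv_ca_cost (ivs);
     iv_ca_delta_commit (data, ivs, delta, false);

   which lets candidate assignments be evaluated without copying the whole
   iv_ca structure.  */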
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, use, cp);
	      acost = iv_ca_cost (ivs);

	      if (compare_costs (acost, best_cost) < 0)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Check if CAND_IDX is a candidate other than OLD_CAND and has
   cheaper local cost for USE than BEST_CP.  Return pointer to
   the corresponding cost_pair, otherwise just return BEST_CP.  */

static struct cost_pair *
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
			unsigned int cand_idx, struct iv_cand *old_cand,
			struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = iv_cand (data, cand_idx);
  cp = get_use_iv_cost (data, use, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}

/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv use.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function tries to break
   a local optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selecting algorithm tends to choose fewer
	     IVs so that it can handle cases in which loops have many
	     variables but the best choice is often to use only one general
	     biv.  One weakness is that it can't handle opposite cases, in
	     which different candidates should be chosen with respect to each
	     use.  To solve the problem, we replace candidates in a manner
	     described by the comments of iv_ca_replace, thus giving the
	     general algorithm a chance to break the local optimal
	     fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}

      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
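
/* Note (illustrative): for the non-IP_ORIGINAL cases, create_iv emits the
   computation of the initial value on the loop preheader edge and places
   the increment VAR_AFTER = VAR_BEFORE + STEP at INCR_POS, before or
   after the iterator according to AFTER.  */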
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
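
/* Illustration (not from the original sources): when elimination found an
   invariant BOUND for an exit test such as i < n, the branch above emits
   the computation of BOUND once in the preheader and rewrites the
   GIMPLE_COND in place to compare the candidate's value at the test
   against it, e.g. p < bound.  */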
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use,
	     struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

static bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}