1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base
; /* Initial value of the iv. */
105 tree base_object
; /* A memory object to that the induction variable points. */
106 tree step
; /* Step of the iv (constant only). */
107 tree ssa_name
; /* The ssa name with the value. */
108 bool biv_p
; /* Is it a biv? */
109 bool have_use_for
; /* Do we already have a use for it? */
110 unsigned use_id
; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name
; /* The ssa name. */
117 struct iv
*iv
; /* Induction variable description. */
118 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id
; /* Id of an invariant. */
121 bool preserve_biv
; /* For the original biv, whether to preserve it. */
127 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
128 USE_ADDRESS
, /* Use in an address. */
129 USE_COMPARE
/* Use is a compare. */
132 /* The candidate - cost pair. */
135 struct iv_cand
*cand
; /* The candidate. */
136 unsigned cost
; /* The cost. */
137 bitmap depends_on
; /* The list of invariants that have to be
139 tree value
; /* For final value elimination, the expression for
140 the final value of the iv. For iv elimination,
141 the new bound to compare with. */
147 unsigned id
; /* The id of the use. */
148 enum use_type type
; /* Type of the use. */
149 struct iv
*iv
; /* The induction variable it is based on. */
150 tree stmt
; /* Statement in that it occurs. */
151 tree
*op_p
; /* The place where it occurs. */
152 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
155 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
156 struct cost_pair
*cost_map
;
157 /* The costs wrto the iv candidates. */
159 struct iv_cand
*selected
;
160 /* The selected candidate. */
163 /* The position where the iv is computed. */
166 IP_NORMAL
, /* At the end, just before the exit condition. */
167 IP_END
, /* At the end of the latch block. */
168 IP_ORIGINAL
/* The original biv. */
171 /* The induction variable candidate. */
174 unsigned id
; /* The number of the candidate. */
175 bool important
; /* Whether this is an "important" candidate, i.e. such
176 that it should be considered by all uses. */
177 enum iv_position pos
; /* Where it is computed. */
178 tree incremented_at
; /* For original biv, the statement where it is
180 tree var_before
; /* The variable used for it before increment. */
181 tree var_after
; /* The variable used for it after increment. */
182 struct iv
*iv
; /* The value of the candidate. NULL for
183 "pseudocandidate" used to indicate the possibility
184 to replace the final value of an iv by direct
185 computation of the value. */
186 unsigned cost
; /* Cost of the candidate. */
187 bitmap depends_on
; /* The list of invariants that are used in step of the
191 /* The data used by the induction variable optimizations. */
193 typedef struct iv_use
*iv_use_p
;
195 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
197 typedef struct iv_cand
*iv_cand_p
;
198 DEF_VEC_P(iv_cand_p
);
199 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
203 /* The currently optimized loop. */
204 struct loop
*current_loop
;
206 /* Number of registers used in it. */
209 /* Numbers of iterations for all exits of the current loop. */
212 /* The size of version_info array allocated. */
213 unsigned version_info_size
;
215 /* The array of information for the ssa names. */
216 struct version_info
*version_info
;
218 /* The bitmap of indices in version_info whose value was changed. */
221 /* The maximum invariant id. */
224 /* The uses of induction variables. */
225 VEC(iv_use_p
,heap
) *iv_uses
;
227 /* The candidates. */
228 VEC(iv_cand_p
,heap
) *iv_candidates
;
230 /* A bitmap of important candidates. */
231 bitmap important_candidates
;
233 /* Whether to consider just related and important candidates when replacing a
235 bool consider_all_candidates
;
238 /* An assignment of iv candidates to uses. */
242 /* The number of uses covered by the assignment. */
245 /* Number of uses that cannot be expressed by the candidates in the set. */
248 /* Candidate assigned to a use, together with the related costs. */
249 struct cost_pair
**cand_for_use
;
251 /* Number of times each candidate is used. */
252 unsigned *n_cand_uses
;
254 /* The candidates used. */
257 /* The number of candidates in the set. */
260 /* Total number of registers needed. */
263 /* Total cost of expressing uses. */
264 unsigned cand_use_cost
;
266 /* Total cost of candidates. */
269 /* Number of times each invariant is used. */
270 unsigned *n_invariant_uses
;
272 /* Total cost of the assignment. */
276 /* Difference of two iv candidate assignments. */
283 /* An old assignment (for rollback purposes). */
284 struct cost_pair
*old_cp
;
286 /* A new assignment. */
287 struct cost_pair
*new_cp
;
289 /* Next change in the list. */
290 struct iv_ca_delta
*next_change
;
293 /* Bound on number of candidates below that all candidates are considered. */
295 #define CONSIDER_ALL_CANDIDATES_BOUND \
296 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
299 optimizing such a loop would help, and it would take ages). */
301 #define MAX_CONSIDERED_USES \
302 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304 /* If there are at most this number of ivs in the set, try removing unnecessary
305 ivs from the set always. */
307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
308 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
310 /* The list of trees for that the decl_rtl field must be reset is stored
313 static VEC(tree
,heap
) *decl_rtl_to_reset
;
315 /* Number of uses recorded in DATA. */
317 static inline unsigned
318 n_iv_uses (struct ivopts_data
*data
)
320 return VEC_length (iv_use_p
, data
->iv_uses
);
323 /* Ith use recorded in DATA. */
325 static inline struct iv_use
*
326 iv_use (struct ivopts_data
*data
, unsigned i
)
328 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
331 /* Number of candidates recorded in DATA. */
333 static inline unsigned
334 n_iv_cands (struct ivopts_data
*data
)
336 return VEC_length (iv_cand_p
, data
->iv_candidates
);
339 /* Ith candidate recorded in DATA. */
341 static inline struct iv_cand
*
342 iv_cand (struct ivopts_data
*data
, unsigned i
)
344 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
347 /* The single loop exit if it dominates the latch, NULL otherwise. */
350 single_dom_exit (struct loop
*loop
)
352 edge exit
= loop
->single_exit
;
357 if (!just_once_each_iteration_p (loop
, exit
->src
))
363 /* Dumps information about the induction variable IV to FILE. */
365 extern void dump_iv (FILE *, struct iv
*);
367 dump_iv (FILE *file
, struct iv
*iv
)
371 fprintf (file
, "ssa name ");
372 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
373 fprintf (file
, "\n");
376 fprintf (file
, " type ");
377 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
378 fprintf (file
, "\n");
382 fprintf (file
, " base ");
383 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
384 fprintf (file
, "\n");
386 fprintf (file
, " step ");
387 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
388 fprintf (file
, "\n");
392 fprintf (file
, " invariant ");
393 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
394 fprintf (file
, "\n");
399 fprintf (file
, " base object ");
400 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
401 fprintf (file
, "\n");
405 fprintf (file
, " is a biv\n");
408 /* Dumps information about the USE to FILE. */
410 extern void dump_use (FILE *, struct iv_use
*);
412 dump_use (FILE *file
, struct iv_use
*use
)
414 fprintf (file
, "use %d\n", use
->id
);
418 case USE_NONLINEAR_EXPR
:
419 fprintf (file
, " generic\n");
423 fprintf (file
, " address\n");
427 fprintf (file
, " compare\n");
434 fprintf (file
, " in statement ");
435 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
436 fprintf (file
, "\n");
438 fprintf (file
, " at position ");
440 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
441 fprintf (file
, "\n");
443 dump_iv (file
, use
->iv
);
445 if (use
->related_cands
)
447 fprintf (file
, " related candidates ");
448 dump_bitmap (file
, use
->related_cands
);
452 /* Dumps information about the uses to FILE. */
454 extern void dump_uses (FILE *, struct ivopts_data
*);
456 dump_uses (FILE *file
, struct ivopts_data
*data
)
461 for (i
= 0; i
< n_iv_uses (data
); i
++)
463 use
= iv_use (data
, i
);
465 dump_use (file
, use
);
466 fprintf (file
, "\n");
470 /* Dumps information about induction variable candidate CAND to FILE. */
472 extern void dump_cand (FILE *, struct iv_cand
*);
474 dump_cand (FILE *file
, struct iv_cand
*cand
)
476 struct iv
*iv
= cand
->iv
;
478 fprintf (file
, "candidate %d%s\n",
479 cand
->id
, cand
->important
? " (important)" : "");
481 if (cand
->depends_on
)
483 fprintf (file
, " depends on ");
484 dump_bitmap (file
, cand
->depends_on
);
489 fprintf (file
, " final value replacement\n");
496 fprintf (file
, " incremented before exit test\n");
500 fprintf (file
, " incremented at end\n");
504 fprintf (file
, " original biv\n");
511 /* Returns the info for ssa version VER. */
513 static inline struct version_info
*
514 ver_info (struct ivopts_data
*data
, unsigned ver
)
516 return data
->version_info
+ ver
;
519 /* Returns the info for ssa name NAME. */
521 static inline struct version_info
*
522 name_info (struct ivopts_data
*data
, tree name
)
524 return ver_info (data
, SSA_NAME_VERSION (name
));
527 /* Checks whether there exists number X such that X * B = A, counting modulo
531 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
534 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
535 unsigned HOST_WIDE_INT inv
, ex
, val
;
541 /* First divide the whole equation by 2 as long as possible. */
542 while (!(a
& 1) && !(b
& 1))
552 /* If b is still even, a is odd and there is no such x. */
556 /* Find the inverse of b. We compute it as
557 b^(2^(bits - 1) - 1) (mod 2^bits). */
560 for (i
= 0; i
< bits
- 1; i
++)
562 inv
= (inv
* ex
) & mask
;
563 ex
= (ex
* ex
) & mask
;
566 val
= (a
* inv
) & mask
;
568 gcc_assert (((val
* b
) & mask
) == a
);
570 if ((val
>> (bits
- 1)) & 1)
578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
582 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
584 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
588 if (sbb
== loop
->latch
)
594 return stmt
== last_stmt (bb
);
597 /* Returns true if STMT if after the place where the original induction
598 variable CAND is incremented. */
601 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
603 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
604 basic_block stmt_bb
= bb_for_stmt (stmt
);
605 block_stmt_iterator bsi
;
607 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
610 if (stmt_bb
!= cand_bb
)
613 /* Scan the block from the end, since the original ivs are usually
614 incremented at the end of the loop body. */
615 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
617 if (bsi_stmt (bsi
) == cand
->incremented_at
)
619 if (bsi_stmt (bsi
) == stmt
)
624 /* Returns true if STMT if after the place where the induction variable
625 CAND is incremented in LOOP. */
628 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
636 return stmt_after_ip_normal_pos (loop
, stmt
);
639 return stmt_after_ip_original_pos (cand
, stmt
);
646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
649 abnormal_ssa_name_p (tree exp
)
654 if (TREE_CODE (exp
) != SSA_NAME
)
657 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
661 abnormal phi node. Callback for for_each_index. */
664 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
665 void *data ATTRIBUTE_UNUSED
)
667 if (TREE_CODE (base
) == ARRAY_REF
)
669 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
671 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
675 return !abnormal_ssa_name_p (*index
);
678 /* Returns true if EXPR contains a ssa name that occurs in an
679 abnormal phi node. */
682 contains_abnormal_ssa_name_p (tree expr
)
685 enum tree_code_class
class;
690 code
= TREE_CODE (expr
);
691 class = TREE_CODE_CLASS (code
);
693 if (code
== SSA_NAME
)
694 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
696 if (code
== INTEGER_CST
697 || is_gimple_min_invariant (expr
))
700 if (code
== ADDR_EXPR
)
701 return !for_each_index (&TREE_OPERAND (expr
, 0),
702 idx_contains_abnormal_ssa_name_p
,
709 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
714 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
726 /* Element of the table in that we cache the numbers of iterations obtained
727 from exits of the loop. */
731 /* The edge for that the number of iterations is cached. */
734 /* Number of iterations corresponding to this exit, or NULL if it cannot be
739 /* Hash function for nfe_cache_elt E. */
742 nfe_hash (const void *e
)
744 const struct nfe_cache_elt
*elt
= e
;
746 return htab_hash_pointer (elt
->exit
);
749 /* Equality function for nfe_cache_elt E1 and edge E2. */
752 nfe_eq (const void *e1
, const void *e2
)
754 const struct nfe_cache_elt
*elt1
= e1
;
756 return elt1
->exit
== e2
;
759 /* Returns tree describing number of iterations determined from
760 EXIT of DATA->current_loop, or NULL if something goes wrong. */
763 niter_for_exit (struct ivopts_data
*data
, edge exit
)
765 struct nfe_cache_elt
*nfe_desc
;
766 struct tree_niter_desc desc
;
769 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
770 htab_hash_pointer (exit
),
775 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
776 nfe_desc
->exit
= exit
;
778 /* Try to determine number of iterations. We must know it
779 unconditionally (i.e., without possibility of # of iterations
780 being zero). Also, we cannot safely work with ssa names that
781 appear in phi nodes on abnormal edges, so that we do not create
782 overlapping life ranges for them (PR 27283). */
783 if (number_of_iterations_exit (data
->current_loop
,
785 && zero_p (desc
.may_be_zero
)
786 && !contains_abnormal_ssa_name_p (desc
.niter
))
787 nfe_desc
->niter
= desc
.niter
;
789 nfe_desc
->niter
= NULL_TREE
;
794 return nfe_desc
->niter
;
797 /* Returns tree describing number of iterations determined from
798 single dominating exit of DATA->current_loop, or NULL if something
802 niter_for_single_dom_exit (struct ivopts_data
*data
)
804 edge exit
= single_dom_exit (data
->current_loop
);
809 return niter_for_exit (data
, exit
);
812 /* Initializes data structures used by the iv optimization pass, stored
816 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
818 data
->version_info_size
= 2 * num_ssa_names
;
819 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
820 data
->relevant
= BITMAP_ALLOC (NULL
);
821 data
->important_candidates
= BITMAP_ALLOC (NULL
);
822 data
->max_inv_id
= 0;
823 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
824 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
825 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
826 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
829 /* Returns a memory object to that EXPR points. In case we are able to
830 determine that it does not point to any such object, NULL is returned. */
833 determine_base_object (tree expr
)
835 enum tree_code code
= TREE_CODE (expr
);
836 tree base
, obj
, op0
, op1
;
838 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
847 obj
= TREE_OPERAND (expr
, 0);
848 base
= get_base_address (obj
);
853 if (TREE_CODE (base
) == INDIRECT_REF
)
854 return determine_base_object (TREE_OPERAND (base
, 0));
856 return fold_convert (ptr_type_node
,
857 build_fold_addr_expr (base
));
861 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
862 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
868 return (code
== PLUS_EXPR
870 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
872 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
876 return determine_base_object (TREE_OPERAND (expr
, 0));
879 return fold_convert (ptr_type_node
, expr
);
883 /* Allocates an induction variable with given initial value BASE and step STEP
887 alloc_iv (tree base
, tree step
)
889 struct iv
*iv
= XCNEW (struct iv
);
891 if (step
&& integer_zerop (step
))
895 iv
->base_object
= determine_base_object (base
);
898 iv
->have_use_for
= false;
900 iv
->ssa_name
= NULL_TREE
;
905 /* Sets STEP and BASE for induction variable IV. */
908 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
910 struct version_info
*info
= name_info (data
, iv
);
912 gcc_assert (!info
->iv
);
914 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
915 info
->iv
= alloc_iv (base
, step
);
916 info
->iv
->ssa_name
= iv
;
919 /* Finds induction variable declaration for VAR. */
922 get_iv (struct ivopts_data
*data
, tree var
)
926 if (!name_info (data
, var
)->iv
)
928 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
931 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
932 set_iv (data
, var
, var
, NULL_TREE
);
935 return name_info (data
, var
)->iv
;
938 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
939 not define a simple affine biv with nonzero step. */
942 determine_biv_step (tree phi
)
944 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
945 tree name
= PHI_RESULT (phi
);
948 if (!is_gimple_reg (name
))
951 if (!simple_iv (loop
, phi
, name
, &iv
, true))
954 return (zero_p (iv
.step
) ? NULL_TREE
: iv
.step
);
957 /* Finds basic ivs. */
960 find_bivs (struct ivopts_data
*data
)
962 tree phi
, step
, type
, base
;
964 struct loop
*loop
= data
->current_loop
;
966 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
968 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
971 step
= determine_biv_step (phi
);
975 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
976 base
= expand_simple_operations (base
);
977 if (contains_abnormal_ssa_name_p (base
)
978 || contains_abnormal_ssa_name_p (step
))
981 type
= TREE_TYPE (PHI_RESULT (phi
));
982 base
= fold_convert (type
, base
);
984 step
= fold_convert (type
, step
);
986 set_iv (data
, PHI_RESULT (phi
), base
, step
);
993 /* Marks basic ivs. */
996 mark_bivs (struct ivopts_data
*data
)
999 struct iv
*iv
, *incr_iv
;
1000 struct loop
*loop
= data
->current_loop
;
1001 basic_block incr_bb
;
1003 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1005 iv
= get_iv (data
, PHI_RESULT (phi
));
1009 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1010 incr_iv
= get_iv (data
, var
);
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1016 if (incr_bb
->loop_father
!= data
->current_loop
1017 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1021 incr_iv
->biv_p
= true;
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026 parameters to IV. */
1029 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
1032 struct loop
*loop
= data
->current_loop
;
1034 iv
->base
= NULL_TREE
;
1035 iv
->step
= NULL_TREE
;
1037 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1040 lhs
= TREE_OPERAND (stmt
, 0);
1041 if (TREE_CODE (lhs
) != SSA_NAME
)
1044 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), iv
, true))
1046 iv
->base
= expand_simple_operations (iv
->base
);
1048 if (contains_abnormal_ssa_name_p (iv
->base
)
1049 || contains_abnormal_ssa_name_p (iv
->step
))
1055 /* Finds general ivs in statement STMT. */
1058 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1062 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1065 set_iv (data
, TREE_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
1068 /* Finds general ivs in basic block BB. */
1071 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1073 block_stmt_iterator bsi
;
1075 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1076 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1079 /* Finds general ivs. */
1082 find_givs (struct ivopts_data
*data
)
1084 struct loop
*loop
= data
->current_loop
;
1085 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1088 for (i
= 0; i
< loop
->num_nodes
; i
++)
1089 find_givs_in_bb (data
, body
[i
]);
1093 /* For each ssa name defined in LOOP determines whether it is an induction
1094 variable and if so, its initial value and step. */
1097 find_induction_variables (struct ivopts_data
*data
)
1102 if (!find_bivs (data
))
1108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1110 tree niter
= niter_for_single_dom_exit (data
);
1114 fprintf (dump_file
, " number of iterations ");
1115 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1116 fprintf (dump_file
, "\n\n");
1119 fprintf (dump_file
, "Induction variables:\n\n");
1121 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1123 if (ver_info (data
, i
)->iv
)
1124 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1131 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1133 static struct iv_use
*
1134 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1135 tree stmt
, enum use_type use_type
)
1137 struct iv_use
*use
= XCNEW (struct iv_use
);
1139 use
->id
= n_iv_uses (data
);
1140 use
->type
= use_type
;
1144 use
->related_cands
= BITMAP_ALLOC (NULL
);
1146 /* To avoid showing ssa name in the dumps, if it was not reset by the
1148 iv
->ssa_name
= NULL_TREE
;
1150 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1151 dump_use (dump_file
, use
);
1153 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1158 /* Checks whether OP is a loop-level invariant and if so, records it.
1159 NONLINEAR_USE is true if the invariant is used in a way we do not
1160 handle specially. */
1163 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1166 struct version_info
*info
;
1168 if (TREE_CODE (op
) != SSA_NAME
1169 || !is_gimple_reg (op
))
1172 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1174 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1177 info
= name_info (data
, op
);
1179 info
->has_nonlin_use
|= nonlinear_use
;
1181 info
->inv_id
= ++data
->max_inv_id
;
1182 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1185 /* Checks whether the use OP is interesting and if so, records it. */
1187 static struct iv_use
*
1188 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1195 if (TREE_CODE (op
) != SSA_NAME
)
1198 iv
= get_iv (data
, op
);
1202 if (iv
->have_use_for
)
1204 use
= iv_use (data
, iv
->use_id
);
1206 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1210 if (zero_p (iv
->step
))
1212 record_invariant (data
, op
, true);
1215 iv
->have_use_for
= true;
1217 civ
= XNEW (struct iv
);
1220 stmt
= SSA_NAME_DEF_STMT (op
);
1221 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1222 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1224 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1225 iv
->use_id
= use
->id
;
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231 and if so, records it. */
1234 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1238 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1240 tree zero
= integer_zero_node
;
1242 const_iv
.step
= NULL_TREE
;
1244 if (TREE_CODE (*cond_p
) != SSA_NAME
1245 && !COMPARISON_CLASS_P (*cond_p
))
1248 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1255 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1256 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1259 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1260 iv0
= get_iv (data
, *op0_p
);
1264 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1265 iv1
= get_iv (data
, *op1_p
);
1269 if (/* When comparing with non-invariant value, we may not do any senseful
1270 induction variable elimination. */
1272 /* Eliminating condition based on two ivs would be nontrivial.
1273 ??? TODO -- it is not really important to handle this case. */
1274 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1276 find_interesting_uses_op (data
, *op0_p
);
1277 find_interesting_uses_op (data
, *op1_p
);
1281 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1283 /* If both are invariants, this is a work for unswitching. */
1287 civ
= XNEW (struct iv
);
1288 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1289 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1292 /* Returns true if expression EXPR is obviously invariant in LOOP,
1293 i.e. if all its operands are defined outside of the LOOP. */
1296 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1301 if (is_gimple_min_invariant (expr
))
1304 if (TREE_CODE (expr
) == SSA_NAME
)
1306 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1308 && flow_bb_inside_loop_p (loop
, def_bb
))
1317 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1318 for (i
= 0; i
< len
; i
++)
1319 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1325 /* Cumulates the steps of indices into DATA and replaces their values with the
1326 initial ones. Returns false when the value of the index cannot be determined.
1327 Callback for for_each_index. */
1329 struct ifs_ivopts_data
1331 struct ivopts_data
*ivopts_data
;
1337 idx_find_step (tree base
, tree
*idx
, void *data
)
1339 struct ifs_ivopts_data
*dta
= data
;
1341 tree step
, iv_base
, iv_step
, lbound
, off
;
1342 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1344 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1345 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1348 /* If base is a component ref, require that the offset of the reference
1350 if (TREE_CODE (base
) == COMPONENT_REF
)
1352 off
= component_ref_field_offset (base
);
1353 return expr_invariant_in_loop_p (loop
, off
);
1356 /* If base is array, first check whether we will be able to move the
1357 reference out of the loop (in order to take its address in strength
1358 reduction). In order for this to work we need both lower bound
1359 and step to be loop invariants. */
1360 if (TREE_CODE (base
) == ARRAY_REF
)
1362 step
= array_ref_element_size (base
);
1363 lbound
= array_ref_low_bound (base
);
1365 if (!expr_invariant_in_loop_p (loop
, step
)
1366 || !expr_invariant_in_loop_p (loop
, lbound
))
1370 if (TREE_CODE (*idx
) != SSA_NAME
)
1373 iv
= get_iv (dta
->ivopts_data
, *idx
);
1377 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1378 *&x[0], which is not folded and does not trigger the
1379 ARRAY_REF path below. */
1385 if (TREE_CODE (base
) == ARRAY_REF
)
1387 step
= array_ref_element_size (base
);
1389 /* We only handle addresses whose step is an integer constant. */
1390 if (TREE_CODE (step
) != INTEGER_CST
)
1394 /* The step for pointer arithmetics already is 1 byte. */
1395 step
= build_int_cst (sizetype
, 1);
1399 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1400 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1403 /* The index might wrap. */
1407 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1410 *dta
->step_p
= step
;
1412 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1421 idx_record_use (tree base
, tree
*idx
,
1424 find_interesting_uses_op (data
, *idx
);
1425 if (TREE_CODE (base
) == ARRAY_REF
)
1427 find_interesting_uses_op (data
, array_ref_element_size (base
));
1428 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1433 /* Returns true if memory reference REF may be unaligned. */
1436 may_be_unaligned_p (tree ref
)
1440 HOST_WIDE_INT bitsize
;
1441 HOST_WIDE_INT bitpos
;
1443 enum machine_mode mode
;
1444 int unsignedp
, volatilep
;
1445 unsigned base_align
;
1447 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1448 thus they are not misaligned. */
1449 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1452 /* The test below is basically copy of what expr.c:normal_inner_ref
1453 does to check whether the object must be loaded by parts when
1454 STRICT_ALIGNMENT is true. */
1455 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1456 &unsignedp
, &volatilep
, true);
1457 base_type
= TREE_TYPE (base
);
1458 base_align
= TYPE_ALIGN (base_type
);
1461 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1462 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1463 || bitpos
% BITS_PER_UNIT
!= 0))
1469 /* Finds addresses in *OP_P inside STMT. */
1472 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1474 tree base
= *op_p
, step
= NULL
;
1476 struct ifs_ivopts_data ifs_ivopts_data
;
1478 /* Do not play with volatile memory references. A bit too conservative,
1479 perhaps, but safe. */
1480 if (stmt_ann (stmt
)->has_volatile_ops
)
1483 /* Ignore bitfields for now. Not really something terribly complicated
1485 if (TREE_CODE (base
) == BIT_FIELD_REF
1486 || (TREE_CODE (base
) == COMPONENT_REF
1487 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1))))
1490 if (STRICT_ALIGNMENT
1491 && may_be_unaligned_p (base
))
1494 base
= unshare_expr (base
);
1496 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1498 tree type
= build_pointer_type (TREE_TYPE (base
));
1502 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1504 civ
= get_iv (data
, TMR_BASE (base
));
1508 TMR_BASE (base
) = civ
->base
;
1511 if (TMR_INDEX (base
)
1512 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1514 civ
= get_iv (data
, TMR_INDEX (base
));
1518 TMR_INDEX (base
) = civ
->base
;
1523 if (TMR_STEP (base
))
1524 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1527 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1535 base
= tree_mem_ref_addr (type
, base
);
1539 ifs_ivopts_data
.ivopts_data
= data
;
1540 ifs_ivopts_data
.stmt
= stmt
;
1541 ifs_ivopts_data
.step_p
= &step
;
1542 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1546 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1547 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1549 base
= build_fold_addr_expr (base
);
1551 /* Substituting bases of IVs into the base expression might
1552 have caused folding opportunities. */
1553 if (TREE_CODE (base
) == ADDR_EXPR
)
1555 tree
*ref
= &TREE_OPERAND (base
, 0);
1556 while (handled_component_p (*ref
))
1557 ref
= &TREE_OPERAND (*ref
, 0);
1558 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1559 *ref
= fold_indirect_ref (*ref
);
1563 civ
= alloc_iv (base
, step
);
1564 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1568 for_each_index (op_p
, idx_record_use
, data
);
1571 /* Finds and records invariants used in STMT. */
1574 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1577 use_operand_p use_p
;
1580 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1582 op
= USE_FROM_PTR (use_p
);
1583 record_invariant (data
, op
, false);
1587 /* Finds interesting uses of induction variables in the statement STMT. */
1590 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1595 use_operand_p use_p
;
1597 find_invariants_stmt (data
, stmt
);
1599 if (TREE_CODE (stmt
) == COND_EXPR
)
1601 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1605 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1607 lhs
= TREE_OPERAND (stmt
, 0);
1608 rhs
= TREE_OPERAND (stmt
, 1);
1610 if (TREE_CODE (lhs
) == SSA_NAME
)
1612 /* If the statement defines an induction variable, the uses are not
1613 interesting by themselves. */
1615 iv
= get_iv (data
, lhs
);
1617 if (iv
&& !zero_p (iv
->step
))
1621 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1623 case tcc_comparison
:
1624 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1628 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1629 if (REFERENCE_CLASS_P (lhs
))
1630 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1636 if (REFERENCE_CLASS_P (lhs
)
1637 && is_gimple_val (rhs
))
1639 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1640 find_interesting_uses_op (data
, rhs
);
1644 /* TODO -- we should also handle address uses of type
1646 memory = call (whatever);
1653 if (TREE_CODE (stmt
) == PHI_NODE
1654 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1656 lhs
= PHI_RESULT (stmt
);
1657 iv
= get_iv (data
, lhs
);
1659 if (iv
&& !zero_p (iv
->step
))
1663 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1665 op
= USE_FROM_PTR (use_p
);
1667 if (TREE_CODE (op
) != SSA_NAME
)
1670 iv
= get_iv (data
, op
);
1674 find_interesting_uses_op (data
, op
);
1678 /* Finds interesting uses of induction variables outside of loops
1679 on loop exit edge EXIT. */
1682 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1686 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1688 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1689 find_interesting_uses_op (data
, def
);
1693 /* Finds uses of the induction variables that are interesting. */
1696 find_interesting_uses (struct ivopts_data
*data
)
1699 block_stmt_iterator bsi
;
1701 basic_block
*body
= get_loop_body (data
->current_loop
);
1703 struct version_info
*info
;
1706 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1707 fprintf (dump_file
, "Uses:\n\n");
1709 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1714 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1715 if (e
->dest
!= EXIT_BLOCK_PTR
1716 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1717 find_interesting_uses_outside (data
, e
);
1719 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1720 find_interesting_uses_stmt (data
, phi
);
1721 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1722 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1729 fprintf (dump_file
, "\n");
1731 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1733 info
= ver_info (data
, i
);
1736 fprintf (dump_file
, " ");
1737 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1738 fprintf (dump_file
, " is invariant (%d)%s\n",
1739 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1743 fprintf (dump_file
, "\n");
1749 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1750 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1751 we are at the top-level of the processed address. */
1754 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1755 unsigned HOST_WIDE_INT
*offset
)
1757 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1758 enum tree_code code
;
1759 tree type
, orig_type
= TREE_TYPE (expr
);
1760 unsigned HOST_WIDE_INT off0
, off1
, st
;
1761 tree orig_expr
= expr
;
1765 type
= TREE_TYPE (expr
);
1766 code
= TREE_CODE (expr
);
1772 if (!cst_and_fits_in_hwi (expr
)
1776 *offset
= int_cst_value (expr
);
1777 return build_int_cst (orig_type
, 0);
1781 op0
= TREE_OPERAND (expr
, 0);
1782 op1
= TREE_OPERAND (expr
, 1);
1784 op0
= strip_offset_1 (op0
, false, false, &off0
);
1785 op1
= strip_offset_1 (op1
, false, false, &off1
);
1787 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1788 if (op0
== TREE_OPERAND (expr
, 0)
1789 && op1
== TREE_OPERAND (expr
, 1))
1794 else if (zero_p (op0
))
1796 if (code
== PLUS_EXPR
)
1799 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1802 expr
= fold_build2 (code
, type
, op0
, op1
);
1804 return fold_convert (orig_type
, expr
);
1810 step
= array_ref_element_size (expr
);
1811 if (!cst_and_fits_in_hwi (step
))
1814 st
= int_cst_value (step
);
1815 op1
= TREE_OPERAND (expr
, 1);
1816 op1
= strip_offset_1 (op1
, false, false, &off1
);
1817 *offset
= off1
* st
;
1822 /* Strip the component reference completely. */
1823 op0
= TREE_OPERAND (expr
, 0);
1824 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1834 tmp
= component_ref_field_offset (expr
);
1836 && cst_and_fits_in_hwi (tmp
))
1838 /* Strip the component reference completely. */
1839 op0
= TREE_OPERAND (expr
, 0);
1840 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1841 *offset
= off0
+ int_cst_value (tmp
);
1847 op0
= TREE_OPERAND (expr
, 0);
1848 op0
= strip_offset_1 (op0
, true, true, &off0
);
1851 if (op0
== TREE_OPERAND (expr
, 0))
1854 expr
= build_fold_addr_expr (op0
);
1855 return fold_convert (orig_type
, expr
);
1858 inside_addr
= false;
1865 /* Default handling of expressions for that we want to recurse into
1866 the first operand. */
1867 op0
= TREE_OPERAND (expr
, 0);
1868 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1871 if (op0
== TREE_OPERAND (expr
, 0)
1872 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1875 expr
= copy_node (expr
);
1876 TREE_OPERAND (expr
, 0) = op0
;
1878 TREE_OPERAND (expr
, 1) = op1
;
1880 /* Inside address, we might strip the top level component references,
1881 thus changing type of the expression. Handling of ADDR_EXPR
1883 expr
= fold_convert (orig_type
, expr
);
1888 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1891 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1893 return strip_offset_1 (expr
, false, false, offset
);
1896 /* Returns variant of TYPE that can be used as base for different uses.
1897 For integer types, we return unsigned variant of the type, which
1898 avoids problems with overflows. For pointer types, we return void *. */
1901 generic_type_for (tree type
)
1903 if (POINTER_TYPE_P (type
))
1904 return ptr_type_node
;
1906 if (TYPE_UNSIGNED (type
))
1909 return unsigned_type_for (type
);
1912 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1913 the bitmap to that we should store it. */
1915 static struct ivopts_data
*fd_ivopts_data
;
1917 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1919 bitmap
*depends_on
= data
;
1920 struct version_info
*info
;
1922 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1924 info
= name_info (fd_ivopts_data
, *expr_p
);
1926 if (!info
->inv_id
|| info
->has_nonlin_use
)
1930 *depends_on
= BITMAP_ALLOC (NULL
);
1931 bitmap_set_bit (*depends_on
, info
->inv_id
);
1936 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1937 position to POS. If USE is not NULL, the candidate is set as related to
1938 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1939 replacement of the final value of the iv by a direct computation. */
1941 static struct iv_cand
*
1942 add_candidate_1 (struct ivopts_data
*data
,
1943 tree base
, tree step
, bool important
, enum iv_position pos
,
1944 struct iv_use
*use
, tree incremented_at
)
1947 struct iv_cand
*cand
= NULL
;
1948 tree type
, orig_type
;
1952 orig_type
= TREE_TYPE (base
);
1953 type
= generic_type_for (orig_type
);
1954 if (type
!= orig_type
)
1956 base
= fold_convert (type
, base
);
1958 step
= fold_convert (type
, step
);
1962 for (i
= 0; i
< n_iv_cands (data
); i
++)
1964 cand
= iv_cand (data
, i
);
1966 if (cand
->pos
!= pos
)
1969 if (cand
->incremented_at
!= incremented_at
)
1983 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1986 if (zero_p (cand
->iv
->step
))
1993 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1998 if (i
== n_iv_cands (data
))
2000 cand
= XCNEW (struct iv_cand
);
2006 cand
->iv
= alloc_iv (base
, step
);
2009 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2011 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2012 cand
->var_after
= cand
->var_before
;
2014 cand
->important
= important
;
2015 cand
->incremented_at
= incremented_at
;
2016 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2019 && TREE_CODE (step
) != INTEGER_CST
)
2021 fd_ivopts_data
= data
;
2022 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2025 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2026 dump_cand (dump_file
, cand
);
2029 if (important
&& !cand
->important
)
2031 cand
->important
= true;
2032 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2033 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2038 bitmap_set_bit (use
->related_cands
, i
);
2039 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2040 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2047 /* Returns true if incrementing the induction variable at the end of the LOOP
2050 The purpose is to avoid splitting latch edge with a biv increment, thus
2051 creating a jump, possibly confusing other optimization passes and leaving
2052 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2053 is not available (so we do not have a better alternative), or if the latch
2054 edge is already nonempty. */
2057 allow_ip_end_pos_p (struct loop
*loop
)
2059 if (!ip_normal_pos (loop
))
2062 if (!empty_block_p (ip_end_pos (loop
)))
2068 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2069 position to POS. If USE is not NULL, the candidate is set as related to
2070 it. The candidate computation is scheduled on all available positions. */
2073 add_candidate (struct ivopts_data
*data
,
2074 tree base
, tree step
, bool important
, struct iv_use
*use
)
2076 if (ip_normal_pos (data
->current_loop
))
2077 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2078 if (ip_end_pos (data
->current_loop
)
2079 && allow_ip_end_pos_p (data
->current_loop
))
2080 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2083 /* Add a standard "0 + 1 * iteration" iv candidate for a
2084 type with SIZE bits. */
2087 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2090 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2091 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2095 /* Adds standard iv candidates. */
2098 add_standard_iv_candidates (struct ivopts_data
*data
)
2100 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2102 /* The same for a double-integer type if it is still fast enough. */
2103 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2104 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2108 /* Adds candidates bases on the old induction variable IV. */
2111 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2114 struct iv_cand
*cand
;
2116 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2118 /* The same, but with initial value zero. */
2119 add_candidate (data
,
2120 build_int_cst (TREE_TYPE (iv
->base
), 0),
2121 iv
->step
, true, NULL
);
2123 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2124 if (TREE_CODE (phi
) == PHI_NODE
)
2126 /* Additionally record the possibility of leaving the original iv
2128 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2129 cand
= add_candidate_1 (data
,
2130 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2131 SSA_NAME_DEF_STMT (def
));
2132 cand
->var_before
= iv
->ssa_name
;
2133 cand
->var_after
= def
;
2137 /* Adds candidates based on the old induction variables. */
2140 add_old_ivs_candidates (struct ivopts_data
*data
)
2146 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2148 iv
= ver_info (data
, i
)->iv
;
2149 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2150 add_old_iv_candidates (data
, iv
);
2154 /* Adds candidates based on the value of the induction variable IV and USE. */
2157 add_iv_value_candidates (struct ivopts_data
*data
,
2158 struct iv
*iv
, struct iv_use
*use
)
2160 unsigned HOST_WIDE_INT offset
;
2163 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2165 /* The same, but with initial value zero. Make such variable important,
2166 since it is generic enough so that possibly many uses may be based
2168 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2169 iv
->step
, true, use
);
2171 /* Third, try removing the constant offset. */
2172 base
= strip_offset (iv
->base
, &offset
);
2174 add_candidate (data
, base
, iv
->step
, false, use
);
2177 /* Adds candidates based on the uses. */
2180 add_derived_ivs_candidates (struct ivopts_data
*data
)
2184 for (i
= 0; i
< n_iv_uses (data
); i
++)
2186 struct iv_use
*use
= iv_use (data
, i
);
2193 case USE_NONLINEAR_EXPR
:
2196 /* Just add the ivs based on the value of the iv used here. */
2197 add_iv_value_candidates (data
, use
->iv
, use
);
2206 /* Record important candidates and add them to related_cands bitmaps
2210 record_important_candidates (struct ivopts_data
*data
)
2215 for (i
= 0; i
< n_iv_cands (data
); i
++)
2217 struct iv_cand
*cand
= iv_cand (data
, i
);
2219 if (cand
->important
)
2220 bitmap_set_bit (data
->important_candidates
, i
);
2223 data
->consider_all_candidates
= (n_iv_cands (data
)
2224 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2226 if (data
->consider_all_candidates
)
2228 /* We will not need "related_cands" bitmaps in this case,
2229 so release them to decrease peak memory consumption. */
2230 for (i
= 0; i
< n_iv_uses (data
); i
++)
2232 use
= iv_use (data
, i
);
2233 BITMAP_FREE (use
->related_cands
);
2238 /* Add important candidates to the related_cands bitmaps. */
2239 for (i
= 0; i
< n_iv_uses (data
); i
++)
2240 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2241 data
->important_candidates
);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2263 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2264 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2265 we allocate a simple list to every use. */
2268 alloc_use_cost_map (struct ivopts_data
*data
)
2270 unsigned i
, size
, s
, j
;
2272 for (i
= 0; i
< n_iv_uses (data
); i
++)
2274 struct iv_use
*use
= iv_use (data
, i
);
2277 if (data
->consider_all_candidates
)
2278 size
= n_iv_cands (data
);
2282 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2287 /* Round up to the power of two, so that moduling by it is fast. */
2288 for (size
= 1; size
< s
; size
<<= 1)
2292 use
->n_map_members
= size
;
2293 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2297 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2298 on invariants DEPENDS_ON and that the value used in expressing it
2302 set_use_iv_cost (struct ivopts_data
*data
,
2303 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2304 bitmap depends_on
, tree value
)
2310 BITMAP_FREE (depends_on
);
2314 if (data
->consider_all_candidates
)
2316 use
->cost_map
[cand
->id
].cand
= cand
;
2317 use
->cost_map
[cand
->id
].cost
= cost
;
2318 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2319 use
->cost_map
[cand
->id
].value
= value
;
2323 /* n_map_members is a power of two, so this computes modulo. */
2324 s
= cand
->id
& (use
->n_map_members
- 1);
2325 for (i
= s
; i
< use
->n_map_members
; i
++)
2326 if (!use
->cost_map
[i
].cand
)
2328 for (i
= 0; i
< s
; i
++)
2329 if (!use
->cost_map
[i
].cand
)
2335 use
->cost_map
[i
].cand
= cand
;
2336 use
->cost_map
[i
].cost
= cost
;
2337 use
->cost_map
[i
].depends_on
= depends_on
;
2338 use
->cost_map
[i
].value
= value
;
2341 /* Gets cost of (USE, CANDIDATE) pair. */
2343 static struct cost_pair
*
2344 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2345 struct iv_cand
*cand
)
2348 struct cost_pair
*ret
;
2353 if (data
->consider_all_candidates
)
2355 ret
= use
->cost_map
+ cand
->id
;
2362 /* n_map_members is a power of two, so this computes modulo. */
2363 s
= cand
->id
& (use
->n_map_members
- 1);
2364 for (i
= s
; i
< use
->n_map_members
; i
++)
2365 if (use
->cost_map
[i
].cand
== cand
)
2366 return use
->cost_map
+ i
;
2368 for (i
= 0; i
< s
; i
++)
2369 if (use
->cost_map
[i
].cand
== cand
)
2370 return use
->cost_map
+ i
;
2375 /* Returns estimate on cost of computing SEQ. */
2383 for (; seq
; seq
= NEXT_INSN (seq
))
2385 set
= single_set (seq
);
2387 cost
+= rtx_cost (set
, SET
);
2395 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2397 produce_memory_decl_rtl (tree obj
, int *regno
)
2402 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2404 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2405 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2408 x
= gen_raw_REG (Pmode
, (*regno
)++);
2410 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2413 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2414 walk_tree. DATA contains the actual fake register number. */
2417 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2419 tree obj
= NULL_TREE
;
2423 switch (TREE_CODE (*expr_p
))
2426 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2427 handled_component_p (*expr_p
);
2428 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2431 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2432 x
= produce_memory_decl_rtl (obj
, regno
);
2437 obj
= SSA_NAME_VAR (*expr_p
);
2438 if (!DECL_RTL_SET_P (obj
))
2439 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2448 if (DECL_RTL_SET_P (obj
))
2451 if (DECL_MODE (obj
) == BLKmode
)
2452 x
= produce_memory_decl_rtl (obj
, regno
);
2454 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2464 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2465 SET_DECL_RTL (obj
, x
);
2471 /* Determines cost of the computation of EXPR. */
2474 computation_cost (tree expr
)
2477 tree type
= TREE_TYPE (expr
);
2479 /* Avoid using hard regs in ways which may be unsupported. */
2480 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2482 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2484 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2488 cost
= seq_cost (seq
);
2490 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2495 /* Returns variable containing the value of candidate CAND at statement AT. */
2498 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2500 if (stmt_after_increment (loop
, cand
, stmt
))
2501 return cand
->var_after
;
2503 return cand
->var_before
;
2506 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2507 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2510 tree_int_cst_sign_bit (tree t
)
2512 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2513 unsigned HOST_WIDE_INT w
;
2515 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2516 w
= TREE_INT_CST_LOW (t
);
2519 w
= TREE_INT_CST_HIGH (t
);
2520 bitno
-= HOST_BITS_PER_WIDE_INT
;
2523 return (w
>> bitno
) & 1;
2526 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2527 return cst. Otherwise return NULL_TREE. */
2530 constant_multiple_of (tree type
, tree top
, tree bot
)
2532 tree res
, mby
, p0
, p1
;
2533 enum tree_code code
;
2539 if (operand_equal_p (top
, bot
, 0))
2540 return build_int_cst (type
, 1);
2542 code
= TREE_CODE (top
);
2546 mby
= TREE_OPERAND (top
, 1);
2547 if (TREE_CODE (mby
) != INTEGER_CST
)
2550 res
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2554 return fold_binary_to_constant (MULT_EXPR
, type
, res
,
2555 fold_convert (type
, mby
));
2559 p0
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2562 p1
= constant_multiple_of (type
, TREE_OPERAND (top
, 1), bot
);
2566 return fold_binary_to_constant (code
, type
, p0
, p1
);
2569 if (TREE_CODE (bot
) != INTEGER_CST
)
2572 bot
= fold_convert (type
, bot
);
2573 top
= fold_convert (type
, top
);
2575 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2576 the result afterwards. */
2577 if (tree_int_cst_sign_bit (bot
))
2580 bot
= fold_unary_to_constant (NEGATE_EXPR
, type
, bot
);
2585 /* Ditto for TOP. */
2586 if (tree_int_cst_sign_bit (top
))
2589 top
= fold_unary_to_constant (NEGATE_EXPR
, type
, top
);
2592 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR
, type
, top
, bot
)))
2595 res
= fold_binary_to_constant (EXACT_DIV_EXPR
, type
, top
, bot
);
2597 res
= fold_unary_to_constant (NEGATE_EXPR
, type
, res
);
2605 /* Sets COMB to CST. */
2608 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2609 unsigned HOST_WIDE_INT cst
)
2611 unsigned prec
= TYPE_PRECISION (type
);
2614 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2617 comb
->rest
= NULL_TREE
;
2618 comb
->offset
= cst
& comb
->mask
;
2621 /* Sets COMB to single element ELT. */
2624 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2626 unsigned prec
= TYPE_PRECISION (type
);
2629 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2632 comb
->elts
[0] = elt
;
2634 comb
->rest
= NULL_TREE
;
2638 /* Scales COMB by SCALE. */
2641 aff_combination_scale (struct affine_tree_combination
*comb
,
2642 unsigned HOST_WIDE_INT scale
)
2651 aff_combination_const (comb
, comb
->type
, 0);
2655 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2656 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2658 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2659 comb
->elts
[j
] = comb
->elts
[i
];
2660 if (comb
->coefs
[j
] != 0)
2667 if (comb
->n
< MAX_AFF_ELTS
)
2669 comb
->coefs
[comb
->n
] = scale
;
2670 comb
->elts
[comb
->n
] = comb
->rest
;
2671 comb
->rest
= NULL_TREE
;
2675 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2676 build_int_cst_type (comb
->type
, scale
));
2680 /* Adds ELT * SCALE to COMB. */
2683 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2684 unsigned HOST_WIDE_INT scale
)
2691 for (i
= 0; i
< comb
->n
; i
++)
2692 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2694 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2699 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2700 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2704 gcc_assert (comb
->n
== MAX_AFF_ELTS
- 1);
2705 comb
->coefs
[comb
->n
] = 1;
2706 comb
->elts
[comb
->n
] = comb
->rest
;
2707 comb
->rest
= NULL_TREE
;
2712 if (comb
->n
< MAX_AFF_ELTS
)
2714 comb
->coefs
[comb
->n
] = scale
;
2715 comb
->elts
[comb
->n
] = elt
;
2721 elt
= fold_convert (comb
->type
, elt
);
2723 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2724 fold_convert (comb
->type
, elt
),
2725 build_int_cst_type (comb
->type
, scale
));
2728 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2733 /* Adds COMB2 to COMB1. */
2736 aff_combination_add (struct affine_tree_combination
*comb1
,
2737 struct affine_tree_combination
*comb2
)
2741 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2742 for (i
= 0; i
< comb2
->n
; i
++)
2743 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2745 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2748 /* Splits EXPR into an affine combination of parts. */
2751 tree_to_aff_combination (tree expr
, tree type
,
2752 struct affine_tree_combination
*comb
)
2754 struct affine_tree_combination tmp
;
2755 enum tree_code code
;
2756 tree cst
, core
, toffset
;
2757 HOST_WIDE_INT bitpos
, bitsize
;
2758 enum machine_mode mode
;
2759 int unsignedp
, volatilep
;
2763 code
= TREE_CODE (expr
);
2767 aff_combination_const (comb
, type
, int_cst_value (expr
));
2772 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2773 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2774 if (code
== MINUS_EXPR
)
2775 aff_combination_scale (&tmp
, -1);
2776 aff_combination_add (comb
, &tmp
);
2780 cst
= TREE_OPERAND (expr
, 1);
2781 if (TREE_CODE (cst
) != INTEGER_CST
)
2783 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2784 aff_combination_scale (comb
, int_cst_value (cst
));
2788 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2789 aff_combination_scale (comb
, -1);
2793 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2794 &toffset
, &mode
, &unsignedp
, &volatilep
,
2796 if (bitpos
% BITS_PER_UNIT
!= 0)
2798 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2799 core
= build_fold_addr_expr (core
);
2800 if (TREE_CODE (core
) == ADDR_EXPR
)
2801 aff_combination_add_elt (comb
, core
, 1);
2804 tree_to_aff_combination (core
, type
, &tmp
);
2805 aff_combination_add (comb
, &tmp
);
2809 tree_to_aff_combination (toffset
, type
, &tmp
);
2810 aff_combination_add (comb
, &tmp
);
2818 aff_combination_elt (comb
, type
, expr
);
2821 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2824 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2825 unsigned HOST_WIDE_INT mask
)
2827 enum tree_code code
;
2830 elt
= fold_convert (type
, elt
);
2837 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2843 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2845 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2849 return fold_build2 (MULT_EXPR
, type
, elt
,
2850 build_int_cst_type (type
, scale
));
2852 if ((scale
| (mask
>> 1)) == mask
)
2854 /* Scale is negative. */
2856 scale
= (-scale
) & mask
;
2861 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2862 build_int_cst_type (type
, scale
));
2863 return fold_build2 (code
, type
, expr
, elt
);
2866 /* Copies the tree elements of COMB to ensure that they are not shared. */
2869 unshare_aff_combination (struct affine_tree_combination
*comb
)
2873 for (i
= 0; i
< comb
->n
; i
++)
2874 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2876 comb
->rest
= unshare_expr (comb
->rest
);
2879 /* Makes tree from the affine combination COMB. */
2882 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2884 tree type
= comb
->type
;
2885 tree expr
= comb
->rest
;
2887 unsigned HOST_WIDE_INT off
, sgn
;
2889 /* Handle the special case produced by get_computation_aff when
2890 the type does not fit in HOST_WIDE_INT. */
2891 if (comb
->n
== 0 && comb
->offset
== 0)
2892 return fold_convert (type
, expr
);
2894 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2896 for (i
= 0; i
< comb
->n
; i
++)
2897 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2900 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2902 /* Offset is negative. */
2903 off
= (-comb
->offset
) & comb
->mask
;
2911 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2915 /* Determines the expression by that USE is expressed from induction variable
2916 CAND at statement AT in LOOP. The expression is stored in a decomposed
2917 form into AFF. Returns false if USE cannot be expressed using CAND. */
2920 get_computation_aff (struct loop
*loop
,
2921 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
2922 struct affine_tree_combination
*aff
)
2924 tree ubase
= use
->iv
->base
;
2925 tree ustep
= use
->iv
->step
;
2926 tree cbase
= cand
->iv
->base
;
2927 tree cstep
= cand
->iv
->step
;
2928 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2932 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2933 HOST_WIDE_INT ratioi
;
2934 struct affine_tree_combination cbase_aff
, expr_aff
;
2935 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
2937 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2939 /* We do not have a precision to express the values of use. */
2943 expr
= var_at_stmt (loop
, cand
, at
);
2945 if (TREE_TYPE (expr
) != ctype
)
2947 /* This may happen with the original ivs. */
2948 expr
= fold_convert (ctype
, expr
);
2951 if (TYPE_UNSIGNED (utype
))
2955 uutype
= unsigned_type_for (utype
);
2956 ubase
= fold_convert (uutype
, ubase
);
2957 ustep
= fold_convert (uutype
, ustep
);
2960 if (uutype
!= ctype
)
2962 expr
= fold_convert (uutype
, expr
);
2963 cbase
= fold_convert (uutype
, cbase
);
2964 cstep
= fold_convert (uutype
, cstep
);
2966 /* If the conversion is not noop, we must take it into account when
2967 considering the value of the step. */
2968 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2972 if (cst_and_fits_in_hwi (cstep_orig
)
2973 && cst_and_fits_in_hwi (ustep_orig
))
2975 ustepi
= int_cst_value (ustep_orig
);
2976 cstepi
= int_cst_value (cstep_orig
);
2978 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2980 /* TODO maybe consider case when ustep divides cstep and the ratio is
2981 a power of 2 (so that the division is fast to execute)? We would
2982 need to be much more careful with overflows etc. then. */
2986 ratio
= build_int_cst_type (uutype
, ratioi
);
2990 ratio
= constant_multiple_of (uutype
, ustep_orig
, cstep_orig
);
2994 /* Ratioi is only used to detect special cases when the multiplicative
2995 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2996 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2997 to integer_onep/integer_all_onesp, since the former ignores
2999 if (cst_and_fits_in_hwi (ratio
))
3000 ratioi
= int_cst_value (ratio
);
3001 else if (integer_onep (ratio
))
3003 else if (integer_all_onesp (ratio
))
3009 /* We may need to shift the value if we are after the increment. */
3010 if (stmt_after_increment (loop
, cand
, at
))
3011 cbase
= fold_build2 (PLUS_EXPR
, uutype
, cbase
, cstep
);
3013 /* use = ubase - ratio * cbase + ratio * var.
3015 In general case ubase + ratio * (var - cbase) could be better (one less
3016 multiplication), but often it is possible to eliminate redundant parts
3017 of computations from (ubase - ratio * cbase) term, and if it does not
3018 happen, fold is able to apply the distributive law to obtain this form
3021 if (TYPE_PRECISION (uutype
) > HOST_BITS_PER_WIDE_INT
)
3023 /* Let's compute in trees and just return the result in AFF. This case
3024 should not be very common, and fold itself is not that bad either,
3025 so making the aff. functions more complicated to handle this case
3026 is not that urgent. */
3029 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, cbase
);
3030 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3032 else if (ratioi
== -1)
3034 delta
= fold_build2 (PLUS_EXPR
, uutype
, ubase
, cbase
);
3035 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3039 delta
= fold_build2 (MULT_EXPR
, uutype
, cbase
, ratio
);
3040 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, delta
);
3041 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3042 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3053 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3054 possible to compute ratioi. */
3055 gcc_assert (ratioi
);
3057 tree_to_aff_combination (ubase
, uutype
, aff
);
3058 tree_to_aff_combination (cbase
, uutype
, &cbase_aff
);
3059 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3060 aff_combination_scale (&cbase_aff
, -ratioi
);
3061 aff_combination_scale (&expr_aff
, ratioi
);
3062 aff_combination_add (aff
, &cbase_aff
);
3063 aff_combination_add (aff
, &expr_aff
);
3068 /* Determines the expression by that USE is expressed from induction variable
3069 CAND at statement AT in LOOP. The computation is unshared. */
3072 get_computation_at (struct loop
*loop
,
3073 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3075 struct affine_tree_combination aff
;
3076 tree type
= TREE_TYPE (use
->iv
->base
);
3078 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3080 unshare_aff_combination (&aff
);
3081 return fold_convert (type
, aff_combination_to_tree (&aff
));
3084 /* Determines the expression by that USE is expressed from induction variable
3085 CAND in LOOP. The computation is unshared. */
3088 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3090 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3093 /* Returns cost of addition in MODE. */
3096 add_cost (enum machine_mode mode
)
3098 static unsigned costs
[NUM_MACHINE_MODES
];
3106 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3107 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3108 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3113 cost
= seq_cost (seq
);
3119 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3120 fprintf (dump_file
, "Addition in %s costs %d\n",
3121 GET_MODE_NAME (mode
), cost
);
3125 /* Entry in a hashtable of already known costs for multiplication. */
3128 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3129 enum machine_mode mode
; /* In mode. */
3130 unsigned cost
; /* The cost. */
3133 /* Counts hash value for the ENTRY. */
3136 mbc_entry_hash (const void *entry
)
3138 const struct mbc_entry
*e
= entry
;
3140 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3143 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3146 mbc_entry_eq (const void *entry1
, const void *entry2
)
3148 const struct mbc_entry
*e1
= entry1
;
3149 const struct mbc_entry
*e2
= entry2
;
3151 return (e1
->mode
== e2
->mode
3152 && e1
->cst
== e2
->cst
);
3155 /* Returns cost of multiplication by constant CST in MODE. */
3158 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3160 static htab_t costs
;
3161 struct mbc_entry
**cached
, act
;
3166 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3170 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3172 return (*cached
)->cost
;
3174 *cached
= XNEW (struct mbc_entry
);
3175 (*cached
)->mode
= mode
;
3176 (*cached
)->cst
= cst
;
3179 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3180 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3184 cost
= seq_cost (seq
);
3186 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3187 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3188 (int) cst
, GET_MODE_NAME (mode
), cost
);
3190 (*cached
)->cost
= cost
;
3195 /* Returns true if multiplying by RATIO is allowed in address. */
3198 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
)
3200 #define MAX_RATIO 128
3201 static sbitmap valid_mult
;
3205 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3209 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3210 sbitmap_zero (valid_mult
);
3211 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3212 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3214 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3215 if (memory_address_p (Pmode
, addr
))
3216 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3219 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3221 fprintf (dump_file
, " allowed multipliers:");
3222 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3223 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3224 fprintf (dump_file
, " %d", (int) i
);
3225 fprintf (dump_file
, "\n");
3226 fprintf (dump_file
, "\n");
3230 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3233 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3236 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3237 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3238 variable is omitted. The created memory accesses MODE.
3240 TODO -- there must be some better way. This all is quite crude. */
3243 get_address_cost (bool symbol_present
, bool var_present
,
3244 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
3246 static bool initialized
= false;
3247 static HOST_WIDE_INT rat
, off
;
3248 static HOST_WIDE_INT min_offset
, max_offset
;
3249 static unsigned costs
[2][2][2][2];
3250 unsigned cost
, acost
;
3251 rtx seq
, addr
, base
;
3252 bool offset_p
, ratio_p
;
3254 HOST_WIDE_INT s_offset
;
3255 unsigned HOST_WIDE_INT mask
;
3263 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3265 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3266 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3268 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3269 if (!memory_address_p (Pmode
, addr
))
3272 max_offset
= i
>> 1;
3275 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3277 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3278 if (!memory_address_p (Pmode
, addr
))
3281 min_offset
= -(i
>> 1);
3283 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3285 fprintf (dump_file
, "get_address_cost:\n");
3286 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
3287 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
3291 for (i
= 2; i
<= MAX_RATIO
; i
++)
3292 if (multiplier_allowed_in_address_p (i
))
3299 bits
= GET_MODE_BITSIZE (Pmode
);
3300 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3302 if ((offset
>> (bits
- 1) & 1))
3307 offset_p
= (s_offset
!= 0
3308 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
3309 ratio_p
= (ratio
!= 1
3310 && multiplier_allowed_in_address_p (ratio
));
3312 if (ratio
!= 1 && !ratio_p
)
3313 cost
+= multiply_by_cost (ratio
, Pmode
);
3315 if (s_offset
&& !offset_p
&& !symbol_present
)
3317 cost
+= add_cost (Pmode
);
3321 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3324 int old_cse_not_expected
;
3327 addr
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3328 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3330 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, gen_int_mode (rat
, Pmode
));
3333 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3337 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3339 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3340 gen_rtx_fmt_ee (PLUS
, Pmode
,
3342 gen_int_mode (off
, Pmode
)));
3345 base
= gen_int_mode (off
, Pmode
);
3350 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3353 /* To avoid splitting addressing modes, pretend that no cse will
3355 old_cse_not_expected
= cse_not_expected
;
3356 cse_not_expected
= true;
3357 addr
= memory_address (Pmode
, addr
);
3358 cse_not_expected
= old_cse_not_expected
;
3362 acost
= seq_cost (seq
);
3363 acost
+= address_cost (addr
, Pmode
);
3367 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
3370 return cost
+ acost
;
3373 /* Estimates cost of forcing expression EXPR into a variable. */
3376 force_expr_to_var_cost (tree expr
)
3378 static bool costs_initialized
= false;
3379 static unsigned integer_cost
;
3380 static unsigned symbol_cost
;
3381 static unsigned address_cost
;
3383 unsigned cost0
, cost1
, cost
;
3384 enum machine_mode mode
;
3386 if (!costs_initialized
)
3388 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3389 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3390 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3392 tree type
= build_pointer_type (integer_type_node
);
3394 integer_cost
= computation_cost (build_int_cst (integer_type_node
,
3397 SET_DECL_RTL (var
, x
);
3398 TREE_STATIC (var
) = 1;
3399 addr
= build1 (ADDR_EXPR
, type
, var
);
3400 symbol_cost
= computation_cost (addr
) + 1;
3403 = computation_cost (build2 (PLUS_EXPR
, type
,
3405 build_int_cst (type
, 2000))) + 1;
3406 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3408 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3409 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3410 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3411 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3412 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3413 fprintf (dump_file
, "\n");
3416 costs_initialized
= true;
3421 if (SSA_VAR_P (expr
))
3424 if (TREE_INVARIANT (expr
))
3426 if (TREE_CODE (expr
) == INTEGER_CST
)
3427 return integer_cost
;
3429 if (TREE_CODE (expr
) == ADDR_EXPR
)
3431 tree obj
= TREE_OPERAND (expr
, 0);
3433 if (TREE_CODE (obj
) == VAR_DECL
3434 || TREE_CODE (obj
) == PARM_DECL
3435 || TREE_CODE (obj
) == RESULT_DECL
)
3439 return address_cost
;
3442 switch (TREE_CODE (expr
))
3447 op0
= TREE_OPERAND (expr
, 0);
3448 op1
= TREE_OPERAND (expr
, 1);
3452 if (is_gimple_val (op0
))
3455 cost0
= force_expr_to_var_cost (op0
);
3457 if (is_gimple_val (op1
))
3460 cost1
= force_expr_to_var_cost (op1
);
3465 /* Just an arbitrary value, FIXME. */
3466 return target_spill_cost
;
3469 mode
= TYPE_MODE (TREE_TYPE (expr
));
3470 switch (TREE_CODE (expr
))
3474 cost
= add_cost (mode
);
3478 if (cst_and_fits_in_hwi (op0
))
3479 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3480 else if (cst_and_fits_in_hwi (op1
))
3481 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3483 return target_spill_cost
;
3493 /* Bound the cost by target_spill_cost. The parts of complicated
3494 computations often are either loop invariant or at least can
3495 be shared between several iv uses, so letting this grow without
3496 limits would not give reasonable results. */
3497 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3500 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3501 invariants the computation depends on. */
3504 force_var_cost (struct ivopts_data
*data
,
3505 tree expr
, bitmap
*depends_on
)
3509 fd_ivopts_data
= data
;
3510 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3513 return force_expr_to_var_cost (expr
);
3516 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3517 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3518 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3519 invariants the computation depends on. */
3522 split_address_cost (struct ivopts_data
*data
,
3523 tree addr
, bool *symbol_present
, bool *var_present
,
3524 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3527 HOST_WIDE_INT bitsize
;
3528 HOST_WIDE_INT bitpos
;
3530 enum machine_mode mode
;
3531 int unsignedp
, volatilep
;
3533 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3534 &unsignedp
, &volatilep
, false);
3537 || bitpos
% BITS_PER_UNIT
!= 0
3538 || TREE_CODE (core
) != VAR_DECL
)
3540 *symbol_present
= false;
3541 *var_present
= true;
3542 fd_ivopts_data
= data
;
3543 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3544 return target_spill_cost
;
3547 *offset
+= bitpos
/ BITS_PER_UNIT
;
3548 if (TREE_STATIC (core
)
3549 || DECL_EXTERNAL (core
))
3551 *symbol_present
= true;
3552 *var_present
= false;
3556 *symbol_present
= false;
3557 *var_present
= true;
3561 /* Estimates cost of expressing difference of addresses E1 - E2 as
3562 var + symbol + offset. The value of offset is added to OFFSET,
3563 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3564 part is missing. DEPENDS_ON is a set of the invariants the computation
3568 ptr_difference_cost (struct ivopts_data
*data
,
3569 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3570 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3572 HOST_WIDE_INT diff
= 0;
3575 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3577 if (ptr_difference_const (e1
, e2
, &diff
))
3580 *symbol_present
= false;
3581 *var_present
= false;
3585 if (e2
== integer_zero_node
)
3586 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3587 symbol_present
, var_present
, offset
, depends_on
);
3589 *symbol_present
= false;
3590 *var_present
= true;
3592 cost
= force_var_cost (data
, e1
, depends_on
);
3593 cost
+= force_var_cost (data
, e2
, depends_on
);
3594 cost
+= add_cost (Pmode
);
3599 /* Estimates cost of expressing difference E1 - E2 as
3600 var + symbol + offset. The value of offset is added to OFFSET,
3601 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3602 part is missing. DEPENDS_ON is a set of the invariants the computation
3606 difference_cost (struct ivopts_data
*data
,
3607 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3608 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3611 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3612 unsigned HOST_WIDE_INT off1
, off2
;
3614 e1
= strip_offset (e1
, &off1
);
3615 e2
= strip_offset (e2
, &off2
);
3616 *offset
+= off1
- off2
;
3621 if (TREE_CODE (e1
) == ADDR_EXPR
)
3622 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3624 *symbol_present
= false;
3626 if (operand_equal_p (e1
, e2
, 0))
3628 *var_present
= false;
3631 *var_present
= true;
3633 return force_var_cost (data
, e1
, depends_on
);
3637 cost
= force_var_cost (data
, e2
, depends_on
);
3638 cost
+= multiply_by_cost (-1, mode
);
3643 cost
= force_var_cost (data
, e1
, depends_on
);
3644 cost
+= force_var_cost (data
, e2
, depends_on
);
3645 cost
+= add_cost (mode
);
3650 /* Determines the cost of the computation by that USE is expressed
3651 from induction variable CAND. If ADDRESS_P is true, we just need
3652 to create an address from it, otherwise we want to get it into
3653 register. A set of invariants we depend on is stored in
3654 DEPENDS_ON. AT is the statement at that the value is computed. */
3657 get_computation_cost_at (struct ivopts_data
*data
,
3658 struct iv_use
*use
, struct iv_cand
*cand
,
3659 bool address_p
, bitmap
*depends_on
, tree at
)
3661 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3663 tree utype
= TREE_TYPE (ubase
), ctype
;
3664 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3665 HOST_WIDE_INT ratio
, aratio
;
3666 bool var_present
, symbol_present
;
3667 unsigned cost
= 0, n_sums
;
3671 /* Only consider real candidates. */
3675 cbase
= cand
->iv
->base
;
3676 cstep
= cand
->iv
->step
;
3677 ctype
= TREE_TYPE (cbase
);
3679 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3681 /* We do not have a precision to express the values of use. */
3687 /* Do not try to express address of an object with computation based
3688 on address of a different object. This may cause problems in rtl
3689 level alias analysis (that does not expect this to be happening,
3690 as this is illegal in C), and would be unlikely to be useful
3692 if (use
->iv
->base_object
3693 && cand
->iv
->base_object
3694 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3698 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3700 /* TODO -- add direct handling of this case. */
3704 /* CSTEPI is removed from the offset in case statement is after the
3705 increment. If the step is not constant, we use zero instead.
3706 This is a bit imprecise (there is the extra addition), but
3707 redundancy elimination is likely to transform the code so that
3708 it uses value of the variable before increment anyway,
3709 so it is not that much unrealistic. */
3710 if (cst_and_fits_in_hwi (cstep
))
3711 cstepi
= int_cst_value (cstep
);
3715 if (cst_and_fits_in_hwi (ustep
)
3716 && cst_and_fits_in_hwi (cstep
))
3718 ustepi
= int_cst_value (ustep
);
3720 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3727 rat
= constant_multiple_of (utype
, ustep
, cstep
);
3732 if (cst_and_fits_in_hwi (rat
))
3733 ratio
= int_cst_value (rat
);
3734 else if (integer_onep (rat
))
3736 else if (integer_all_onesp (rat
))
3742 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3743 or ratio == 1, it is better to handle this like
3745 ubase - ratio * cbase + ratio * var
3747 (also holds in the case ratio == -1, TODO. */
3749 if (cst_and_fits_in_hwi (cbase
))
3751 offset
= - ratio
* int_cst_value (cbase
);
3752 cost
+= difference_cost (data
,
3753 ubase
, integer_zero_node
,
3754 &symbol_present
, &var_present
, &offset
,
3757 else if (ratio
== 1)
3759 cost
+= difference_cost (data
,
3761 &symbol_present
, &var_present
, &offset
,
3766 cost
+= force_var_cost (data
, cbase
, depends_on
);
3767 cost
+= add_cost (TYPE_MODE (ctype
));
3768 cost
+= difference_cost (data
,
3769 ubase
, integer_zero_node
,
3770 &symbol_present
, &var_present
, &offset
,
3774 /* If we are after the increment, the value of the candidate is higher by
3776 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3777 offset
-= ratio
* cstepi
;
3779 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3780 (symbol/var/const parts may be omitted). If we are looking for an address,
3781 find the cost of addressing this. */
3783 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3785 /* Otherwise estimate the costs for computing the expression. */
3786 aratio
= ratio
> 0 ? ratio
: -ratio
;
3787 if (!symbol_present
&& !var_present
&& !offset
)
3790 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3796 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3800 /* Symbol + offset should be compile-time computable. */
3801 && (symbol_present
|| offset
))
3804 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3808 /* Just get the expression, expand it and measure the cost. */
3809 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3815 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3817 return computation_cost (comp
);
3821 /* Determines the cost of the computation by that USE is expressed
3822 from induction variable CAND. If ADDRESS_P is true, we just need
3823 to create an address from it, otherwise we want to get it into
3824 register. A set of invariants we depend on is stored in
3828 get_computation_cost (struct ivopts_data
*data
,
3829 struct iv_use
*use
, struct iv_cand
*cand
,
3830 bool address_p
, bitmap
*depends_on
)
3832 return get_computation_cost_at (data
,
3833 use
, cand
, address_p
, depends_on
, use
->stmt
);
3836 /* Determines cost of basing replacement of USE on CAND in a generic
3840 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3841 struct iv_use
*use
, struct iv_cand
*cand
)
3846 /* The simple case first -- if we need to express value of the preserved
3847 original biv, the cost is 0. This also prevents us from counting the
3848 cost of increment twice -- once at this use and once in the cost of
3850 if (cand
->pos
== IP_ORIGINAL
3851 && cand
->incremented_at
== use
->stmt
)
3853 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
3857 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3858 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3860 return cost
!= INFTY
;
3863 /* Determines cost of basing replacement of USE on CAND in an address. */
3866 determine_use_iv_cost_address (struct ivopts_data
*data
,
3867 struct iv_use
*use
, struct iv_cand
*cand
)
3870 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3872 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3874 return cost
!= INFTY
;
3877 /* Computes value of induction variable IV in iteration NITER. */
3880 iv_value (struct iv
*iv
, tree niter
)
3883 tree type
= TREE_TYPE (iv
->base
);
3885 niter
= fold_convert (type
, niter
);
3886 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
3888 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
3891 /* Computes value of candidate CAND at position AT in iteration NITER. */
3894 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3896 tree val
= iv_value (cand
->iv
, niter
);
3897 tree type
= TREE_TYPE (cand
->iv
->base
);
3899 if (stmt_after_increment (loop
, cand
, at
))
3900 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
3905 /* Returns period of induction variable iv. */
3908 iv_period (struct iv
*iv
)
3910 tree step
= iv
->step
, period
, type
;
3913 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3915 /* Period of the iv is gcd (step, type range). Since type range is power
3916 of two, it suffices to determine the maximum power of two that divides
3918 pow2div
= num_ending_zeros (step
);
3919 type
= unsigned_type_for (TREE_TYPE (step
));
3921 period
= build_low_bits_mask (type
,
3922 (TYPE_PRECISION (type
)
3923 - tree_low_cst (pow2div
, 1)));
3928 /* Returns the comparison operator used when eliminating the iv USE. */
3930 static enum tree_code
3931 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3933 struct loop
*loop
= data
->current_loop
;
3937 ex_bb
= bb_for_stmt (use
->stmt
);
3938 exit
= EDGE_SUCC (ex_bb
, 0);
3939 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3940 exit
= EDGE_SUCC (ex_bb
, 1);
3942 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3945 /* Check whether it is possible to express the condition in USE by comparison
3946 of candidate CAND. If so, store the value compared with to BOUND. */
3949 may_eliminate_iv (struct ivopts_data
*data
,
3950 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3955 tree wider_type
, period
, per_type
;
3956 struct loop
*loop
= data
->current_loop
;
3958 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3961 /* For now works only for exits that dominate the loop latch. TODO -- extend
3962 for other conditions inside loop body. */
3963 ex_bb
= bb_for_stmt (use
->stmt
);
3964 if (use
->stmt
!= last_stmt (ex_bb
)
3965 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3967 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3970 exit
= EDGE_SUCC (ex_bb
, 0);
3971 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3972 exit
= EDGE_SUCC (ex_bb
, 1);
3973 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3976 nit
= niter_for_exit (data
, exit
);
3980 nit_type
= TREE_TYPE (nit
);
3982 /* Determine whether we may use the variable to test whether niter iterations
3983 elapsed. This is the case iff the period of the induction variable is
3984 greater than the number of iterations. */
3985 period
= iv_period (cand
->iv
);
3988 per_type
= TREE_TYPE (period
);
3990 wider_type
= TREE_TYPE (period
);
3991 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
3992 wider_type
= per_type
;
3994 wider_type
= nit_type
;
3996 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
3997 fold_convert (wider_type
, period
),
3998 fold_convert (wider_type
, nit
))))
4001 *bound
= cand_value_at (loop
, cand
, use
->stmt
, nit
);
4005 /* Determines cost of basing replacement of USE on CAND in a condition. */
4008 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4009 struct iv_use
*use
, struct iv_cand
*cand
)
4011 tree bound
= NULL_TREE
, op
, cond
;
4012 bitmap depends_on
= NULL
;
4015 /* Only consider real candidates. */
4018 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4022 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4024 cost
= force_var_cost (data
, bound
, &depends_on
);
4026 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4027 return cost
!= INFTY
;
4030 /* The induction variable elimination failed; just express the original
4031 giv. If it is compared with an invariant, note that we cannot get
4033 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4036 if (TREE_CODE (cond
) != SSA_NAME
)
4038 op
= TREE_OPERAND (cond
, 0);
4039 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4040 op
= TREE_OPERAND (cond
, 1);
4041 if (TREE_CODE (op
) == SSA_NAME
)
4043 op
= get_iv (data
, op
)->base
;
4044 fd_ivopts_data
= data
;
4045 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4049 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4050 return cost
!= INFTY
;
4053 /* Determines cost of basing replacement of USE on CAND. Returns false
4054 if USE cannot be based on CAND. */
4057 determine_use_iv_cost (struct ivopts_data
*data
,
4058 struct iv_use
*use
, struct iv_cand
*cand
)
4062 case USE_NONLINEAR_EXPR
:
4063 return determine_use_iv_cost_generic (data
, use
, cand
);
4066 return determine_use_iv_cost_address (data
, use
, cand
);
4069 return determine_use_iv_cost_condition (data
, use
, cand
);
4076 /* Determines costs of basing the use of the iv on an iv candidate. */
4079 determine_use_iv_costs (struct ivopts_data
*data
)
4083 struct iv_cand
*cand
;
4084 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4086 alloc_use_cost_map (data
);
4088 for (i
= 0; i
< n_iv_uses (data
); i
++)
4090 use
= iv_use (data
, i
);
4092 if (data
->consider_all_candidates
)
4094 for (j
= 0; j
< n_iv_cands (data
); j
++)
4096 cand
= iv_cand (data
, j
);
4097 determine_use_iv_cost (data
, use
, cand
);
4104 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4106 cand
= iv_cand (data
, j
);
4107 if (!determine_use_iv_cost (data
, use
, cand
))
4108 bitmap_set_bit (to_clear
, j
);
4111 /* Remove the candidates for that the cost is infinite from
4112 the list of related candidates. */
4113 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4114 bitmap_clear (to_clear
);
4118 BITMAP_FREE (to_clear
);
4120 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4122 fprintf (dump_file
, "Use-candidate costs:\n");
4124 for (i
= 0; i
< n_iv_uses (data
); i
++)
4126 use
= iv_use (data
, i
);
4128 fprintf (dump_file
, "Use %d:\n", i
);
4129 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4130 for (j
= 0; j
< use
->n_map_members
; j
++)
4132 if (!use
->cost_map
[j
].cand
4133 || use
->cost_map
[j
].cost
== INFTY
)
4136 fprintf (dump_file
, " %d\t%d\t",
4137 use
->cost_map
[j
].cand
->id
,
4138 use
->cost_map
[j
].cost
);
4139 if (use
->cost_map
[j
].depends_on
)
4140 bitmap_print (dump_file
,
4141 use
->cost_map
[j
].depends_on
, "","");
4142 fprintf (dump_file
, "\n");
4145 fprintf (dump_file
, "\n");
4147 fprintf (dump_file
, "\n");
4151 /* Determines cost of the candidate CAND. */
4154 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4156 unsigned cost_base
, cost_step
;
4165 /* There are two costs associated with the candidate -- its increment
4166 and its initialization. The second is almost negligible for any loop
4167 that rolls enough, so we take it just very little into account. */
4169 base
= cand
->iv
->base
;
4170 cost_base
= force_var_cost (data
, base
, NULL
);
4171 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4173 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4175 /* Prefer the original iv unless we may gain something by replacing it;
4176 this is not really relevant for artificial ivs created by other
4178 if (cand
->pos
== IP_ORIGINAL
4179 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4182 /* Prefer not to insert statements into latch unless there are some
4183 already (so that we do not create unnecessary jumps). */
4184 if (cand
->pos
== IP_END
4185 && empty_block_p (ip_end_pos (data
->current_loop
)))
4189 /* Determines costs of computation of the candidates. */
4192 determine_iv_costs (struct ivopts_data
*data
)
4196 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4198 fprintf (dump_file
, "Candidate costs:\n");
4199 fprintf (dump_file
, " cand\tcost\n");
4202 for (i
= 0; i
< n_iv_cands (data
); i
++)
4204 struct iv_cand
*cand
= iv_cand (data
, i
);
4206 determine_iv_cost (data
, cand
);
4208 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4209 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4212 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4213 fprintf (dump_file
, "\n");
4216 /* Calculates cost for having SIZE induction variables. */
4219 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4221 return global_cost_for_size (size
, data
->regs_used
, n_iv_uses (data
));
4224 /* For each size of the induction variable set determine the penalty. */
4227 determine_set_costs (struct ivopts_data
*data
)
4231 struct loop
*loop
= data
->current_loop
;
4234 /* We use the following model (definitely improvable, especially the
4235 cost function -- TODO):
4237 We estimate the number of registers available (using MD data), name it A.
4239 We estimate the number of registers used by the loop, name it U. This
4240 number is obtained as the number of loop phi nodes (not counting virtual
4241 registers and bivs) + the number of variables from outside of the loop.
4243 We set a reserve R (free regs that are used for temporary computations,
4244 etc.). For now the reserve is a constant 3.
4246 Let I be the number of induction variables.
4248 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4249 make a lot of ivs without a reason).
4250 -- if A - R < U + I <= A, the cost is I * PRES_COST
4251 -- if U + I > A, the cost is I * PRES_COST and
4252 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4254 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4256 fprintf (dump_file
, "Global costs:\n");
4257 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4258 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4259 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4260 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
4264 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4266 op
= PHI_RESULT (phi
);
4268 if (!is_gimple_reg (op
))
4271 if (get_iv (data
, op
))
4277 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4279 struct version_info
*info
= ver_info (data
, j
);
4281 if (info
->inv_id
&& info
->has_nonlin_use
)
4285 data
->regs_used
= n
;
4286 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4287 fprintf (dump_file
, " regs_used %d\n", n
);
4289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4291 fprintf (dump_file
, " cost for size:\n");
4292 fprintf (dump_file
, " ivs\tcost\n");
4293 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4294 fprintf (dump_file
, " %d\t%d\n", j
,
4295 ivopts_global_cost_for_size (data
, j
));
4296 fprintf (dump_file
, "\n");
4300 /* Returns true if A is a cheaper cost pair than B. */
4303 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4311 if (a
->cost
< b
->cost
)
4314 if (a
->cost
> b
->cost
)
4317 /* In case the costs are the same, prefer the cheaper candidate. */
4318 if (a
->cand
->cost
< b
->cand
->cost
)
4324 /* Computes the cost field of IVS structure. */
4327 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4331 cost
+= ivs
->cand_use_cost
;
4332 cost
+= ivs
->cand_cost
;
4333 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4338 /* Remove invariants in set INVS to set IVS. */
4341 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4349 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4351 ivs
->n_invariant_uses
[iid
]--;
4352 if (ivs
->n_invariant_uses
[iid
] == 0)
4357 /* Set USE not to be expressed by any candidate in IVS. */
4360 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4363 unsigned uid
= use
->id
, cid
;
4364 struct cost_pair
*cp
;
4366 cp
= ivs
->cand_for_use
[uid
];
4372 ivs
->cand_for_use
[uid
] = NULL
;
4373 ivs
->n_cand_uses
[cid
]--;
4375 if (ivs
->n_cand_uses
[cid
] == 0)
4377 bitmap_clear_bit (ivs
->cands
, cid
);
4378 /* Do not count the pseudocandidates. */
4382 ivs
->cand_cost
-= cp
->cand
->cost
;
4384 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4387 ivs
->cand_use_cost
-= cp
->cost
;
4389 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4390 iv_ca_recount_cost (data
, ivs
);
4393 /* Add invariants in set INVS to set IVS. */
4396 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4404 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4406 ivs
->n_invariant_uses
[iid
]++;
4407 if (ivs
->n_invariant_uses
[iid
] == 1)
4412 /* Set cost pair for USE in set IVS to CP. */
4415 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4416 struct iv_use
*use
, struct cost_pair
*cp
)
4418 unsigned uid
= use
->id
, cid
;
4420 if (ivs
->cand_for_use
[uid
] == cp
)
4423 if (ivs
->cand_for_use
[uid
])
4424 iv_ca_set_no_cp (data
, ivs
, use
);
4431 ivs
->cand_for_use
[uid
] = cp
;
4432 ivs
->n_cand_uses
[cid
]++;
4433 if (ivs
->n_cand_uses
[cid
] == 1)
4435 bitmap_set_bit (ivs
->cands
, cid
);
4436 /* Do not count the pseudocandidates. */
4440 ivs
->cand_cost
+= cp
->cand
->cost
;
4442 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4445 ivs
->cand_use_cost
+= cp
->cost
;
4446 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4447 iv_ca_recount_cost (data
, ivs
);
4451 /* Extend set IVS by expressing USE by some of the candidates in it
4455 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4458 struct cost_pair
*best_cp
= NULL
, *cp
;
4462 gcc_assert (ivs
->upto
>= use
->id
);
4464 if (ivs
->upto
== use
->id
)
4470 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4472 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4474 if (cheaper_cost_pair (cp
, best_cp
))
4478 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4481 /* Get cost for assignment IVS. */
4484 iv_ca_cost (struct iv_ca
*ivs
)
4486 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4489 /* Returns true if all dependences of CP are among invariants in IVS. */
4492 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4497 if (!cp
->depends_on
)
4500 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4502 if (ivs
->n_invariant_uses
[i
] == 0)
4509 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4510 it before NEXT_CHANGE. */
4512 static struct iv_ca_delta
*
4513 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4514 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4516 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4519 change
->old_cp
= old_cp
;
4520 change
->new_cp
= new_cp
;
4521 change
->next_change
= next_change
;
4526 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4529 static struct iv_ca_delta
*
4530 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4532 struct iv_ca_delta
*last
;
4540 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4542 last
->next_change
= l2
;
4547 /* Returns candidate by that USE is expressed in IVS. */
4549 static struct cost_pair
*
4550 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4552 return ivs
->cand_for_use
[use
->id
];
4555 /* Reverse the list of changes DELTA, forming the inverse to it. */
4557 static struct iv_ca_delta
*
4558 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4560 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4561 struct cost_pair
*tmp
;
4563 for (act
= delta
; act
; act
= next
)
4565 next
= act
->next_change
;
4566 act
->next_change
= prev
;
4570 act
->old_cp
= act
->new_cp
;
4577 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4578 reverted instead. */
4581 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4582 struct iv_ca_delta
*delta
, bool forward
)
4584 struct cost_pair
*from
, *to
;
4585 struct iv_ca_delta
*act
;
4588 delta
= iv_ca_delta_reverse (delta
);
4590 for (act
= delta
; act
; act
= act
->next_change
)
4594 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4595 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4599 iv_ca_delta_reverse (delta
);
4602 /* Returns true if CAND is used in IVS. */
4605 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4607 return ivs
->n_cand_uses
[cand
->id
] > 0;
4610 /* Returns number of induction variable candidates in the set IVS. */
4613 iv_ca_n_cands (struct iv_ca
*ivs
)
4615 return ivs
->n_cands
;
4618 /* Free the list of changes DELTA. */
4621 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4623 struct iv_ca_delta
*act
, *next
;
4625 for (act
= *delta
; act
; act
= next
)
4627 next
= act
->next_change
;
4634 /* Allocates new iv candidates assignment. */
4636 static struct iv_ca
*
4637 iv_ca_new (struct ivopts_data
*data
)
4639 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4643 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4644 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4645 nw
->cands
= BITMAP_ALLOC (NULL
);
4648 nw
->cand_use_cost
= 0;
4650 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4656 /* Free memory occupied by the set IVS. */
4659 iv_ca_free (struct iv_ca
**ivs
)
4661 free ((*ivs
)->cand_for_use
);
4662 free ((*ivs
)->n_cand_uses
);
4663 BITMAP_FREE ((*ivs
)->cands
);
4664 free ((*ivs
)->n_invariant_uses
);
4669 /* Dumps IVS to FILE. */
4672 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4674 const char *pref
= " invariants ";
4677 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4678 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4680 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4681 if (ivs
->n_invariant_uses
[i
])
4683 fprintf (file
, "%s%d", pref
, i
);
4686 fprintf (file
, "\n");
4689 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4690 new set, and store differences in DELTA. Number of induction variables
4691 in the new set is stored to N_IVS. */
4694 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4695 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4700 struct cost_pair
*old_cp
, *new_cp
;
4703 for (i
= 0; i
< ivs
->upto
; i
++)
4705 use
= iv_use (data
, i
);
4706 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4709 && old_cp
->cand
== cand
)
4712 new_cp
= get_use_iv_cost (data
, use
, cand
);
4716 if (!iv_ca_has_deps (ivs
, new_cp
))
4719 if (!cheaper_cost_pair (new_cp
, old_cp
))
4722 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4725 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4726 cost
= iv_ca_cost (ivs
);
4728 *n_ivs
= iv_ca_n_cands (ivs
);
4729 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4734 /* Try narrowing set IVS by removing CAND. Return the cost of
4735 the new set and store the differences in DELTA. */
4738 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4739 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4743 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4745 struct iv_cand
*cnd
;
4749 for (i
= 0; i
< n_iv_uses (data
); i
++)
4751 use
= iv_use (data
, i
);
4753 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4754 if (old_cp
->cand
!= cand
)
4759 if (data
->consider_all_candidates
)
4761 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4766 cnd
= iv_cand (data
, ci
);
4768 cp
= get_use_iv_cost (data
, use
, cnd
);
4771 if (!iv_ca_has_deps (ivs
, cp
))
4774 if (!cheaper_cost_pair (cp
, new_cp
))
4782 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4787 cnd
= iv_cand (data
, ci
);
4789 cp
= get_use_iv_cost (data
, use
, cnd
);
4792 if (!iv_ca_has_deps (ivs
, cp
))
4795 if (!cheaper_cost_pair (cp
, new_cp
))
4804 iv_ca_delta_free (delta
);
4808 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4811 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4812 cost
= iv_ca_cost (ivs
);
4813 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4818 /* Try optimizing the set of candidates IVS by removing candidates different
4819 from to EXCEPT_CAND from it. Return cost of the new set, and store
4820 differences in DELTA. */
4823 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4824 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4827 struct iv_ca_delta
*act_delta
, *best_delta
;
4828 unsigned i
, best_cost
, acost
;
4829 struct iv_cand
*cand
;
4832 best_cost
= iv_ca_cost (ivs
);
4834 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4836 cand
= iv_cand (data
, i
);
4838 if (cand
== except_cand
)
4841 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4843 if (acost
< best_cost
)
4846 iv_ca_delta_free (&best_delta
);
4847 best_delta
= act_delta
;
4850 iv_ca_delta_free (&act_delta
);
4859 /* Recurse to possibly remove other unnecessary ivs. */
4860 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4861 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4862 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4863 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4867 /* Tries to extend the sets IVS in the best possible way in order
4868 to express the USE. */
4871 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4874 unsigned best_cost
, act_cost
;
4877 struct iv_cand
*cand
;
4878 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4879 struct cost_pair
*cp
;
4881 iv_ca_add_use (data
, ivs
, use
);
4882 best_cost
= iv_ca_cost (ivs
);
4884 cp
= iv_ca_cand_for_use (ivs
, use
);
4887 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4888 iv_ca_set_no_cp (data
, ivs
, use
);
4891 /* First try important candidates. Only if it fails, try the specific ones.
4892 Rationale -- in loops with many variables the best choice often is to use
4893 just one generic biv. If we added here many ivs specific to the uses,
4894 the optimization algorithm later would be likely to get stuck in a local
4895 minimum, thus causing us to create too many ivs. The approach from
4896 few ivs to more seems more likely to be successful -- starting from few
4897 ivs, replacing an expensive use by a specific iv should always be a
4899 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4901 cand
= iv_cand (data
, i
);
4903 if (iv_ca_cand_used_p (ivs
, cand
))
4906 cp
= get_use_iv_cost (data
, use
, cand
);
4910 iv_ca_set_cp (data
, ivs
, use
, cp
);
4911 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4912 iv_ca_set_no_cp (data
, ivs
, use
);
4913 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4915 if (act_cost
< best_cost
)
4917 best_cost
= act_cost
;
4919 iv_ca_delta_free (&best_delta
);
4920 best_delta
= act_delta
;
4923 iv_ca_delta_free (&act_delta
);
4926 if (best_cost
== INFTY
)
4928 for (i
= 0; i
< use
->n_map_members
; i
++)
4930 cp
= use
->cost_map
+ i
;
4935 /* Already tried this. */
4936 if (cand
->important
)
4939 if (iv_ca_cand_used_p (ivs
, cand
))
4943 iv_ca_set_cp (data
, ivs
, use
, cp
);
4944 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4945 iv_ca_set_no_cp (data
, ivs
, use
);
4946 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4949 if (act_cost
< best_cost
)
4951 best_cost
= act_cost
;
4954 iv_ca_delta_free (&best_delta
);
4955 best_delta
= act_delta
;
4958 iv_ca_delta_free (&act_delta
);
4962 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4963 iv_ca_delta_free (&best_delta
);
4965 return (best_cost
!= INFTY
);
4968 /* Finds an initial assignment of candidates to uses. */
4970 static struct iv_ca
*
4971 get_initial_solution (struct ivopts_data
*data
)
4973 struct iv_ca
*ivs
= iv_ca_new (data
);
4976 for (i
= 0; i
< n_iv_uses (data
); i
++)
4977 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4986 /* Tries to improve set of induction variables IVS. */
4989 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4991 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4992 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4993 struct iv_cand
*cand
;
4995 /* Try extending the set of induction variables by one. */
4996 for (i
= 0; i
< n_iv_cands (data
); i
++)
4998 cand
= iv_cand (data
, i
);
5000 if (iv_ca_cand_used_p (ivs
, cand
))
5003 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5007 /* If we successfully added the candidate and the set is small enough,
5008 try optimizing it by removing other candidates. */
5009 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5011 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5012 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5013 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5014 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5017 if (acost
< best_cost
)
5020 iv_ca_delta_free (&best_delta
);
5021 best_delta
= act_delta
;
5024 iv_ca_delta_free (&act_delta
);
5029 /* Try removing the candidates from the set instead. */
5030 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5032 /* Nothing more we can do. */
5037 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5038 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5039 iv_ca_delta_free (&best_delta
);
5043 /* Attempts to find the optimal set of induction variables. We do simple
5044 greedy heuristic -- we try to replace at most one candidate in the selected
5045 solution and remove the unused ivs while this improves the cost. */
5047 static struct iv_ca
*
5048 find_optimal_iv_set (struct ivopts_data
*data
)
5054 /* Get the initial solution. */
5055 set
= get_initial_solution (data
);
5058 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5059 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5063 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5065 fprintf (dump_file
, "Initial set of candidates:\n");
5066 iv_ca_dump (data
, dump_file
, set
);
5069 while (try_improve_iv_set (data
, set
))
5071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5073 fprintf (dump_file
, "Improved to:\n");
5074 iv_ca_dump (data
, dump_file
, set
);
5078 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5079 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5081 for (i
= 0; i
< n_iv_uses (data
); i
++)
5083 use
= iv_use (data
, i
);
5084 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5090 /* Creates a new induction variable corresponding to CAND. */
5093 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5095 block_stmt_iterator incr_pos
;
5105 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5109 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5114 /* Mark that the iv is preserved. */
5115 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5116 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5118 /* Rewrite the increment so that it uses var_before directly. */
5119 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5124 gimple_add_tmp_var (cand
->var_before
);
5125 add_referenced_var (cand
->var_before
);
5127 base
= unshare_expr (cand
->iv
->base
);
5129 create_iv (base
, unshare_expr (cand
->iv
->step
),
5130 cand
->var_before
, data
->current_loop
,
5131 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5134 /* Creates new induction variables described in SET. */
5137 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5140 struct iv_cand
*cand
;
5143 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5145 cand
= iv_cand (data
, i
);
5146 create_new_iv (data
, cand
);
5150 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5151 is true, remove also the ssa name defined by the statement. */
5154 remove_statement (tree stmt
, bool including_defined_name
)
5156 if (TREE_CODE (stmt
) == PHI_NODE
)
5158 if (!including_defined_name
)
5160 /* Prevent the ssa name defined by the statement from being removed. */
5161 SET_PHI_RESULT (stmt
, NULL
);
5163 remove_phi_node (stmt
, NULL_TREE
);
5167 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5169 bsi_remove (&bsi
, true);
5173 /* Rewrites USE (definition of iv used in a nonlinear expression)
5174 using candidate CAND. */
5177 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5178 struct iv_use
*use
, struct iv_cand
*cand
)
5181 tree op
, stmts
, tgt
, ass
;
5182 block_stmt_iterator bsi
, pbsi
;
5184 /* An important special case -- if we are asked to express value of
5185 the original iv by itself, just exit; there is no need to
5186 introduce a new computation (that might also need casting the
5187 variable to unsigned and back). */
5188 if (cand
->pos
== IP_ORIGINAL
5189 && cand
->incremented_at
== use
->stmt
)
5191 tree step
, ctype
, utype
;
5192 enum tree_code incr_code
= PLUS_EXPR
;
5194 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5195 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5197 step
= cand
->iv
->step
;
5198 ctype
= TREE_TYPE (step
);
5199 utype
= TREE_TYPE (cand
->var_after
);
5200 if (TREE_CODE (step
) == NEGATE_EXPR
)
5202 incr_code
= MINUS_EXPR
;
5203 step
= TREE_OPERAND (step
, 0);
5206 /* Check whether we may leave the computation unchanged.
5207 This is the case only if it does not rely on other
5208 computations in the loop -- otherwise, the computation
5209 we rely upon may be removed in remove_unused_ivs,
5210 thus leading to ICE. */
5211 op
= TREE_OPERAND (use
->stmt
, 1);
5212 if (TREE_CODE (op
) == PLUS_EXPR
5213 || TREE_CODE (op
) == MINUS_EXPR
)
5215 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5216 op
= TREE_OPERAND (op
, 1);
5217 else if (TREE_CODE (op
) == PLUS_EXPR
5218 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5219 op
= TREE_OPERAND (op
, 0);
5227 && (TREE_CODE (op
) == INTEGER_CST
5228 || operand_equal_p (op
, step
, 0)))
5231 /* Otherwise, add the necessary computations to express
5233 op
= fold_convert (ctype
, cand
->var_before
);
5234 comp
= fold_convert (utype
,
5235 build2 (incr_code
, ctype
, op
,
5236 unshare_expr (step
)));
5239 comp
= get_computation (data
->current_loop
, use
, cand
);
5241 switch (TREE_CODE (use
->stmt
))
5244 tgt
= PHI_RESULT (use
->stmt
);
5246 /* If we should keep the biv, do not replace it. */
5247 if (name_info (data
, tgt
)->preserve_biv
)
5250 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5251 while (!bsi_end_p (pbsi
)
5252 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5260 tgt
= TREE_OPERAND (use
->stmt
, 0);
5261 bsi
= bsi_for_stmt (use
->stmt
);
5268 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5270 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5273 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5274 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5275 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5276 remove_statement (use
->stmt
, false);
5277 SSA_NAME_DEF_STMT (tgt
) = ass
;
5282 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5283 TREE_OPERAND (use
->stmt
, 1) = op
;
5287 /* Replaces ssa name in index IDX by its basic variable. Callback for
5291 idx_remove_ssa_names (tree base
, tree
*idx
,
5292 void *data ATTRIBUTE_UNUSED
)
5296 if (TREE_CODE (*idx
) == SSA_NAME
)
5297 *idx
= SSA_NAME_VAR (*idx
);
5299 if (TREE_CODE (base
) == ARRAY_REF
)
5301 op
= &TREE_OPERAND (base
, 2);
5303 && TREE_CODE (*op
) == SSA_NAME
)
5304 *op
= SSA_NAME_VAR (*op
);
5305 op
= &TREE_OPERAND (base
, 3);
5307 && TREE_CODE (*op
) == SSA_NAME
)
5308 *op
= SSA_NAME_VAR (*op
);
5314 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5317 unshare_and_remove_ssa_names (tree ref
)
5319 ref
= unshare_expr (ref
);
5320 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5325 /* Extract the alias analysis info for the memory reference REF. There are
5326 several ways how this information may be stored and what precisely is
5327 its semantics depending on the type of the reference, but there always is
5328 somewhere hidden one _DECL node that is used to determine the set of
5329 virtual operands for the reference. The code below deciphers this jungle
5330 and extracts this single useful piece of information. */
5333 get_ref_tag (tree ref
, tree orig
)
5335 tree var
= get_base_address (ref
);
5336 tree aref
= NULL_TREE
, tag
, sv
;
5337 HOST_WIDE_INT offset
, size
, maxsize
;
5339 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5341 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5346 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5347 return unshare_expr (sv
);
5352 if (TREE_CODE (var
) == INDIRECT_REF
)
5354 /* If the base is a dereference of a pointer, first check its name memory
5355 tag. If it does not have one, use its symbol memory tag. */
5356 var
= TREE_OPERAND (var
, 0);
5357 if (TREE_CODE (var
) != SSA_NAME
)
5360 if (SSA_NAME_PTR_INFO (var
))
5362 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5367 var
= SSA_NAME_VAR (var
);
5368 tag
= var_ann (var
)->symbol_mem_tag
;
5369 gcc_assert (tag
!= NULL_TREE
);
5377 tag
= var_ann (var
)->symbol_mem_tag
;
5385 /* Copies the reference information from OLD_REF to NEW_REF. */
5388 copy_ref_info (tree new_ref
, tree old_ref
)
5390 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5391 copy_mem_ref_info (new_ref
, old_ref
);
5394 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5395 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5399 /* Rewrites USE (address that is an iv) using candidate CAND. */
5402 rewrite_use_address (struct ivopts_data
*data
,
5403 struct iv_use
*use
, struct iv_cand
*cand
)
5405 struct affine_tree_combination aff
;
5406 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5409 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5410 unshare_aff_combination (&aff
);
5412 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5413 copy_ref_info (ref
, *use
->op_p
);
5417 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5421 rewrite_use_compare (struct ivopts_data
*data
,
5422 struct iv_use
*use
, struct iv_cand
*cand
)
5425 tree
*op_p
, cond
, op
, stmts
, bound
;
5426 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5427 enum tree_code compare
;
5428 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5433 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5434 tree var_type
= TREE_TYPE (var
);
5436 compare
= iv_elimination_compare (data
, use
);
5437 bound
= fold_convert (var_type
, bound
);
5438 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5442 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5444 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5445 update_stmt (use
->stmt
);
5449 /* The induction variable elimination failed; just express the original
5451 comp
= get_computation (data
->current_loop
, use
, cand
);
5454 op_p
= &TREE_OPERAND (cond
, 0);
5455 if (TREE_CODE (*op_p
) != SSA_NAME
5456 || zero_p (get_iv (data
, *op_p
)->step
))
5457 op_p
= &TREE_OPERAND (cond
, 1);
5459 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5461 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5466 /* Ensure that operand *OP_P may be used at the end of EXIT without
5467 violating loop closed ssa form. */
5470 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
5473 struct loop
*def_loop
;
5476 use
= USE_FROM_PTR (op_p
);
5477 if (TREE_CODE (use
) != SSA_NAME
)
5480 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
5484 def_loop
= def_bb
->loop_father
;
5485 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
5488 /* Try finding a phi node that copies the value out of the loop. */
5489 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
5490 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
5495 /* Create such a phi node. */
5496 tree new_name
= duplicate_ssa_name (use
, NULL
);
5498 phi
= create_phi_node (new_name
, exit
->dest
);
5499 SSA_NAME_DEF_STMT (new_name
) = phi
;
5500 add_phi_arg (phi
, use
, exit
);
5503 SET_USE (op_p
, PHI_RESULT (phi
));
5506 /* Ensure that operands of STMT may be used at the end of EXIT without
5507 violating loop closed ssa form. */
5510 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
5513 use_operand_p use_p
;
5515 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
5516 protect_loop_closed_ssa_form_use (exit
, use_p
);
5519 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5520 so that they are emitted on the correct place, and so that the loop closed
5521 ssa form is preserved. */
5524 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
5526 tree_stmt_iterator tsi
;
5527 block_stmt_iterator bsi
;
5528 tree phi
, stmt
, def
, next
;
5530 if (!single_pred_p (exit
->dest
))
5531 split_loop_exit_edge (exit
);
5533 /* Ensure there is label in exit->dest, so that we can
5535 tree_block_label (exit
->dest
);
5536 bsi
= bsi_after_labels (exit
->dest
);
5538 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
5540 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
5542 tree stmt
= tsi_stmt (tsi
);
5543 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5544 protect_loop_closed_ssa_form (exit
, stmt
);
5549 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5550 protect_loop_closed_ssa_form (exit
, stmts
);
5556 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
5558 next
= PHI_CHAIN (phi
);
5560 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
5562 def
= PHI_RESULT (phi
);
5563 remove_statement (phi
, false);
5564 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
5566 SSA_NAME_DEF_STMT (def
) = stmt
;
5567 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5572 /* Rewrites USE using candidate CAND. */
5575 rewrite_use (struct ivopts_data
*data
,
5576 struct iv_use
*use
, struct iv_cand
*cand
)
5580 case USE_NONLINEAR_EXPR
:
5581 rewrite_use_nonlinear_expr (data
, use
, cand
);
5585 rewrite_use_address (data
, use
, cand
);
5589 rewrite_use_compare (data
, use
, cand
);
5595 mark_new_vars_to_rename (use
->stmt
);
5598 /* Rewrite the uses using the selected induction variables. */
5601 rewrite_uses (struct ivopts_data
*data
)
5604 struct iv_cand
*cand
;
5607 for (i
= 0; i
< n_iv_uses (data
); i
++)
5609 use
= iv_use (data
, i
);
5610 cand
= use
->selected
;
5613 rewrite_use (data
, use
, cand
);
5617 /* Removes the ivs that are not used after rewriting. */
5620 remove_unused_ivs (struct ivopts_data
*data
)
5625 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5627 struct version_info
*info
;
5629 info
= ver_info (data
, j
);
5631 && !zero_p (info
->iv
->step
)
5633 && !info
->iv
->have_use_for
5634 && !info
->preserve_biv
)
5635 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5639 /* Frees data allocated by the optimization of a single loop. */
5642 free_loop_data (struct ivopts_data
*data
)
5648 htab_empty (data
->niters
);
5650 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5652 struct version_info
*info
;
5654 info
= ver_info (data
, i
);
5658 info
->has_nonlin_use
= false;
5659 info
->preserve_biv
= false;
5662 bitmap_clear (data
->relevant
);
5663 bitmap_clear (data
->important_candidates
);
5665 for (i
= 0; i
< n_iv_uses (data
); i
++)
5667 struct iv_use
*use
= iv_use (data
, i
);
5670 BITMAP_FREE (use
->related_cands
);
5671 for (j
= 0; j
< use
->n_map_members
; j
++)
5672 if (use
->cost_map
[j
].depends_on
)
5673 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5674 free (use
->cost_map
);
5677 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5679 for (i
= 0; i
< n_iv_cands (data
); i
++)
5681 struct iv_cand
*cand
= iv_cand (data
, i
);
5685 if (cand
->depends_on
)
5686 BITMAP_FREE (cand
->depends_on
);
5689 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5691 if (data
->version_info_size
< num_ssa_names
)
5693 data
->version_info_size
= 2 * num_ssa_names
;
5694 free (data
->version_info
);
5695 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5698 data
->max_inv_id
= 0;
5700 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5701 SET_DECL_RTL (obj
, NULL_RTX
);
5703 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5706 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5710 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5712 free_loop_data (data
);
5713 free (data
->version_info
);
5714 BITMAP_FREE (data
->relevant
);
5715 BITMAP_FREE (data
->important_candidates
);
5716 htab_delete (data
->niters
);
5718 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5719 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5720 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5723 /* Optimizes the LOOP. Returns true if anything changed. */
5726 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5728 bool changed
= false;
5729 struct iv_ca
*iv_ca
;
5732 data
->current_loop
= loop
;
5734 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5736 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5738 exit
= single_dom_exit (loop
);
5741 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5742 exit
->src
->index
, exit
->dest
->index
);
5743 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5744 fprintf (dump_file
, "\n");
5747 fprintf (dump_file
, "\n");
5750 /* For each ssa name determines whether it behaves as an induction variable
5752 if (!find_induction_variables (data
))
5755 /* Finds interesting uses (item 1). */
5756 find_interesting_uses (data
);
5757 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5760 /* Finds candidates for the induction variables (item 2). */
5761 find_iv_candidates (data
);
5763 /* Calculates the costs (item 3, part 1). */
5764 determine_use_iv_costs (data
);
5765 determine_iv_costs (data
);
5766 determine_set_costs (data
);
5768 /* Find the optimal set of induction variables (item 3, part 2). */
5769 iv_ca
= find_optimal_iv_set (data
);
5774 /* Create the new induction variables (item 4, part 1). */
5775 create_new_ivs (data
, iv_ca
);
5776 iv_ca_free (&iv_ca
);
5778 /* Rewrite the uses (item 4, part 2). */
5779 rewrite_uses (data
);
5781 /* Remove the ivs that are unused after rewriting. */
5782 remove_unused_ivs (data
);
5784 /* We have changed the structure of induction variables; it might happen
5785 that definitions in the scev database refer to some of them that were
5790 free_loop_data (data
);
5795 /* Main entry point. Optimizes induction variables in LOOPS. */
5798 tree_ssa_iv_optimize (struct loops
*loops
)
5801 struct ivopts_data data
;
5803 tree_ssa_iv_optimize_init (&data
);
5805 /* Optimize the loops starting with the innermost ones. */
5806 loop
= loops
->tree_root
;
5810 /* Scan the loops, inner ones first. */
5811 while (loop
!= loops
->tree_root
)
5813 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5814 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5816 tree_ssa_iv_optimize_loop (&data
, loop
);
5828 tree_ssa_iv_optimize_finalize (&data
);