1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base
; /* Initial value of the iv. */
105 tree base_object
; /* A memory object to that the induction variable points. */
106 tree step
; /* Step of the iv (constant only). */
107 tree ssa_name
; /* The ssa name with the value. */
108 bool biv_p
; /* Is it a biv? */
109 bool have_use_for
; /* Do we already have a use for it? */
110 unsigned use_id
; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name
; /* The ssa name. */
117 struct iv
*iv
; /* Induction variable description. */
118 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id
; /* Id of an invariant. */
121 bool preserve_biv
; /* For the original biv, whether to preserve it. */
127 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
128 USE_ADDRESS
, /* Use in an address. */
129 USE_COMPARE
/* Use is a compare. */
132 /* The candidate - cost pair. */
135 struct iv_cand
*cand
; /* The candidate. */
136 unsigned cost
; /* The cost. */
137 bitmap depends_on
; /* The list of invariants that have to be
139 tree value
; /* For final value elimination, the expression for
140 the final value of the iv. For iv elimination,
141 the new bound to compare with. */
147 unsigned id
; /* The id of the use. */
148 enum use_type type
; /* Type of the use. */
149 struct iv
*iv
; /* The induction variable it is based on. */
150 tree stmt
; /* Statement in that it occurs. */
151 tree
*op_p
; /* The place where it occurs. */
152 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
155 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
156 struct cost_pair
*cost_map
;
157 /* The costs wrto the iv candidates. */
159 struct iv_cand
*selected
;
160 /* The selected candidate. */
163 /* The position where the iv is computed. */
166 IP_NORMAL
, /* At the end, just before the exit condition. */
167 IP_END
, /* At the end of the latch block. */
168 IP_ORIGINAL
/* The original biv. */
171 /* The induction variable candidate. */
174 unsigned id
; /* The number of the candidate. */
175 bool important
; /* Whether this is an "important" candidate, i.e. such
176 that it should be considered by all uses. */
177 enum iv_position pos
; /* Where it is computed. */
178 tree incremented_at
; /* For original biv, the statement where it is
180 tree var_before
; /* The variable used for it before increment. */
181 tree var_after
; /* The variable used for it after increment. */
182 struct iv
*iv
; /* The value of the candidate. NULL for
183 "pseudocandidate" used to indicate the possibility
184 to replace the final value of an iv by direct
185 computation of the value. */
186 unsigned cost
; /* Cost of the candidate. */
187 bitmap depends_on
; /* The list of invariants that are used in step of the
191 /* The data used by the induction variable optimizations. */
193 typedef struct iv_use
*iv_use_p
;
195 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
197 typedef struct iv_cand
*iv_cand_p
;
198 DEF_VEC_P(iv_cand_p
);
199 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
203 /* The currently optimized loop. */
204 struct loop
*current_loop
;
206 /* Number of registers used in it. */
209 /* Numbers of iterations for all exits of the current loop. */
212 /* The size of version_info array allocated. */
213 unsigned version_info_size
;
215 /* The array of information for the ssa names. */
216 struct version_info
*version_info
;
218 /* The bitmap of indices in version_info whose value was changed. */
221 /* The maximum invariant id. */
224 /* The uses of induction variables. */
225 VEC(iv_use_p
,heap
) *iv_uses
;
227 /* The candidates. */
228 VEC(iv_cand_p
,heap
) *iv_candidates
;
230 /* A bitmap of important candidates. */
231 bitmap important_candidates
;
233 /* Whether to consider just related and important candidates when replacing a
235 bool consider_all_candidates
;
238 /* An assignment of iv candidates to uses. */
242 /* The number of uses covered by the assignment. */
245 /* Number of uses that cannot be expressed by the candidates in the set. */
248 /* Candidate assigned to a use, together with the related costs. */
249 struct cost_pair
**cand_for_use
;
251 /* Number of times each candidate is used. */
252 unsigned *n_cand_uses
;
254 /* The candidates used. */
257 /* The number of candidates in the set. */
260 /* Total number of registers needed. */
263 /* Total cost of expressing uses. */
264 unsigned cand_use_cost
;
266 /* Total cost of candidates. */
269 /* Number of times each invariant is used. */
270 unsigned *n_invariant_uses
;
272 /* Total cost of the assignment. */
276 /* Difference of two iv candidate assignments. */
283 /* An old assignment (for rollback purposes). */
284 struct cost_pair
*old_cp
;
286 /* A new assignment. */
287 struct cost_pair
*new_cp
;
289 /* Next change in the list. */
290 struct iv_ca_delta
*next_change
;
293 /* Bound on number of candidates below that all candidates are considered. */
295 #define CONSIDER_ALL_CANDIDATES_BOUND \
296 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
299 optimizing such a loop would help, and it would take ages). */
301 #define MAX_CONSIDERED_USES \
302 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304 /* If there are at most this number of ivs in the set, try removing unnecessary
305 ivs from the set always. */
307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
308 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
310 /* The list of trees for that the decl_rtl field must be reset is stored
313 static VEC(tree
,heap
) *decl_rtl_to_reset
;
315 /* Number of uses recorded in DATA. */
317 static inline unsigned
318 n_iv_uses (struct ivopts_data
*data
)
320 return VEC_length (iv_use_p
, data
->iv_uses
);
323 /* Ith use recorded in DATA. */
325 static inline struct iv_use
*
326 iv_use (struct ivopts_data
*data
, unsigned i
)
328 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
331 /* Number of candidates recorded in DATA. */
333 static inline unsigned
334 n_iv_cands (struct ivopts_data
*data
)
336 return VEC_length (iv_cand_p
, data
->iv_candidates
);
339 /* Ith candidate recorded in DATA. */
341 static inline struct iv_cand
*
342 iv_cand (struct ivopts_data
*data
, unsigned i
)
344 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
347 /* The single loop exit if it dominates the latch, NULL otherwise. */
350 single_dom_exit (struct loop
*loop
)
352 edge exit
= loop
->single_exit
;
357 if (!just_once_each_iteration_p (loop
, exit
->src
))
363 /* Dumps information about the induction variable IV to FILE. */
365 extern void dump_iv (FILE *, struct iv
*);
367 dump_iv (FILE *file
, struct iv
*iv
)
371 fprintf (file
, "ssa name ");
372 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
373 fprintf (file
, "\n");
376 fprintf (file
, " type ");
377 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
378 fprintf (file
, "\n");
382 fprintf (file
, " base ");
383 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
384 fprintf (file
, "\n");
386 fprintf (file
, " step ");
387 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
388 fprintf (file
, "\n");
392 fprintf (file
, " invariant ");
393 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
394 fprintf (file
, "\n");
399 fprintf (file
, " base object ");
400 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
401 fprintf (file
, "\n");
405 fprintf (file
, " is a biv\n");
408 /* Dumps information about the USE to FILE. */
410 extern void dump_use (FILE *, struct iv_use
*);
412 dump_use (FILE *file
, struct iv_use
*use
)
414 fprintf (file
, "use %d\n", use
->id
);
418 case USE_NONLINEAR_EXPR
:
419 fprintf (file
, " generic\n");
423 fprintf (file
, " address\n");
427 fprintf (file
, " compare\n");
434 fprintf (file
, " in statement ");
435 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
436 fprintf (file
, "\n");
438 fprintf (file
, " at position ");
440 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
441 fprintf (file
, "\n");
443 dump_iv (file
, use
->iv
);
445 if (use
->related_cands
)
447 fprintf (file
, " related candidates ");
448 dump_bitmap (file
, use
->related_cands
);
452 /* Dumps information about the uses to FILE. */
454 extern void dump_uses (FILE *, struct ivopts_data
*);
456 dump_uses (FILE *file
, struct ivopts_data
*data
)
461 for (i
= 0; i
< n_iv_uses (data
); i
++)
463 use
= iv_use (data
, i
);
465 dump_use (file
, use
);
466 fprintf (file
, "\n");
470 /* Dumps information about induction variable candidate CAND to FILE. */
472 extern void dump_cand (FILE *, struct iv_cand
*);
474 dump_cand (FILE *file
, struct iv_cand
*cand
)
476 struct iv
*iv
= cand
->iv
;
478 fprintf (file
, "candidate %d%s\n",
479 cand
->id
, cand
->important
? " (important)" : "");
481 if (cand
->depends_on
)
483 fprintf (file
, " depends on ");
484 dump_bitmap (file
, cand
->depends_on
);
489 fprintf (file
, " final value replacement\n");
496 fprintf (file
, " incremented before exit test\n");
500 fprintf (file
, " incremented at end\n");
504 fprintf (file
, " original biv\n");
511 /* Returns the info for ssa version VER. */
513 static inline struct version_info
*
514 ver_info (struct ivopts_data
*data
, unsigned ver
)
516 return data
->version_info
+ ver
;
519 /* Returns the info for ssa name NAME. */
521 static inline struct version_info
*
522 name_info (struct ivopts_data
*data
, tree name
)
524 return ver_info (data
, SSA_NAME_VERSION (name
));
527 /* Checks whether there exists number X such that X * B = A, counting modulo
531 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
534 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
535 unsigned HOST_WIDE_INT inv
, ex
, val
;
541 /* First divide the whole equation by 2 as long as possible. */
542 while (!(a
& 1) && !(b
& 1))
552 /* If b is still even, a is odd and there is no such x. */
556 /* Find the inverse of b. We compute it as
557 b^(2^(bits - 1) - 1) (mod 2^bits). */
560 for (i
= 0; i
< bits
- 1; i
++)
562 inv
= (inv
* ex
) & mask
;
563 ex
= (ex
* ex
) & mask
;
566 val
= (a
* inv
) & mask
;
568 gcc_assert (((val
* b
) & mask
) == a
);
570 if ((val
>> (bits
- 1)) & 1)
578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
582 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
584 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
588 if (sbb
== loop
->latch
)
594 return stmt
== last_stmt (bb
);
597 /* Returns true if STMT if after the place where the original induction
598 variable CAND is incremented. */
601 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
603 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
604 basic_block stmt_bb
= bb_for_stmt (stmt
);
605 block_stmt_iterator bsi
;
607 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
610 if (stmt_bb
!= cand_bb
)
613 /* Scan the block from the end, since the original ivs are usually
614 incremented at the end of the loop body. */
615 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
617 if (bsi_stmt (bsi
) == cand
->incremented_at
)
619 if (bsi_stmt (bsi
) == stmt
)
624 /* Returns true if STMT if after the place where the induction variable
625 CAND is incremented in LOOP. */
628 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
636 return stmt_after_ip_normal_pos (loop
, stmt
);
639 return stmt_after_ip_original_pos (cand
, stmt
);
646 /* Element of the table in that we cache the numbers of iterations obtained
647 from exits of the loop. */
651 /* The edge for that the number of iterations is cached. */
654 /* True if the # of iterations was successfully determined. */
657 /* Description of # of iterations. */
658 struct tree_niter_desc niter
;
661 /* Hash function for nfe_cache_elt E. */
664 nfe_hash (const void *e
)
666 const struct nfe_cache_elt
*elt
= e
;
668 return htab_hash_pointer (elt
->exit
);
671 /* Equality function for nfe_cache_elt E1 and edge E2. */
674 nfe_eq (const void *e1
, const void *e2
)
676 const struct nfe_cache_elt
*elt1
= e1
;
678 return elt1
->exit
== e2
;
681 /* Returns structure describing number of iterations determined from
682 EXIT of DATA->current_loop, or NULL if something goes wrong. */
684 static struct tree_niter_desc
*
685 niter_for_exit (struct ivopts_data
*data
, edge exit
)
687 struct nfe_cache_elt
*nfe_desc
;
690 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
691 htab_hash_pointer (exit
),
696 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
697 nfe_desc
->exit
= exit
;
698 nfe_desc
->valid_p
= number_of_iterations_exit (data
->current_loop
,
699 exit
, &nfe_desc
->niter
,
706 if (!nfe_desc
->valid_p
)
709 return &nfe_desc
->niter
;
712 /* Returns structure describing number of iterations determined from
713 single dominating exit of DATA->current_loop, or NULL if something
716 static struct tree_niter_desc
*
717 niter_for_single_dom_exit (struct ivopts_data
*data
)
719 edge exit
= single_dom_exit (data
->current_loop
);
724 return niter_for_exit (data
, exit
);
727 /* Initializes data structures used by the iv optimization pass, stored
731 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
733 data
->version_info_size
= 2 * num_ssa_names
;
734 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
735 data
->relevant
= BITMAP_ALLOC (NULL
);
736 data
->important_candidates
= BITMAP_ALLOC (NULL
);
737 data
->max_inv_id
= 0;
738 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
739 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
740 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
741 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
744 /* Returns a memory object to that EXPR points. In case we are able to
745 determine that it does not point to any such object, NULL is returned. */
748 determine_base_object (tree expr
)
750 enum tree_code code
= TREE_CODE (expr
);
751 tree base
, obj
, op0
, op1
;
753 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
762 obj
= TREE_OPERAND (expr
, 0);
763 base
= get_base_address (obj
);
768 if (TREE_CODE (base
) == INDIRECT_REF
)
769 return determine_base_object (TREE_OPERAND (base
, 0));
771 return fold_convert (ptr_type_node
,
772 build_fold_addr_expr (base
));
776 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
777 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
783 return (code
== PLUS_EXPR
785 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
787 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
791 return determine_base_object (TREE_OPERAND (expr
, 0));
794 return fold_convert (ptr_type_node
, expr
);
798 /* Allocates an induction variable with given initial value BASE and step STEP
802 alloc_iv (tree base
, tree step
)
804 struct iv
*iv
= XCNEW (struct iv
);
806 if (step
&& integer_zerop (step
))
810 iv
->base_object
= determine_base_object (base
);
813 iv
->have_use_for
= false;
815 iv
->ssa_name
= NULL_TREE
;
820 /* Sets STEP and BASE for induction variable IV. */
823 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
825 struct version_info
*info
= name_info (data
, iv
);
827 gcc_assert (!info
->iv
);
829 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
830 info
->iv
= alloc_iv (base
, step
);
831 info
->iv
->ssa_name
= iv
;
834 /* Finds induction variable declaration for VAR. */
837 get_iv (struct ivopts_data
*data
, tree var
)
841 if (!name_info (data
, var
)->iv
)
843 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
846 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
847 set_iv (data
, var
, var
, NULL_TREE
);
850 return name_info (data
, var
)->iv
;
853 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
854 not define a simple affine biv with nonzero step. */
857 determine_biv_step (tree phi
)
859 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
860 tree name
= PHI_RESULT (phi
);
863 if (!is_gimple_reg (name
))
866 if (!simple_iv (loop
, phi
, name
, &iv
, true))
869 return (zero_p (iv
.step
) ? NULL_TREE
: iv
.step
);
872 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
875 abnormal_ssa_name_p (tree exp
)
880 if (TREE_CODE (exp
) != SSA_NAME
)
883 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
886 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
887 abnormal phi node. Callback for for_each_index. */
890 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
891 void *data ATTRIBUTE_UNUSED
)
893 if (TREE_CODE (base
) == ARRAY_REF
)
895 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
897 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
901 return !abnormal_ssa_name_p (*index
);
904 /* Returns true if EXPR contains a ssa name that occurs in an
905 abnormal phi node. */
908 contains_abnormal_ssa_name_p (tree expr
)
911 enum tree_code_class
class;
916 code
= TREE_CODE (expr
);
917 class = TREE_CODE_CLASS (code
);
919 if (code
== SSA_NAME
)
920 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
922 if (code
== INTEGER_CST
923 || is_gimple_min_invariant (expr
))
926 if (code
== ADDR_EXPR
)
927 return !for_each_index (&TREE_OPERAND (expr
, 0),
928 idx_contains_abnormal_ssa_name_p
,
935 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
940 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
952 /* Finds basic ivs. */
955 find_bivs (struct ivopts_data
*data
)
957 tree phi
, step
, type
, base
;
959 struct loop
*loop
= data
->current_loop
;
961 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
963 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
966 step
= determine_biv_step (phi
);
970 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
971 base
= expand_simple_operations (base
);
972 if (contains_abnormal_ssa_name_p (base
)
973 || contains_abnormal_ssa_name_p (step
))
976 type
= TREE_TYPE (PHI_RESULT (phi
));
977 base
= fold_convert (type
, base
);
979 step
= fold_convert (type
, step
);
981 set_iv (data
, PHI_RESULT (phi
), base
, step
);
988 /* Marks basic ivs. */
991 mark_bivs (struct ivopts_data
*data
)
994 struct iv
*iv
, *incr_iv
;
995 struct loop
*loop
= data
->current_loop
;
998 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1000 iv
= get_iv (data
, PHI_RESULT (phi
));
1004 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1005 incr_iv
= get_iv (data
, var
);
1009 /* If the increment is in the subloop, ignore it. */
1010 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1011 if (incr_bb
->loop_father
!= data
->current_loop
1012 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1016 incr_iv
->biv_p
= true;
1020 /* Checks whether STMT defines a linear induction variable and stores its
1021 parameters to IV. */
1024 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
1027 struct loop
*loop
= data
->current_loop
;
1029 iv
->base
= NULL_TREE
;
1030 iv
->step
= NULL_TREE
;
1032 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1035 lhs
= TREE_OPERAND (stmt
, 0);
1036 if (TREE_CODE (lhs
) != SSA_NAME
)
1039 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), iv
, true))
1041 iv
->base
= expand_simple_operations (iv
->base
);
1043 if (contains_abnormal_ssa_name_p (iv
->base
)
1044 || contains_abnormal_ssa_name_p (iv
->step
))
1050 /* Finds general ivs in statement STMT. */
1053 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1057 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1060 set_iv (data
, TREE_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
1063 /* Finds general ivs in basic block BB. */
1066 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1068 block_stmt_iterator bsi
;
1070 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1071 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1074 /* Finds general ivs. */
1077 find_givs (struct ivopts_data
*data
)
1079 struct loop
*loop
= data
->current_loop
;
1080 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1083 for (i
= 0; i
< loop
->num_nodes
; i
++)
1084 find_givs_in_bb (data
, body
[i
]);
1088 /* For each ssa name defined in LOOP determines whether it is an induction
1089 variable and if so, its initial value and step. */
1092 find_induction_variables (struct ivopts_data
*data
)
1097 if (!find_bivs (data
))
1103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1105 struct tree_niter_desc
*niter
;
1107 niter
= niter_for_single_dom_exit (data
);
1111 fprintf (dump_file
, " number of iterations ");
1112 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1113 fprintf (dump_file
, "\n");
1115 fprintf (dump_file
, " may be zero if ");
1116 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1117 fprintf (dump_file
, "\n");
1118 fprintf (dump_file
, "\n");
1121 fprintf (dump_file
, "Induction variables:\n\n");
1123 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1125 if (ver_info (data
, i
)->iv
)
1126 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1133 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1135 static struct iv_use
*
1136 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1137 tree stmt
, enum use_type use_type
)
1139 struct iv_use
*use
= XCNEW (struct iv_use
);
1141 use
->id
= n_iv_uses (data
);
1142 use
->type
= use_type
;
1146 use
->related_cands
= BITMAP_ALLOC (NULL
);
1148 /* To avoid showing ssa name in the dumps, if it was not reset by the
1150 iv
->ssa_name
= NULL_TREE
;
1152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1153 dump_use (dump_file
, use
);
1155 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1160 /* Checks whether OP is a loop-level invariant and if so, records it.
1161 NONLINEAR_USE is true if the invariant is used in a way we do not
1162 handle specially. */
1165 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1168 struct version_info
*info
;
1170 if (TREE_CODE (op
) != SSA_NAME
1171 || !is_gimple_reg (op
))
1174 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1176 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1179 info
= name_info (data
, op
);
1181 info
->has_nonlin_use
|= nonlinear_use
;
1183 info
->inv_id
= ++data
->max_inv_id
;
1184 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1187 /* Checks whether the use OP is interesting and if so, records it. */
1189 static struct iv_use
*
1190 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1197 if (TREE_CODE (op
) != SSA_NAME
)
1200 iv
= get_iv (data
, op
);
1204 if (iv
->have_use_for
)
1206 use
= iv_use (data
, iv
->use_id
);
1208 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1212 if (zero_p (iv
->step
))
1214 record_invariant (data
, op
, true);
1217 iv
->have_use_for
= true;
1219 civ
= XNEW (struct iv
);
1222 stmt
= SSA_NAME_DEF_STMT (op
);
1223 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1224 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1226 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1227 iv
->use_id
= use
->id
;
1232 /* Checks whether the condition *COND_P in STMT is interesting
1233 and if so, records it. */
1236 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1240 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1242 tree zero
= integer_zero_node
;
1244 const_iv
.step
= NULL_TREE
;
1246 if (TREE_CODE (*cond_p
) != SSA_NAME
1247 && !COMPARISON_CLASS_P (*cond_p
))
1250 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1257 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1258 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1261 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1262 iv0
= get_iv (data
, *op0_p
);
1266 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1267 iv1
= get_iv (data
, *op1_p
);
1271 if (/* When comparing with non-invariant value, we may not do any senseful
1272 induction variable elimination. */
1274 /* Eliminating condition based on two ivs would be nontrivial.
1275 ??? TODO -- it is not really important to handle this case. */
1276 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1278 find_interesting_uses_op (data
, *op0_p
);
1279 find_interesting_uses_op (data
, *op1_p
);
1283 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1285 /* If both are invariants, this is a work for unswitching. */
1289 civ
= XNEW (struct iv
);
1290 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1291 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1294 /* Returns true if expression EXPR is obviously invariant in LOOP,
1295 i.e. if all its operands are defined outside of the LOOP. */
1298 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1303 if (is_gimple_min_invariant (expr
))
1306 if (TREE_CODE (expr
) == SSA_NAME
)
1308 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1310 && flow_bb_inside_loop_p (loop
, def_bb
))
1319 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1320 for (i
= 0; i
< len
; i
++)
1321 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1327 /* Cumulates the steps of indices into DATA and replaces their values with the
1328 initial ones. Returns false when the value of the index cannot be determined.
1329 Callback for for_each_index. */
1331 struct ifs_ivopts_data
1333 struct ivopts_data
*ivopts_data
;
1339 idx_find_step (tree base
, tree
*idx
, void *data
)
1341 struct ifs_ivopts_data
*dta
= data
;
1343 tree step
, iv_step
, lbound
, off
;
1344 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1346 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1347 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1350 /* If base is a component ref, require that the offset of the reference
1352 if (TREE_CODE (base
) == COMPONENT_REF
)
1354 off
= component_ref_field_offset (base
);
1355 return expr_invariant_in_loop_p (loop
, off
);
1358 /* If base is array, first check whether we will be able to move the
1359 reference out of the loop (in order to take its address in strength
1360 reduction). In order for this to work we need both lower bound
1361 and step to be loop invariants. */
1362 if (TREE_CODE (base
) == ARRAY_REF
)
1364 step
= array_ref_element_size (base
);
1365 lbound
= array_ref_low_bound (base
);
1367 if (!expr_invariant_in_loop_p (loop
, step
)
1368 || !expr_invariant_in_loop_p (loop
, lbound
))
1372 if (TREE_CODE (*idx
) != SSA_NAME
)
1375 iv
= get_iv (dta
->ivopts_data
, *idx
);
1384 if (TREE_CODE (base
) == ARRAY_REF
)
1386 step
= array_ref_element_size (base
);
1388 /* We only handle addresses whose step is an integer constant. */
1389 if (TREE_CODE (step
) != INTEGER_CST
)
1393 /* The step for pointer arithmetics already is 1 byte. */
1394 step
= build_int_cst (sizetype
, 1);
1396 /* FIXME: convert_step should not be used outside chrec_convert: fix
1397 this by calling chrec_convert. */
1398 iv_step
= convert_step (dta
->ivopts_data
->current_loop
,
1399 sizetype
, iv
->base
, iv
->step
, dta
->stmt
);
1403 /* The index might wrap. */
1407 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1410 *dta
->step_p
= step
;
1412 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1421 idx_record_use (tree base
, tree
*idx
,
1424 find_interesting_uses_op (data
, *idx
);
1425 if (TREE_CODE (base
) == ARRAY_REF
)
1427 find_interesting_uses_op (data
, array_ref_element_size (base
));
1428 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1433 /* Returns true if memory reference REF may be unaligned. */
1436 may_be_unaligned_p (tree ref
)
1440 HOST_WIDE_INT bitsize
;
1441 HOST_WIDE_INT bitpos
;
1443 enum machine_mode mode
;
1444 int unsignedp
, volatilep
;
1445 unsigned base_align
;
1447 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1448 thus they are not misaligned. */
1449 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1452 /* The test below is basically copy of what expr.c:normal_inner_ref
1453 does to check whether the object must be loaded by parts when
1454 STRICT_ALIGNMENT is true. */
1455 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1456 &unsignedp
, &volatilep
, true);
1457 base_type
= TREE_TYPE (base
);
1458 base_align
= TYPE_ALIGN (base_type
);
1461 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1462 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1463 || bitpos
% BITS_PER_UNIT
!= 0))
1469 /* Finds addresses in *OP_P inside STMT. */
1472 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1474 tree base
= *op_p
, step
= NULL
;
1476 struct ifs_ivopts_data ifs_ivopts_data
;
1478 /* Do not play with volatile memory references. A bit too conservative,
1479 perhaps, but safe. */
1480 if (stmt_ann (stmt
)->has_volatile_ops
)
1483 /* Ignore bitfields for now. Not really something terribly complicated
1485 if (TREE_CODE (base
) == COMPONENT_REF
1486 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1)))
1489 if (STRICT_ALIGNMENT
1490 && may_be_unaligned_p (base
))
1493 base
= unshare_expr (base
);
1495 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1497 tree type
= build_pointer_type (TREE_TYPE (base
));
1501 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1503 civ
= get_iv (data
, TMR_BASE (base
));
1507 TMR_BASE (base
) = civ
->base
;
1510 if (TMR_INDEX (base
)
1511 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1513 civ
= get_iv (data
, TMR_INDEX (base
));
1517 TMR_INDEX (base
) = civ
->base
;
1522 if (TMR_STEP (base
))
1523 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1526 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1534 base
= tree_mem_ref_addr (type
, base
);
1538 ifs_ivopts_data
.ivopts_data
= data
;
1539 ifs_ivopts_data
.stmt
= stmt
;
1540 ifs_ivopts_data
.step_p
= &step
;
1541 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1545 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1546 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1548 base
= build_fold_addr_expr (base
);
1551 civ
= alloc_iv (base
, step
);
1552 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1556 for_each_index (op_p
, idx_record_use
, data
);
1559 /* Finds and records invariants used in STMT. */
1562 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1565 use_operand_p use_p
;
1568 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1570 op
= USE_FROM_PTR (use_p
);
1571 record_invariant (data
, op
, false);
1575 /* Finds interesting uses of induction variables in the statement STMT. */
1578 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1583 use_operand_p use_p
;
1585 find_invariants_stmt (data
, stmt
);
1587 if (TREE_CODE (stmt
) == COND_EXPR
)
1589 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1593 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1595 lhs
= TREE_OPERAND (stmt
, 0);
1596 rhs
= TREE_OPERAND (stmt
, 1);
1598 if (TREE_CODE (lhs
) == SSA_NAME
)
1600 /* If the statement defines an induction variable, the uses are not
1601 interesting by themselves. */
1603 iv
= get_iv (data
, lhs
);
1605 if (iv
&& !zero_p (iv
->step
))
1609 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1611 case tcc_comparison
:
1612 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1616 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1617 if (REFERENCE_CLASS_P (lhs
))
1618 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1624 if (REFERENCE_CLASS_P (lhs
)
1625 && is_gimple_val (rhs
))
1627 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1628 find_interesting_uses_op (data
, rhs
);
1632 /* TODO -- we should also handle address uses of type
1634 memory = call (whatever);
1641 if (TREE_CODE (stmt
) == PHI_NODE
1642 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1644 lhs
= PHI_RESULT (stmt
);
1645 iv
= get_iv (data
, lhs
);
1647 if (iv
&& !zero_p (iv
->step
))
1651 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1653 op
= USE_FROM_PTR (use_p
);
1655 if (TREE_CODE (op
) != SSA_NAME
)
1658 iv
= get_iv (data
, op
);
1662 find_interesting_uses_op (data
, op
);
1666 /* Finds interesting uses of induction variables outside of loops
1667 on loop exit edge EXIT. */
1670 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1674 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1676 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1677 find_interesting_uses_op (data
, def
);
1681 /* Finds uses of the induction variables that are interesting. */
1684 find_interesting_uses (struct ivopts_data
*data
)
1687 block_stmt_iterator bsi
;
1689 basic_block
*body
= get_loop_body (data
->current_loop
);
1691 struct version_info
*info
;
1694 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1695 fprintf (dump_file
, "Uses:\n\n");
1697 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1702 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1703 if (e
->dest
!= EXIT_BLOCK_PTR
1704 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1705 find_interesting_uses_outside (data
, e
);
1707 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1708 find_interesting_uses_stmt (data
, phi
);
1709 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1710 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1713 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1717 fprintf (dump_file
, "\n");
1719 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1721 info
= ver_info (data
, i
);
1724 fprintf (dump_file
, " ");
1725 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1726 fprintf (dump_file
, " is invariant (%d)%s\n",
1727 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1731 fprintf (dump_file
, "\n");
1737 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1738 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1739 we are at the top-level of the processed address. */
1742 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1743 unsigned HOST_WIDE_INT
*offset
)
1745 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1746 enum tree_code code
;
1747 tree type
, orig_type
= TREE_TYPE (expr
);
1748 unsigned HOST_WIDE_INT off0
, off1
, st
;
1749 tree orig_expr
= expr
;
1753 type
= TREE_TYPE (expr
);
1754 code
= TREE_CODE (expr
);
1760 if (!cst_and_fits_in_hwi (expr
)
1764 *offset
= int_cst_value (expr
);
1765 return build_int_cst_type (orig_type
, 0);
1769 op0
= TREE_OPERAND (expr
, 0);
1770 op1
= TREE_OPERAND (expr
, 1);
1772 op0
= strip_offset_1 (op0
, false, false, &off0
);
1773 op1
= strip_offset_1 (op1
, false, false, &off1
);
1775 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1776 if (op0
== TREE_OPERAND (expr
, 0)
1777 && op1
== TREE_OPERAND (expr
, 1))
1782 else if (zero_p (op0
))
1784 if (code
== PLUS_EXPR
)
1787 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1790 expr
= fold_build2 (code
, type
, op0
, op1
);
1792 return fold_convert (orig_type
, expr
);
1798 step
= array_ref_element_size (expr
);
1799 if (!cst_and_fits_in_hwi (step
))
1802 st
= int_cst_value (step
);
1803 op1
= TREE_OPERAND (expr
, 1);
1804 op1
= strip_offset_1 (op1
, false, false, &off1
);
1805 *offset
= off1
* st
;
1810 /* Strip the component reference completely. */
1811 op0
= TREE_OPERAND (expr
, 0);
1812 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1822 tmp
= component_ref_field_offset (expr
);
1824 && cst_and_fits_in_hwi (tmp
))
1826 /* Strip the component reference completely. */
1827 op0
= TREE_OPERAND (expr
, 0);
1828 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1829 *offset
= off0
+ int_cst_value (tmp
);
1835 op0
= TREE_OPERAND (expr
, 0);
1836 op0
= strip_offset_1 (op0
, true, true, &off0
);
1839 if (op0
== TREE_OPERAND (expr
, 0))
1842 expr
= build_fold_addr_expr (op0
);
1843 return fold_convert (orig_type
, expr
);
1846 inside_addr
= false;
1853 /* Default handling of expressions for that we want to recurse into
1854 the first operand. */
1855 op0
= TREE_OPERAND (expr
, 0);
1856 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1859 if (op0
== TREE_OPERAND (expr
, 0)
1860 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1863 expr
= copy_node (expr
);
1864 TREE_OPERAND (expr
, 0) = op0
;
1866 TREE_OPERAND (expr
, 1) = op1
;
1868 /* Inside address, we might strip the top level component references,
1869 thus changing type of the expression. Handling of ADDR_EXPR
1871 expr
= fold_convert (orig_type
, expr
);
1876 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1879 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1881 return strip_offset_1 (expr
, false, false, offset
);
1884 /* Returns variant of TYPE that can be used as base for different uses.
1885 For integer types, we return unsigned variant of the type, which
1886 avoids problems with overflows. For pointer types, we return void *. */
1889 generic_type_for (tree type
)
1891 if (POINTER_TYPE_P (type
))
1892 return ptr_type_node
;
1894 if (TYPE_UNSIGNED (type
))
1897 return unsigned_type_for (type
);
1900 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1901 the bitmap to that we should store it. */
1903 static struct ivopts_data
*fd_ivopts_data
;
1905 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1907 bitmap
*depends_on
= data
;
1908 struct version_info
*info
;
1910 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1912 info
= name_info (fd_ivopts_data
, *expr_p
);
1914 if (!info
->inv_id
|| info
->has_nonlin_use
)
1918 *depends_on
= BITMAP_ALLOC (NULL
);
1919 bitmap_set_bit (*depends_on
, info
->inv_id
);
1924 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1925 position to POS. If USE is not NULL, the candidate is set as related to
1926 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1927 replacement of the final value of the iv by a direct computation. */
1929 static struct iv_cand
*
1930 add_candidate_1 (struct ivopts_data
*data
,
1931 tree base
, tree step
, bool important
, enum iv_position pos
,
1932 struct iv_use
*use
, tree incremented_at
)
1935 struct iv_cand
*cand
= NULL
;
1936 tree type
, orig_type
;
1940 orig_type
= TREE_TYPE (base
);
1941 type
= generic_type_for (orig_type
);
1942 if (type
!= orig_type
)
1944 base
= fold_convert (type
, base
);
1946 step
= fold_convert (type
, step
);
1950 for (i
= 0; i
< n_iv_cands (data
); i
++)
1952 cand
= iv_cand (data
, i
);
1954 if (cand
->pos
!= pos
)
1957 if (cand
->incremented_at
!= incremented_at
)
1971 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1974 if (zero_p (cand
->iv
->step
))
1981 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1986 if (i
== n_iv_cands (data
))
1988 cand
= XCNEW (struct iv_cand
);
1994 cand
->iv
= alloc_iv (base
, step
);
1997 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
1999 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2000 cand
->var_after
= cand
->var_before
;
2002 cand
->important
= important
;
2003 cand
->incremented_at
= incremented_at
;
2004 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2007 && TREE_CODE (step
) != INTEGER_CST
)
2009 fd_ivopts_data
= data
;
2010 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2013 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2014 dump_cand (dump_file
, cand
);
2017 if (important
&& !cand
->important
)
2019 cand
->important
= true;
2020 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2021 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2026 bitmap_set_bit (use
->related_cands
, i
);
2027 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2028 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2035 /* Returns true if incrementing the induction variable at the end of the LOOP
2038 The purpose is to avoid splitting latch edge with a biv increment, thus
2039 creating a jump, possibly confusing other optimization passes and leaving
2040 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2041 is not available (so we do not have a better alternative), or if the latch
2042 edge is already nonempty. */
2045 allow_ip_end_pos_p (struct loop
*loop
)
2047 if (!ip_normal_pos (loop
))
2050 if (!empty_block_p (ip_end_pos (loop
)))
2056 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2057 position to POS. If USE is not NULL, the candidate is set as related to
2058 it. The candidate computation is scheduled on all available positions. */
2061 add_candidate (struct ivopts_data
*data
,
2062 tree base
, tree step
, bool important
, struct iv_use
*use
)
2064 if (ip_normal_pos (data
->current_loop
))
2065 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2066 if (ip_end_pos (data
->current_loop
)
2067 && allow_ip_end_pos_p (data
->current_loop
))
2068 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2071 /* Add a standard "0 + 1 * iteration" iv candidate for a
2072 type with SIZE bits. */
2075 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2078 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2079 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2083 /* Adds standard iv candidates. */
2086 add_standard_iv_candidates (struct ivopts_data
*data
)
2088 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2090 /* The same for a double-integer type if it is still fast enough. */
2091 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2092 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2096 /* Adds candidates bases on the old induction variable IV. */
2099 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2102 struct iv_cand
*cand
;
2104 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2106 /* The same, but with initial value zero. */
2107 add_candidate (data
,
2108 build_int_cst (TREE_TYPE (iv
->base
), 0),
2109 iv
->step
, true, NULL
);
2111 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2112 if (TREE_CODE (phi
) == PHI_NODE
)
2114 /* Additionally record the possibility of leaving the original iv
2116 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2117 cand
= add_candidate_1 (data
,
2118 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2119 SSA_NAME_DEF_STMT (def
));
2120 cand
->var_before
= iv
->ssa_name
;
2121 cand
->var_after
= def
;
2125 /* Adds candidates based on the old induction variables. */
2128 add_old_ivs_candidates (struct ivopts_data
*data
)
2134 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2136 iv
= ver_info (data
, i
)->iv
;
2137 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2138 add_old_iv_candidates (data
, iv
);
2142 /* Adds candidates based on the value of the induction variable IV and USE. */
2145 add_iv_value_candidates (struct ivopts_data
*data
,
2146 struct iv
*iv
, struct iv_use
*use
)
2148 unsigned HOST_WIDE_INT offset
;
2151 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2153 /* The same, but with initial value zero. Make such variable important,
2154 since it is generic enough so that possibly many uses may be based
2156 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2157 iv
->step
, true, use
);
2159 /* Third, try removing the constant offset. */
2160 base
= strip_offset (iv
->base
, &offset
);
2162 add_candidate (data
, base
, iv
->step
, false, use
);
2165 /* Adds candidates based on the uses. */
2168 add_derived_ivs_candidates (struct ivopts_data
*data
)
2172 for (i
= 0; i
< n_iv_uses (data
); i
++)
2174 struct iv_use
*use
= iv_use (data
, i
);
2181 case USE_NONLINEAR_EXPR
:
2184 /* Just add the ivs based on the value of the iv used here. */
2185 add_iv_value_candidates (data
, use
->iv
, use
);
2194 /* Record important candidates and add them to related_cands bitmaps
2198 record_important_candidates (struct ivopts_data
*data
)
2203 for (i
= 0; i
< n_iv_cands (data
); i
++)
2205 struct iv_cand
*cand
= iv_cand (data
, i
);
2207 if (cand
->important
)
2208 bitmap_set_bit (data
->important_candidates
, i
);
2211 data
->consider_all_candidates
= (n_iv_cands (data
)
2212 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2214 if (data
->consider_all_candidates
)
2216 /* We will not need "related_cands" bitmaps in this case,
2217 so release them to decrease peak memory consumption. */
2218 for (i
= 0; i
< n_iv_uses (data
); i
++)
2220 use
= iv_use (data
, i
);
2221 BITMAP_FREE (use
->related_cands
);
2226 /* Add important candidates to the related_cands bitmaps. */
2227 for (i
= 0; i
< n_iv_uses (data
); i
++)
2228 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2229 data
->important_candidates
);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2251 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2252 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2253 we allocate a simple list to every use. */
2256 alloc_use_cost_map (struct ivopts_data
*data
)
2258 unsigned i
, size
, s
, j
;
2260 for (i
= 0; i
< n_iv_uses (data
); i
++)
2262 struct iv_use
*use
= iv_use (data
, i
);
2265 if (data
->consider_all_candidates
)
2266 size
= n_iv_cands (data
);
2270 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2275 /* Round up to the power of two, so that moduling by it is fast. */
2276 for (size
= 1; size
< s
; size
<<= 1)
2280 use
->n_map_members
= size
;
2281 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2285 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2286 on invariants DEPENDS_ON and that the value used in expressing it
2290 set_use_iv_cost (struct ivopts_data
*data
,
2291 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2292 bitmap depends_on
, tree value
)
2298 BITMAP_FREE (depends_on
);
2302 if (data
->consider_all_candidates
)
2304 use
->cost_map
[cand
->id
].cand
= cand
;
2305 use
->cost_map
[cand
->id
].cost
= cost
;
2306 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2307 use
->cost_map
[cand
->id
].value
= value
;
2311 /* n_map_members is a power of two, so this computes modulo. */
2312 s
= cand
->id
& (use
->n_map_members
- 1);
2313 for (i
= s
; i
< use
->n_map_members
; i
++)
2314 if (!use
->cost_map
[i
].cand
)
2316 for (i
= 0; i
< s
; i
++)
2317 if (!use
->cost_map
[i
].cand
)
2323 use
->cost_map
[i
].cand
= cand
;
2324 use
->cost_map
[i
].cost
= cost
;
2325 use
->cost_map
[i
].depends_on
= depends_on
;
2326 use
->cost_map
[i
].value
= value
;
2329 /* Gets cost of (USE, CANDIDATE) pair. */
2331 static struct cost_pair
*
2332 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2333 struct iv_cand
*cand
)
2336 struct cost_pair
*ret
;
2341 if (data
->consider_all_candidates
)
2343 ret
= use
->cost_map
+ cand
->id
;
2350 /* n_map_members is a power of two, so this computes modulo. */
2351 s
= cand
->id
& (use
->n_map_members
- 1);
2352 for (i
= s
; i
< use
->n_map_members
; i
++)
2353 if (use
->cost_map
[i
].cand
== cand
)
2354 return use
->cost_map
+ i
;
2356 for (i
= 0; i
< s
; i
++)
2357 if (use
->cost_map
[i
].cand
== cand
)
2358 return use
->cost_map
+ i
;
2363 /* Returns estimate on cost of computing SEQ. */
2371 for (; seq
; seq
= NEXT_INSN (seq
))
2373 set
= single_set (seq
);
2375 cost
+= rtx_cost (set
, SET
);
2383 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2385 produce_memory_decl_rtl (tree obj
, int *regno
)
2390 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2392 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2393 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2396 x
= gen_raw_REG (Pmode
, (*regno
)++);
2398 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2401 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2402 walk_tree. DATA contains the actual fake register number. */
2405 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2407 tree obj
= NULL_TREE
;
2411 switch (TREE_CODE (*expr_p
))
2414 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2415 handled_component_p (*expr_p
);
2416 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2420 x
= produce_memory_decl_rtl (obj
, regno
);
2425 obj
= SSA_NAME_VAR (*expr_p
);
2426 if (!DECL_RTL_SET_P (obj
))
2427 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2436 if (DECL_RTL_SET_P (obj
))
2439 if (DECL_MODE (obj
) == BLKmode
)
2440 x
= produce_memory_decl_rtl (obj
, regno
);
2442 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2452 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2453 SET_DECL_RTL (obj
, x
);
2459 /* Determines cost of the computation of EXPR. */
2462 computation_cost (tree expr
)
2465 tree type
= TREE_TYPE (expr
);
2467 /* Avoid using hard regs in ways which may be unsupported. */
2468 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2470 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2472 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2476 cost
= seq_cost (seq
);
2478 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2483 /* Returns variable containing the value of candidate CAND at statement AT. */
2486 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2488 if (stmt_after_increment (loop
, cand
, stmt
))
2489 return cand
->var_after
;
2491 return cand
->var_before
;
2494 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2495 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2498 tree_int_cst_sign_bit (tree t
)
2500 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2501 unsigned HOST_WIDE_INT w
;
2503 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2504 w
= TREE_INT_CST_LOW (t
);
2507 w
= TREE_INT_CST_HIGH (t
);
2508 bitno
-= HOST_BITS_PER_WIDE_INT
;
2511 return (w
>> bitno
) & 1;
2514 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2515 return cst. Otherwise return NULL_TREE. */
2518 constant_multiple_of (tree type
, tree top
, tree bot
)
2520 tree res
, mby
, p0
, p1
;
2521 enum tree_code code
;
2527 if (operand_equal_p (top
, bot
, 0))
2528 return build_int_cst (type
, 1);
2530 code
= TREE_CODE (top
);
2534 mby
= TREE_OPERAND (top
, 1);
2535 if (TREE_CODE (mby
) != INTEGER_CST
)
2538 res
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2542 return fold_binary_to_constant (MULT_EXPR
, type
, res
,
2543 fold_convert (type
, mby
));
2547 p0
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2550 p1
= constant_multiple_of (type
, TREE_OPERAND (top
, 1), bot
);
2554 return fold_binary_to_constant (code
, type
, p0
, p1
);
2557 if (TREE_CODE (bot
) != INTEGER_CST
)
2560 bot
= fold_convert (type
, bot
);
2561 top
= fold_convert (type
, top
);
2563 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2564 the result afterwards. */
2565 if (tree_int_cst_sign_bit (bot
))
2568 bot
= fold_unary_to_constant (NEGATE_EXPR
, type
, bot
);
2573 /* Ditto for TOP. */
2574 if (tree_int_cst_sign_bit (top
))
2577 top
= fold_unary_to_constant (NEGATE_EXPR
, type
, top
);
2580 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR
, type
, top
, bot
)))
2583 res
= fold_binary_to_constant (EXACT_DIV_EXPR
, type
, top
, bot
);
2585 res
= fold_unary_to_constant (NEGATE_EXPR
, type
, res
);
2593 /* Sets COMB to CST. */
2596 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2597 unsigned HOST_WIDE_INT cst
)
2599 unsigned prec
= TYPE_PRECISION (type
);
2602 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2605 comb
->rest
= NULL_TREE
;
2606 comb
->offset
= cst
& comb
->mask
;
2609 /* Sets COMB to single element ELT. */
2612 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2614 unsigned prec
= TYPE_PRECISION (type
);
2617 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2620 comb
->elts
[0] = elt
;
2622 comb
->rest
= NULL_TREE
;
2626 /* Scales COMB by SCALE. */
2629 aff_combination_scale (struct affine_tree_combination
*comb
,
2630 unsigned HOST_WIDE_INT scale
)
2639 aff_combination_const (comb
, comb
->type
, 0);
2643 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2644 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2646 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2647 comb
->elts
[j
] = comb
->elts
[i
];
2648 if (comb
->coefs
[j
] != 0)
2655 if (comb
->n
< MAX_AFF_ELTS
)
2657 comb
->coefs
[comb
->n
] = scale
;
2658 comb
->elts
[comb
->n
] = comb
->rest
;
2659 comb
->rest
= NULL_TREE
;
2663 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2664 build_int_cst_type (comb
->type
, scale
));
2668 /* Adds ELT * SCALE to COMB. */
2671 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2672 unsigned HOST_WIDE_INT scale
)
2679 for (i
= 0; i
< comb
->n
; i
++)
2680 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2682 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2687 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2688 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2692 gcc_assert (comb
->n
== MAX_AFF_ELTS
- 1);
2693 comb
->coefs
[comb
->n
] = 1;
2694 comb
->elts
[comb
->n
] = comb
->rest
;
2695 comb
->rest
= NULL_TREE
;
2700 if (comb
->n
< MAX_AFF_ELTS
)
2702 comb
->coefs
[comb
->n
] = scale
;
2703 comb
->elts
[comb
->n
] = elt
;
2709 elt
= fold_convert (comb
->type
, elt
);
2711 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2712 fold_convert (comb
->type
, elt
),
2713 build_int_cst_type (comb
->type
, scale
));
2716 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2721 /* Adds COMB2 to COMB1. */
2724 aff_combination_add (struct affine_tree_combination
*comb1
,
2725 struct affine_tree_combination
*comb2
)
2729 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2730 for (i
= 0; i
< comb2
->n
; i
++)
2731 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2733 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2736 /* Splits EXPR into an affine combination of parts. */
2739 tree_to_aff_combination (tree expr
, tree type
,
2740 struct affine_tree_combination
*comb
)
2742 struct affine_tree_combination tmp
;
2743 enum tree_code code
;
2744 tree cst
, core
, toffset
;
2745 HOST_WIDE_INT bitpos
, bitsize
;
2746 enum machine_mode mode
;
2747 int unsignedp
, volatilep
;
2751 code
= TREE_CODE (expr
);
2755 aff_combination_const (comb
, type
, int_cst_value (expr
));
2760 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2761 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2762 if (code
== MINUS_EXPR
)
2763 aff_combination_scale (&tmp
, -1);
2764 aff_combination_add (comb
, &tmp
);
2768 cst
= TREE_OPERAND (expr
, 1);
2769 if (TREE_CODE (cst
) != INTEGER_CST
)
2771 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2772 aff_combination_scale (comb
, int_cst_value (cst
));
2776 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2777 aff_combination_scale (comb
, -1);
2781 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2782 &toffset
, &mode
, &unsignedp
, &volatilep
,
2784 if (bitpos
% BITS_PER_UNIT
!= 0)
2786 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2787 core
= build_fold_addr_expr (core
);
2788 if (TREE_CODE (core
) == ADDR_EXPR
)
2789 aff_combination_add_elt (comb
, core
, 1);
2792 tree_to_aff_combination (core
, type
, &tmp
);
2793 aff_combination_add (comb
, &tmp
);
2797 tree_to_aff_combination (toffset
, type
, &tmp
);
2798 aff_combination_add (comb
, &tmp
);
2806 aff_combination_elt (comb
, type
, expr
);
2809 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2812 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2813 unsigned HOST_WIDE_INT mask
)
2815 enum tree_code code
;
2818 elt
= fold_convert (type
, elt
);
2825 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2831 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2833 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2837 return fold_build2 (MULT_EXPR
, type
, elt
,
2838 build_int_cst_type (type
, scale
));
2840 if ((scale
| (mask
>> 1)) == mask
)
2842 /* Scale is negative. */
2844 scale
= (-scale
) & mask
;
2849 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2850 build_int_cst_type (type
, scale
));
2851 return fold_build2 (code
, type
, expr
, elt
);
2854 /* Copies the tree elements of COMB to ensure that they are not shared. */
2857 unshare_aff_combination (struct affine_tree_combination
*comb
)
2861 for (i
= 0; i
< comb
->n
; i
++)
2862 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2864 comb
->rest
= unshare_expr (comb
->rest
);
2867 /* Makes tree from the affine combination COMB. */
2870 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2872 tree type
= comb
->type
;
2873 tree expr
= comb
->rest
;
2875 unsigned HOST_WIDE_INT off
, sgn
;
2877 /* Handle the special case produced by get_computation_aff when
2878 the type does not fit in HOST_WIDE_INT. */
2879 if (comb
->n
== 0 && comb
->offset
== 0)
2880 return fold_convert (type
, expr
);
2882 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2884 for (i
= 0; i
< comb
->n
; i
++)
2885 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2888 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2890 /* Offset is negative. */
2891 off
= (-comb
->offset
) & comb
->mask
;
2899 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2903 /* Determines the expression by that USE is expressed from induction variable
2904 CAND at statement AT in LOOP. The expression is stored in a decomposed
2905 form into AFF. Returns false if USE cannot be expressed using CAND. */
2908 get_computation_aff (struct loop
*loop
,
2909 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
2910 struct affine_tree_combination
*aff
)
2912 tree ubase
= use
->iv
->base
;
2913 tree ustep
= use
->iv
->step
;
2914 tree cbase
= cand
->iv
->base
;
2915 tree cstep
= cand
->iv
->step
;
2916 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2920 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2921 HOST_WIDE_INT ratioi
;
2922 struct affine_tree_combination cbase_aff
, expr_aff
;
2923 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
2925 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2927 /* We do not have a precision to express the values of use. */
2931 expr
= var_at_stmt (loop
, cand
, at
);
2933 if (TREE_TYPE (expr
) != ctype
)
2935 /* This may happen with the original ivs. */
2936 expr
= fold_convert (ctype
, expr
);
2939 if (TYPE_UNSIGNED (utype
))
2943 uutype
= unsigned_type_for (utype
);
2944 ubase
= fold_convert (uutype
, ubase
);
2945 ustep
= fold_convert (uutype
, ustep
);
2948 if (uutype
!= ctype
)
2950 expr
= fold_convert (uutype
, expr
);
2951 cbase
= fold_convert (uutype
, cbase
);
2952 cstep
= fold_convert (uutype
, cstep
);
2954 /* If the conversion is not noop, we must take it into account when
2955 considering the value of the step. */
2956 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2960 if (cst_and_fits_in_hwi (cstep_orig
)
2961 && cst_and_fits_in_hwi (ustep_orig
))
2963 ustepi
= int_cst_value (ustep_orig
);
2964 cstepi
= int_cst_value (cstep_orig
);
2966 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2968 /* TODO maybe consider case when ustep divides cstep and the ratio is
2969 a power of 2 (so that the division is fast to execute)? We would
2970 need to be much more careful with overflows etc. then. */
2974 ratio
= build_int_cst_type (uutype
, ratioi
);
2978 ratio
= constant_multiple_of (uutype
, ustep_orig
, cstep_orig
);
2982 /* Ratioi is only used to detect special cases when the multiplicative
2983 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2984 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2985 to integer_onep/integer_all_onesp, since the former ignores
2987 if (cst_and_fits_in_hwi (ratio
))
2988 ratioi
= int_cst_value (ratio
);
2989 else if (integer_onep (ratio
))
2991 else if (integer_all_onesp (ratio
))
2997 /* We may need to shift the value if we are after the increment. */
2998 if (stmt_after_increment (loop
, cand
, at
))
2999 cbase
= fold_build2 (PLUS_EXPR
, uutype
, cbase
, cstep
);
3001 /* use = ubase - ratio * cbase + ratio * var.
3003 In general case ubase + ratio * (var - cbase) could be better (one less
3004 multiplication), but often it is possible to eliminate redundant parts
3005 of computations from (ubase - ratio * cbase) term, and if it does not
3006 happen, fold is able to apply the distributive law to obtain this form
3009 if (TYPE_PRECISION (uutype
) > HOST_BITS_PER_WIDE_INT
)
3011 /* Let's compute in trees and just return the result in AFF. This case
3012 should not be very common, and fold itself is not that bad either,
3013 so making the aff. functions more complicated to handle this case
3014 is not that urgent. */
3017 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, cbase
);
3018 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3020 else if (ratioi
== -1)
3022 delta
= fold_build2 (PLUS_EXPR
, uutype
, ubase
, cbase
);
3023 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3027 delta
= fold_build2 (MULT_EXPR
, uutype
, cbase
, ratio
);
3028 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, delta
);
3029 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3030 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3041 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3042 possible to compute ratioi. */
3043 gcc_assert (ratioi
);
3045 tree_to_aff_combination (ubase
, uutype
, aff
);
3046 tree_to_aff_combination (cbase
, uutype
, &cbase_aff
);
3047 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3048 aff_combination_scale (&cbase_aff
, -ratioi
);
3049 aff_combination_scale (&expr_aff
, ratioi
);
3050 aff_combination_add (aff
, &cbase_aff
);
3051 aff_combination_add (aff
, &expr_aff
);
3056 /* Determines the expression by that USE is expressed from induction variable
3057 CAND at statement AT in LOOP. The computation is unshared. */
/* NOTE(review): this extraction dropped several physical lines of this
   function (return type, braces and the early return on failure of
   get_computation_aff) -- confirm control flow against the original
   tree-ssa-loop-ivopts.c before relying on it.  */
3060 get_computation_at (struct loop
*loop
,
3061 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3063 struct affine_tree_combination aff
;
3064 tree type
= TREE_TYPE (use
->iv
->base
);
/* Build USE's value as an affine combination of CAND at AT, unshare the
   trees so they can be inserted freely, and fold back to USE's type.  */
3066 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
/* Failure branch body is missing from this extraction -- presumably an
   early return; verify.  */
3068 unshare_aff_combination (&aff
);
3069 return fold_convert (type
, aff_combination_to_tree (&aff
));
3072 /* Determines the expression by that USE is expressed from induction variable
3073 CAND in LOOP. The computation is unshared. */
/* Thin wrapper: same as get_computation_at, with AT fixed to the statement
   containing the use (use->stmt).  */
3076 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3078 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3081 /* Returns cost of addition in MODE. */
/* Measures the RTL cost of a register-register PLUS in MODE by expanding it
   via force_operand and summing the cost of the resulting insn sequence
   (seq_cost).  Results are memoized per machine mode in a static array.
   NOTE(review): the memoization lookup/store and sequence start/end lines
   are missing from this extraction -- verify against the original file.  */
3084 add_cost (enum machine_mode mode
)
3086 static unsigned costs
[NUM_MACHINE_MODES
];
/* Use registers just past the virtual ones so no real hard reg is implied.  */
3094 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3095 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3096 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3101 cost
= seq_cost (seq
);
3107 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3108 fprintf (dump_file
, "Addition in %s costs %d\n",
3109 GET_MODE_NAME (mode
), cost
);
3113 /* Entry in a hashtable of already known costs for multiplication. */
/* NOTE(review): the "struct mbc_entry" opening line itself is missing from
   this extraction; these are its three fields, keyed by (cst, mode).  */
3116 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3117 enum machine_mode mode
; /* In mode. */
3118 unsigned cost
; /* The cost. */
3121 /* Counts hash value for the ENTRY. */
/* htab hash callback for the multiply-by-cost table: mixes the machine mode
   with the multiplier constant reduced mod 877 (both scaled by small primes
   to spread buckets).  */
3124 mbc_entry_hash (const void *entry
)
3126 const struct mbc_entry
*e
= entry
;
3128 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3131 /* Compares the hash table entries ENTRY1 and ENTRY2. */
/* htab equality callback: two cache entries match iff both the machine mode
   and the multiplier constant are equal.  */
3134 mbc_entry_eq (const void *entry1
, const void *entry2
)
3136 const struct mbc_entry
*e1
= entry1
;
3137 const struct mbc_entry
*e2
= entry2
;
3139 return (e1
->mode
== e2
->mode
3140 && e1
->cst
== e2
->cst
);
3143 /* Returns cost of multiplication by constant CST in MODE. */
/* Memoized: looks (cst, mode) up in a lazily created hash table; on a miss,
   expands reg * cst via expand_mult into an insn sequence and records its
   seq_cost.  NOTE(review): the lazy-init guard, sequence start/end, the
   final return, and closing braces are missing from this extraction.  */
3146 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3148 static htab_t costs
;
3149 struct mbc_entry
**cached
, act
;
/* Created on first call; entries are freed by the table itself ('free').  */
3154 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3158 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
/* Cache hit: return the previously measured cost.  */
3160 return (*cached
)->cost
;
/* Cache miss: allocate a fresh entry for (cst, mode).  */
3162 *cached
= XNEW (struct mbc_entry
);
3163 (*cached
)->mode
= mode
;
3164 (*cached
)->cst
= cst
;
/* Expand the multiplication on a scratch register and cost the sequence.  */
3167 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3168 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3172 cost
= seq_cost (seq
);
3174 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3175 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3176 (int) cst
, GET_MODE_NAME (mode
), cost
);
3178 (*cached
)->cost
= cost
;
3183 /* Returns true if multiplying by RATIO is allowed in address. */
/* Probes the target once: for each i in [-MAX_RATIO, MAX_RATIO] it builds
   the address (reg * i) and asks memory_address_p whether the target
   accepts it, caching the answers in a static bitmap.  Subsequent calls
   just test the bitmap.  NOTE(review): the first-call guard, loop braces
   and an early 'return false' for out-of-range ratios are missing from
   this extraction.  */
3186 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
)
3188 #define MAX_RATIO 128
3189 static sbitmap valid_mult
;
3193 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
/* One-time probe of all candidate multipliers.  */
3197 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3198 sbitmap_zero (valid_mult
);
3199 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3200 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3202 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3203 if (memory_address_p (Pmode
, addr
))
/* Bitmap index is shifted by MAX_RATIO so negative ratios fit.  */
3204 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3207 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3209 fprintf (dump_file
, " allowed multipliers:");
3210 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3211 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3212 fprintf (dump_file
, " %d", (int) i
);
3213 fprintf (dump_file
, "\n");
3214 fprintf (dump_file
, "\n");
/* Ratios beyond the probed window are conservatively rejected (the
   rejecting return line is missing from this extraction).  */
3218 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3221 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3224 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3225 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3226 variable is omitted. The created memory accesses MODE.
3228 TODO -- there must be some better way. This all is quite crude. */
/* Strategy visible in the code below: (1) once per compilation, probe the
   target for the largest positive/negative constant offsets it accepts in
   an address; (2) split the requested (offset, ratio) into the part the
   addressing mode can absorb (offset_p / ratio_p) and the part that must
   be computed with explicit mult/add insns; (3) memoize a base address
   cost per (symbol, var, offset, ratio) presence combination by actually
   forming the RTL address and costing it.  NOTE(review): many physical
   lines (initialization guards, braces, several assignments such as the
   computation of s_offset) are missing from this extraction.  */
3231 get_address_cost (bool symbol_present
, bool var_present
,
3232 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
3234 static bool initialized
= false;
3235 static HOST_WIDE_INT rat
, off
;
3236 static HOST_WIDE_INT min_offset
, max_offset
;
/* Memoized base cost indexed by [symbol][var][offset][ratio] presence.  */
3237 static unsigned costs
[2][2][2][2];
3238 unsigned cost
, acost
;
3239 rtx seq
, addr
, base
;
3240 bool offset_p
, ratio_p
;
3242 HOST_WIDE_INT s_offset
;
3243 unsigned HOST_WIDE_INT mask
;
/* --- One-time probing of the target's offset range (reg + const).  --- */
3251 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3253 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
/* Doubling probe for the largest accepted positive offset (up to 2^20).  */
3254 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3256 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3257 if (!memory_address_p (Pmode
, addr
))
3260 max_offset
= i
>> 1;
/* Same probe for negative offsets.  */
3263 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3265 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3266 if (!memory_address_p (Pmode
, addr
))
3269 min_offset
= -(i
>> 1);
3271 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3273 fprintf (dump_file
, "get_address_cost:\n");
3274 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
3275 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
/* Pick some representative multiplier accepted in addresses (stored in
   the static 'rat'; the storing line is missing from this extraction).  */
3279 for (i
= 2; i
<= MAX_RATIO
; i
++)
3280 if (multiplier_allowed_in_address_p (i
))
/* --- Per-call part: sign-extend OFFSET to a signed s_offset.  --- */
3287 bits
= GET_MODE_BITSIZE (Pmode
);
3288 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3290 if ((offset
>> (bits
- 1) & 1))
/* Can the addressing mode absorb the offset and the ratio directly?  */
3295 offset_p
= (s_offset
!= 0
3296 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
3297 ratio_p
= (ratio
!= 1
3298 && multiplier_allowed_in_address_p (ratio
));
/* Parts not absorbed by the addressing mode cost explicit insns.  */
3300 if (ratio
!= 1 && !ratio_p
)
3301 cost
+= multiply_by_cost (ratio
, Pmode
);
3303 if (s_offset
&& !offset_p
&& !symbol_present
)
3305 cost
+= add_cost (Pmode
);
/* --- Memoized base cost of the residual address shape.  --- */
3309 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3312 int old_cse_not_expected
;
/* Build a representative RTL address of exactly this shape ...  */
3315 addr
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3316 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3318 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, gen_int_mode (rat
, Pmode
));
3321 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
/* ... with an anonymous symbol when symbol_present ...  */
3325 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3327 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3328 gen_rtx_fmt_ee (PLUS
, Pmode
,
3330 gen_int_mode (off
, Pmode
)));
/* ... or just a constant displacement otherwise.  */
3333 base
= gen_int_mode (off
, Pmode
);
3338 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3341 /* To avoid splitting addressing modes, pretend that no cse will
3343 old_cse_not_expected
= cse_not_expected
;
3344 cse_not_expected
= true;
3345 addr
= memory_address (Pmode
, addr
);
3346 cse_not_expected
= old_cse_not_expected
;
/* Cost = insns needed to legitimize the address + the address itself.  */
3350 acost
= seq_cost (seq
);
3351 acost
+= address_cost (addr
, Pmode
);
3355 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
3358 return cost
+ acost
;
3361 /* Estimates cost of forcing expression EXPR into a variable. */
/* On the first call, calibrates three reference costs by actually costing
   representative computations: an integer constant, a symbol address, and
   symbol+offset.  Then classifies EXPR (variable / invariant constant /
   address / binary arithmetic) and returns the matching estimate,
   recursing on operands of PLUS/MINUS/MULT.  NOTE(review): several lines
   (the zero-cost returns for SSA vars, case labels, sequence handling and
   braces) are missing from this extraction.  */
3364 force_expr_to_var_cost (tree expr
)
3366 static bool costs_initialized
= false;
3367 static unsigned integer_cost
;
3368 static unsigned symbol_cost
;
3369 static unsigned address_cost
;
3371 unsigned cost0
, cost1
, cost
;
3372 enum machine_mode mode
;
/* --- One-time calibration of the reference costs.  --- */
3374 if (!costs_initialized
)
3376 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3377 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3378 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3380 tree type
= build_pointer_type (integer_type_node
);
3382 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
/* Give the dummy variable static storage so its address is a symbol.  */
3385 SET_DECL_RTL (var
, x
);
3386 TREE_STATIC (var
) = 1;
3387 addr
= build1 (ADDR_EXPR
, type
, var
);
/* '+ 1' biases symbol/address above plain values of equal raw cost.  */
3388 symbol_cost
= computation_cost (addr
) + 1;
3391 = computation_cost (build2 (PLUS_EXPR
, type
,
3393 build_int_cst_type (type
, 2000))) + 1;
3394 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3396 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3397 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3398 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3399 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3400 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3401 fprintf (dump_file
, "\n");
3404 costs_initialized
= true;
/* --- Classification of EXPR.  --- */
3409 if (SSA_VAR_P (expr
))
3412 if (TREE_INVARIANT (expr
))
3414 if (TREE_CODE (expr
) == INTEGER_CST
)
3415 return integer_cost
;
3417 if (TREE_CODE (expr
) == ADDR_EXPR
)
3419 tree obj
= TREE_OPERAND (expr
, 0);
3421 if (TREE_CODE (obj
) == VAR_DECL
3422 || TREE_CODE (obj
) == PARM_DECL
3423 || TREE_CODE (obj
) == RESULT_DECL
)
3427 return address_cost
;
/* Binary arithmetic: recurse on the operands (cost 0 for gimple vals).  */
3430 switch (TREE_CODE (expr
))
3435 op0
= TREE_OPERAND (expr
, 0);
3436 op1
= TREE_OPERAND (expr
, 1);
3440 if (is_gimple_val (op0
))
3443 cost0
= force_expr_to_var_cost (op0
);
3445 if (is_gimple_val (op1
))
3448 cost1
= force_expr_to_var_cost (op1
);
3453 /* Just an arbitrary value, FIXME. */
3454 return target_spill_cost
;
/* Add the cost of the operation itself.  */
3457 mode
= TYPE_MODE (TREE_TYPE (expr
));
3458 switch (TREE_CODE (expr
))
3462 cost
= add_cost (mode
);
/* Multiplication is only costed when one operand is a HWI constant.  */
3466 if (cst_and_fits_in_hwi (op0
))
3467 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3468 else if (cst_and_fits_in_hwi (op1
))
3469 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3471 return target_spill_cost
;
3481 /* Bound the cost by target_spill_cost. The parts of complicated
3482 computations often are either loop invariant or at least can
3483 be shared between several iv uses, so letting this grow without
3484 limits would not give reasonable results. */
3485 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3488 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3489 invariants the computation depends on. */
/* Records the invariants EXPR depends on (via the find_depends tree walk,
   communicating through the global fd_ivopts_data), then delegates the
   actual cost estimate to force_expr_to_var_cost.  NOTE(review): the guard
   around the walk for a NULL depends_on is missing from this extraction.  */
3492 force_var_cost (struct ivopts_data
*data
,
3493 tree expr
, bitmap
*depends_on
)
3497 fd_ivopts_data
= data
;
3498 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3501 return force_expr_to_var_cost (expr
);
3504 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3505 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3506 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3507 invariants the computation depends on. */
/* Decomposes ADDR with get_inner_reference.  If the reference is not a
   plain byte-aligned VAR_DECL (or has a variable sub-offset -- that part
   of the condition is missing from this extraction), falls back to
   treating the whole address as an opaque variable at spill cost.
   Otherwise folds the constant byte offset into *OFFSET and classifies
   the base as symbol (static/external) or variable.  */
3510 split_address_cost (struct ivopts_data
*data
,
3511 tree addr
, bool *symbol_present
, bool *var_present
,
3512 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3515 HOST_WIDE_INT bitsize
;
3516 HOST_WIDE_INT bitpos
;
3518 enum machine_mode mode
;
3519 int unsignedp
, volatilep
;
3521 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3522 &unsignedp
, &volatilep
, false);
/* Fallback: not a simple variable reference.  */
3525 || bitpos
% BITS_PER_UNIT
!= 0
3526 || TREE_CODE (core
) != VAR_DECL
)
3528 *symbol_present
= false;
3529 *var_present
= true;
3530 fd_ivopts_data
= data
;
3531 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3532 return target_spill_cost
;
/* Simple case: fold the constant displacement into *OFFSET.  */
3535 *offset
+= bitpos
/ BITS_PER_UNIT
;
/* Static or external decls have a symbolic (link-time) address.  */
3536 if (TREE_STATIC (core
)
3537 || DECL_EXTERNAL (core
))
3539 *symbol_present
= true;
3540 *var_present
= false;
3544 *symbol_present
= false;
3545 *var_present
= true;
3549 /* Estimates cost of expressing difference of addresses E1 - E2 as
3550 var + symbol + offset. The value of offset is added to OFFSET,
3551 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3552 part is missing. DEPENDS_ON is a set of the invariants the computation
/* Three cases, visible below: (1) the pointer difference is a known
   constant -- fold it into *OFFSET, cost is free; (2) E2 is zero -- the
   difference is just E1, delegate to split_address_cost; (3) general
   case -- force both addresses into variables and pay one addition.
   NOTE(review): several lines (the constant fold into *OFFSET, returns,
   braces) are missing from this extraction.  */
3556 ptr_difference_cost (struct ivopts_data
*data
,
3557 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3558 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3560 HOST_WIDE_INT diff
= 0;
/* Callers guarantee E1 is an address.  */
3563 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
)
;
3565 if (ptr_difference_const (e1
, e2
, &diff
))
3568 *symbol_present
= false;
3569 *var_present
= false;
3573 if (e2
== integer_zero_node
)
3574 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3575 symbol_present
, var_present
, offset
, depends_on
);
/* General case: both sides become variables; one add combines them.  */
3577 *symbol_present
= false;
3578 *var_present
= true;
3580 cost
= force_var_cost (data
, e1
, depends_on
);
3581 cost
+= force_var_cost (data
, e2
, depends_on
);
3582 cost
+= add_cost (Pmode
);
3587 /* Estimates cost of expressing difference E1 - E2 as
3588 var + symbol + offset. The value of offset is added to OFFSET,
3589 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3590 part is missing. DEPENDS_ON is a set of the invariants the computation
/* First strips constant offsets from both operands into *OFFSET, then:
   addresses go to ptr_difference_cost; equal operands cost nothing;
   a zero E2 reduces to forcing E1; a constant E1 costs negating E2
   (multiply by -1); the general case forces both and adds.
   NOTE(review): the mode_for-address handling, zero_p tests and braces
   are missing from this extraction.  */
3594 difference_cost (struct ivopts_data
*data
,
3595 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3596 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3599 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3600 unsigned HOST_WIDE_INT off1
, off2
;
/* Peel constant displacements off both operands; their net effect is
   accumulated into *OFFSET.  */
3602 e1
= strip_offset (e1
, &off1
);
3603 e2
= strip_offset (e2
, &off2
);
3604 *offset
+= off1
- off2
;
3609 if (TREE_CODE (e1
) == ADDR_EXPR
)
3610 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3612 *symbol_present
= false;
3614 if (operand_equal_p (e1
, e2
, 0))
3616 *var_present
= false;
3619 *var_present
= true;
3621 return force_var_cost (data
, e1
, depends_on
);
/* E1 constant: pay for negating E2 (multiplication by -1).  */
3625 cost
= force_var_cost (data
, e2
, depends_on
);
3626 cost
+= multiply_by_cost (-1, mode
);
/* General case: force both operands and one addition.  */
3631 cost
= force_var_cost (data
, e1
, depends_on
);
3632 cost
+= force_var_cost (data
, e2
, depends_on
);
3633 cost
+= add_cost (mode
);
3638 /* Determines the cost of the computation by that USE is expressed
3639 from induction variable CAND. If ADDRESS_P is true, we just need
3640 to create an address from it, otherwise we want to get it into
3641 register. A set of invariants we depend on is stored in
3642 DEPENDS_ON. AT is the statement at that the value is computed. */
/* Fast path: when types and steps cooperate, the use is rewritten as
   symbol + var1 + const + ratio * var2 and costed symbolically from the
   helper cost functions; otherwise the code at the bottom builds the
   actual replacement tree and measures it with computation_cost.
   NOTE(review): numerous physical lines (INFTY returns on the bail-out
   paths, several else-branches, the n_sums assignments, braces) are
   missing from this extraction -- verify exact control flow against the
   original file.  */
3645 get_computation_cost_at (struct ivopts_data
*data
,
3646 struct iv_use
*use
, struct iv_cand
*cand
,
3647 bool address_p
, bitmap
*depends_on
, tree at
)
3649 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3651 tree utype
= TREE_TYPE (ubase
), ctype
;
3652 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3653 HOST_WIDE_INT ratio
, aratio
;
3654 bool var_present
, symbol_present
;
3655 unsigned cost
= 0, n_sums
;
3659 /* Only consider real candidates. */
3663 cbase
= cand
->iv
->base
;
3664 cstep
= cand
->iv
->step
;
3665 ctype
= TREE_TYPE (cbase
);
3667 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3669 /* We do not have a precision to express the values of use. */
3675 /* Do not try to express address of an object with computation based
3676 on address of a different object. This may cause problems in rtl
3677 level alias analysis (that does not expect this to be happening,
3678 as this is illegal in C), and would be unlikely to be useful
3680 if (use
->iv
->base_object
3681 && cand
->iv
->base_object
3682 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3686 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3688 /* TODO -- add direct handling of this case. */
3692 /* CSTEPI is removed from the offset in case statement is after the
3693 increment. If the step is not constant, we use zero instead.
3694 This is a bit imprecise (there is the extra addition), but
3695 redundancy elimination is likely to transform the code so that
3696 it uses value of the variable before increment anyway,
3697 so it is not that much unrealistic. */
3698 if (cst_and_fits_in_hwi (cstep
))
3699 cstepi
= int_cst_value (cstep
);
/* Compute RATIO = ustep / cstep, either via integer division when both
   steps fit a HOST_WIDE_INT ...  */
3703 if (cst_and_fits_in_hwi (ustep
)
3704 && cst_and_fits_in_hwi (cstep
))
3706 ustepi
= int_cst_value (ustep
);
3708 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
/* ... or symbolically via constant_multiple_of otherwise.  */
3715 rat
= constant_multiple_of (utype
, ustep
, cstep
);
3720 if (cst_and_fits_in_hwi (rat
))
3721 ratio
= int_cst_value (rat
);
3722 else if (integer_onep (rat
))
3724 else if (integer_all_onesp (rat
))
3730 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3731 or ratio == 1, it is better to handle this like
3733 ubase - ratio * cbase + ratio * var
3735 (also holds in the case ratio == -1, TODO. */
/* Three shapes of the (ubase - ratio * cbase) invariant part:  */
3737 if (cst_and_fits_in_hwi (cbase
))
3739 offset
= - ratio
* int_cst_value (cbase
);
3740 cost
+= difference_cost (data
,
3741 ubase
, integer_zero_node
,
3742 &symbol_present
, &var_present
, &offset
,
3745 else if (ratio
== 1)
3747 cost
+= difference_cost (data
,
3749 &symbol_present
, &var_present
, &offset
,
/* General case: force cbase into a register and add it separately.  */
3754 cost
+= force_var_cost (data
, cbase
, depends_on
);
3755 cost
+= add_cost (TYPE_MODE (ctype
));
3756 cost
+= difference_cost (data
,
3757 ubase
, integer_zero_node
,
3758 &symbol_present
, &var_present
, &offset
,
3762 /* If we are after the increment, the value of the candidate is higher by
3764 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3765 offset
-= ratio
* cstepi
;
3767 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3768 (symbol/var/const parts may be omitted). If we are looking for an address,
3769 find the cost of addressing this. */
3771 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3773 /* Otherwise estimate the costs for computing the expression. */
3774 aratio
= ratio
> 0 ? ratio
: -ratio
;
3775 if (!symbol_present
&& !var_present
&& !offset
)
3778 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3784 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3788 /* Symbol + offset should be compile-time computable. */
3789 && (symbol_present
|| offset
))
3792 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
/* --- Fallback: no cheap symbolic estimate applies.  --- */
3796 /* Just get the expression, expand it and measure the cost. */
3797 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
/* For an address use, cost the dereference rather than the address.  */
3803 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3805 return computation_cost (comp
);
3809 /* Determines the cost of the computation by that USE is expressed
3810 from induction variable CAND. If ADDRESS_P is true, we just need
3811 to create an address from it, otherwise we want to get it into
3812 register. A set of invariants we depend on is stored in
/* Thin wrapper: same as get_computation_cost_at, with AT fixed to the
   statement containing the use (use->stmt).  */
3816 get_computation_cost (struct ivopts_data
*data
,
3817 struct iv_use
*use
, struct iv_cand
*cand
,
3818 bool address_p
, bitmap
*depends_on
)
3820 return get_computation_cost_at (data
,
3821 use
, cand
, address_p
, depends_on
, use
->stmt
);
3824 /* Determines cost of basing replacement of USE on CAND in a generic
/* Records in the use/candidate cost map the cost of expressing USE from
   CAND in a register; returns whether USE can be based on CAND at all
   (cost != INFTY).  NOTE(review): the 'return true' after the zero-cost
   special case and the braces are missing from this extraction.  */
3828 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3829 struct iv_use
*use
, struct iv_cand
*cand
)
3834 /* The simple case first -- if we need to express value of the preserved
3835 original biv, the cost is 0. This also prevents us from counting the
3836 cost of increment twice -- once at this use and once in the cost of
3838 if (cand
->pos
== IP_ORIGINAL
3839 && cand
->incremented_at
== use
->stmt
)
3841 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
3845 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3846 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3848 return cost
!= INFTY
;
3851 /* Determines cost of basing replacement of USE on CAND in an address. */
/* Like the generic case, but costs producing an ADDRESS from CAND
   (address_p = true).  Returns whether USE can be based on CAND.  */
3854 determine_use_iv_cost_address (struct ivopts_data
*data
,
3855 struct iv_use
*use
, struct iv_cand
*cand
)
3858 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3860 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3862 return cost
!= INFTY
;
3865 /* Computes value of induction variable IV in iteration NITER. */
/* Returns the folded tree  iv->base + iv->step * niter  in the type of
   the iv's base.  */
3868 iv_value (struct iv
*iv
, tree niter
)
3871 tree type
= TREE_TYPE (iv
->base
);
3873 niter
= fold_convert (type
, niter
);
3874 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
3876 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
3879 /* Computes value of candidate CAND at position AT in iteration NITER. */
/* iv_value gives the pre-increment value; if AT executes after the
   candidate's increment, one extra step is added.  NOTE(review): the
   final return of VAL is missing from this extraction.  */
3882 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3884 tree val
= iv_value (cand
->iv
, niter
);
3885 tree type
= TREE_TYPE (cand
->iv
->base
);
3887 if (stmt_after_increment (loop
, cand
, at
))
3888 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
3893 /* Returns period of induction variable iv. */
/* Builds a mask of (precision - ctz(step)) low bits in the unsigned
   version of the step's type; the step must be a compile-time integer
   constant.  NOTE(review): the return of PERIOD is missing from this
   extraction.  */
3896 iv_period (struct iv
*iv
)
3898 tree step
= iv
->step
, period
, type
;
3901 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3903 /* Period of the iv is gcd (step, type range). Since type range is power
3904 of two, it suffices to determine the maximum power of two that divides
3906 pow2div
= num_ending_zeros (step
);
3907 type
= unsigned_type_for (TREE_TYPE (step
));
3909 period
= build_low_bits_mask (type
,
3910 (TYPE_PRECISION (type
)
3911 - tree_low_cst (pow2div
, 1)));
3916 /* Returns the comparison operator used when eliminating the iv USE. */
/* Finds the exit edge of the basic block containing USE (the successor
   that leaves the loop) and picks EQ_EXPR when the exit is taken on the
   TRUE value of the condition, NE_EXPR otherwise.  */
3918 static enum tree_code
3919 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3921 struct loop
*loop
= data
->current_loop
;
3925 ex_bb
= bb_for_stmt (use
->stmt
);
3926 exit
= EDGE_SUCC (ex_bb
, 0);
/* If successor 0 stays inside the loop, the exit must be successor 1.  */
3927 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3928 exit
= EDGE_SUCC (ex_bb
, 1);
3930 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3933 /* Check whether it is possible to express the condition in USE by comparison
3934 of candidate CAND. If so, store the value compared with to BOUND. */
/* Succeeds only when: CAND's step is a constant; USE is the COND_EXPR
   ending a block that dominates the latch; that block has exactly one
   edge leaving the loop; the exit's iteration count is known and cannot
   be zero; and CAND's period covers the iteration count (so the candidate
   does not wrap before the loop ends).  On success, *BOUND receives the
   candidate's value at the final iteration.  NOTE(review): the failure
   'return false' lines, the 'return true' at the end, the extraction of
   NIT from the niter descriptor, and braces are missing from this
   extraction.  */
3937 may_eliminate_iv (struct ivopts_data
*data
,
3938 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3942 struct tree_niter_desc
*niter
;
3944 tree wider_type
, period
, per_type
;
3945 struct loop
*loop
= data
->current_loop
;
/* Only constant-step candidates have a computable period.  */
3947 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3950 /* For now works only for exits that dominate the loop latch. TODO -- extend
3951 for other conditions inside loop body. */
3952 ex_bb
= bb_for_stmt (use
->stmt
);
3953 if (use
->stmt
!= last_stmt (ex_bb
)
3954 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3956 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
/* Locate the (unique) edge leaving the loop; two inside edges fail.  */
3959 exit
= EDGE_SUCC (ex_bb
, 0);
3960 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3961 exit
= EDGE_SUCC (ex_bb
, 1);
3962 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3965 niter
= niter_for_exit (data
, exit
);
/* The iteration count must be known and provably nonzero-safe.  */
3967 || !zero_p (niter
->may_be_zero
))
3971 nit_type
= TREE_TYPE (nit
);
3973 /* Determine whether we may use the variable to test whether niter iterations
3974 elapsed. This is the case iff the period of the induction variable is
3975 greater than the number of iterations. */
3976 period
= iv_period (cand
->iv
);
3979 per_type
= TREE_TYPE (period
);
/* Compare period and iteration count in the wider of the two types.  */
3981 wider_type
= TREE_TYPE (period
);
3982 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
3983 wider_type
= per_type
;
3985 wider_type
= nit_type
;
3987 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
3988 fold_convert (wider_type
, period
),
3989 fold_convert (wider_type
, nit
))))
3992 *bound
= cand_value_at (loop
, cand
, use
->stmt
, nit
);
3996 /* Determines cost of basing replacement of USE on CAND in a condition. */
/* Preferred path: eliminate the iv entirely by comparing CAND against the
   bound from may_eliminate_iv; the cost is then just forcing the bound
   into a variable.  Fallback: express the original iv from CAND and, when
   it is compared against an invariant operand, record the invariants that
   operand depends on.  NOTE(review): the pseudocandidate early return's
   condition, the COND_EXPR extraction into 'cond', and several braces are
   missing from this extraction.  */
3999 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4000 struct iv_use
*use
, struct iv_cand
*cand
)
4002 tree bound
= NULL_TREE
, op
, cond
;
4003 bitmap depends_on
= NULL
;
4006 /* Only consider real candidates. */
4009 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4013 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4015 cost
= force_var_cost (data
, bound
, &depends_on
);
4017 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4018 return cost
!= INFTY
;
4021 /* The induction variable elimination failed; just express the original
4022 giv. If it is compared with an invariant, note that we cannot get
4024 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4027 if (TREE_CODE (cond
) != SSA_NAME
)
4029 op
= TREE_OPERAND (cond
, 0);
/* If operand 0 is itself an iv, the invariant operand is the other one.  */
4030 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4031 op
= TREE_OPERAND (cond
, 1);
4032 if (TREE_CODE (op
) == SSA_NAME
)
4034 op
= get_iv (data
, op
)->base
;
/* Record the invariants the compared-against value depends on.  */
4035 fd_ivopts_data
= data
;
4036 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4040 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4041 return cost
!= INFTY
;
4044 /* Determines cost of basing replacement of USE on CAND. Returns false
4045 if USE cannot be based on CAND. */
/* Dispatches on the use kind to the matching cost routine.  NOTE(review):
   the switch header and the ADDRESS/COMPARE case labels are missing from
   this extraction; only the NONLINEAR_EXPR label survived.  */
4048 determine_use_iv_cost (struct ivopts_data
*data
,
4049 struct iv_use
*use
, struct iv_cand
*cand
)
4053 case USE_NONLINEAR_EXPR
:
4054 return determine_use_iv_cost_generic (data
, use
, cand
);
4057 return determine_use_iv_cost_address (data
, use
, cand
);
4060 return determine_use_iv_cost_condition (data
, use
, cand
);
4067 /* Determines costs of basing the use of the iv on an iv candidate. */
/* Fills the use/candidate cost map for every use: either against all
   candidates (consider_all_candidates) or only the use's related
   candidates, pruning related candidates whose cost came out infinite.
   Finishes with a detailed dump of the cost table when requested.
   NOTE(review): several braces and loop bodies' delimiters are missing
   from this extraction.  */
4070 determine_use_iv_costs (struct ivopts_data
*data
)
4074 struct iv_cand
*cand
;
4075 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4077 alloc_use_cost_map (data
);
4079 for (i
= 0; i
< n_iv_uses (data
); i
++)
4081 use
= iv_use (data
, i
);
4083 if (data
->consider_all_candidates
)
4085 for (j
= 0; j
< n_iv_cands (data
); j
++)
4087 cand
= iv_cand (data
, j
);
4088 determine_use_iv_cost (data
, use
, cand
);
/* Otherwise restrict attention to the candidates related to this use,
   collecting the infeasible ones in TO_CLEAR.  */
4095 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4097 cand
= iv_cand (data
, j
);
4098 if (!determine_use_iv_cost (data
, use
, cand
))
4099 bitmap_set_bit (to_clear
, j
);
4102 /* Remove the candidates for that the cost is infinite from
4103 the list of related candidates. */
4104 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4105 bitmap_clear (to_clear
);
4109 BITMAP_FREE (to_clear
);
4111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4113 fprintf (dump_file
, "Use-candidate costs:\n");
4115 for (i
= 0; i
< n_iv_uses (data
); i
++)
4117 use
= iv_use (data
, i
);
4119 fprintf (dump_file
, "Use %d:\n", i
);
4120 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4121 for (j
= 0; j
< use
->n_map_members
; j
++)
/* Skip empty slots and infeasible pairs.  */
4123 if (!use
->cost_map
[j
].cand
4124 || use
->cost_map
[j
].cost
== INFTY
)
4127 fprintf (dump_file
, " %d\t%d\t",
4128 use
->cost_map
[j
].cand
->id
,
4129 use
->cost_map
[j
].cost
);
4130 if (use
->cost_map
[j
].depends_on
)
4131 bitmap_print (dump_file
,
4132 use
->cost_map
[j
].depends_on
, "","");
4133 fprintf (dump_file
, "\n");
4136 fprintf (dump_file
, "\n");
4138 fprintf (dump_file
, "\n");
4142 /* Determines cost of the candidate CAND. */
/* cand->cost = step cost + base cost amortized over the expected number
   of loop iterations, then biased: slightly in favor of keeping a
   non-artificial original iv, and against placing the increment in an
   otherwise empty latch block.  NOTE(review): the early return for
   pseudocandidates and the actual bias adjustments after the two final
   conditions are missing from this extraction.  */
4145 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4147 unsigned cost_base
, cost_step
;
4156 /* There are two costs associated with the candidate -- its increment
4157 and its initialization. The second is almost negligible for any loop
4158 that rolls enough, so we take it just very little into account. */
4160 base
= cand
->iv
->base
;
4161 cost_base
= force_var_cost (data
, base
, NULL
);
4162 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4164 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4166 /* Prefer the original iv unless we may gain something by replacing it;
4167 this is not really relevant for artificial ivs created by other
4169 if (cand
->pos
== IP_ORIGINAL
4170 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4173 /* Prefer not to insert statements into latch unless there are some
4174 already (so that we do not create unnecessary jumps). */
4175 if (cand
->pos
== IP_END
4176 && empty_block_p (ip_end_pos (data
->current_loop
)))
4180 /* Determines costs of computation of the candidates. */
/* Driver: runs determine_iv_cost over every candidate, emitting a small
   cost table to the dump file when detailed dumping is on.  */
4183 determine_iv_costs (struct ivopts_data
*data
)
4187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4189 fprintf (dump_file
, "Candidate costs:\n");
4190 fprintf (dump_file
, " cand\tcost\n");
4193 for (i
= 0; i
< n_iv_cands (data
); i
++)
4195 struct iv_cand
*cand
= iv_cand (data
, i
);
4197 determine_iv_cost (data
, cand
);
4199 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4200 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4204 fprintf (dump_file
, "\n");
4207 /* Calculates cost for having SIZE induction variables. */
/* Delegates to the target-model helper, passing the loop's register
   pressure (data->regs_used) and its number of iv uses.  */
4210 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4212 return global_cost_for_size (size
, data
->regs_used
, n_iv_uses (data
));
4215 /* For each size of the induction variable set determine the penalty. */
/* Estimates the loop's register pressure: counts header phi results that
   are real (non-virtual) registers and are not ivs, plus invariants with
   nonlinear uses, and stores the total in data->regs_used for the global
   cost model described in the comment below.  NOTE(review): the counter
   initialization and increments of 'n' are among the physical lines
   missing from this extraction.  */
4218 determine_set_costs (struct ivopts_data
*data
)
4222 struct loop
*loop
= data
->current_loop
;
4225 /* We use the following model (definitely improvable, especially the
4226 cost function -- TODO):
4228 We estimate the number of registers available (using MD data), name it A.
4230 We estimate the number of registers used by the loop, name it U. This
4231 number is obtained as the number of loop phi nodes (not counting virtual
4232 registers and bivs) + the number of variables from outside of the loop.
4234 We set a reserve R (free regs that are used for temporary computations,
4235 etc.). For now the reserve is a constant 3.
4237 Let I be the number of induction variables.
4239 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4240 make a lot of ivs without a reason).
4241 -- if A - R < U + I <= A, the cost is I * PRES_COST
4242 -- if U + I > A, the cost is I * PRES_COST and
4243 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4245 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4247 fprintf (dump_file
, "Global costs:\n");
4248 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4249 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4250 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4251 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
/* Count header phis that consume a real register and are not ivs.  */
4255 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4257 op
= PHI_RESULT (phi
);
4259 if (!is_gimple_reg (op
))
4262 if (get_iv (data
, op
))
/* Add invariants that have nonlinear uses inside the loop.  */
4268 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4270 struct version_info
*info
= ver_info (data
, j
);
4272 if (info
->inv_id
&& info
->has_nonlin_use
)
4276 data
->regs_used
= n
;
4277 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4278 fprintf (dump_file
, " regs_used %d\n", n
);
4280 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4282 fprintf (dump_file
, " cost for size:\n");
4283 fprintf (dump_file
, " ivs\tcost\n");
4284 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4285 fprintf (dump_file
, " %d\t%d\n", j
,
4286 ivopts_global_cost_for_size (data
, j
));
4287 fprintf (dump_file
, "\n");
4291 /* Returns true if A is a cheaper cost pair than B. */
/* Ordering: lower pair cost wins; ties are broken by the cheaper
   candidate.  NOTE(review): the true/false return lines after each
   comparison (and any NULL handling) are missing from this extraction.  */
4294 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4302 if (a
->cost
< b
->cost
)
4305 if (a
->cost
> b
->cost
)
4308 /* In case the costs are the same, prefer the cheaper candidate. */
4309 if (a
->cand
->cost
< b
->cand
->cost
)
4315 /* Computes the cost field of IVS structure. */
/* Total = per-use costs + per-candidate costs + the global penalty for
   holding ivs->n_regs registers.  NOTE(review): the initialization of
   'cost' and the store into the ivs structure are missing from this
   extraction.  */
4318 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4322 cost
+= ivs
->cand_use_cost
;
4323 cost
+= ivs
->cand_cost
;
4324 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4329 /* Remove invariants in set INVS to set IVS. */
4332 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4340 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4342 ivs
->n_invariant_uses
[iid
]--;
4343 if (ivs
->n_invariant_uses
[iid
] == 0)
4348 /* Set USE not to be expressed by any candidate in IVS. */
4351 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4354 unsigned uid
= use
->id
, cid
;
4355 struct cost_pair
*cp
;
4357 cp
= ivs
->cand_for_use
[uid
];
4363 ivs
->cand_for_use
[uid
] = NULL
;
4364 ivs
->n_cand_uses
[cid
]--;
4366 if (ivs
->n_cand_uses
[cid
] == 0)
4368 bitmap_clear_bit (ivs
->cands
, cid
);
4369 /* Do not count the pseudocandidates. */
4373 ivs
->cand_cost
-= cp
->cand
->cost
;
4375 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4378 ivs
->cand_use_cost
-= cp
->cost
;
4380 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4381 iv_ca_recount_cost (data
, ivs
);
4384 /* Add invariants in set INVS to set IVS. */
4387 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4395 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4397 ivs
->n_invariant_uses
[iid
]++;
4398 if (ivs
->n_invariant_uses
[iid
] == 1)
4403 /* Set cost pair for USE in set IVS to CP. */
4406 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4407 struct iv_use
*use
, struct cost_pair
*cp
)
4409 unsigned uid
= use
->id
, cid
;
4411 if (ivs
->cand_for_use
[uid
] == cp
)
4414 if (ivs
->cand_for_use
[uid
])
4415 iv_ca_set_no_cp (data
, ivs
, use
);
4422 ivs
->cand_for_use
[uid
] = cp
;
4423 ivs
->n_cand_uses
[cid
]++;
4424 if (ivs
->n_cand_uses
[cid
] == 1)
4426 bitmap_set_bit (ivs
->cands
, cid
);
4427 /* Do not count the pseudocandidates. */
4431 ivs
->cand_cost
+= cp
->cand
->cost
;
4433 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4436 ivs
->cand_use_cost
+= cp
->cost
;
4437 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4438 iv_ca_recount_cost (data
, ivs
);
4442 /* Extend set IVS by expressing USE by some of the candidates in it
4446 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4449 struct cost_pair
*best_cp
= NULL
, *cp
;
4453 gcc_assert (ivs
->upto
>= use
->id
);
4455 if (ivs
->upto
== use
->id
)
4461 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4463 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4465 if (cheaper_cost_pair (cp
, best_cp
))
4469 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4472 /* Get cost for assignment IVS. */
4475 iv_ca_cost (struct iv_ca
*ivs
)
4477 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4480 /* Returns true if all dependences of CP are among invariants in IVS. */
4483 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4488 if (!cp
->depends_on
)
4491 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4493 if (ivs
->n_invariant_uses
[i
] == 0)
4500 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4501 it before NEXT_CHANGE. */
4503 static struct iv_ca_delta
*
4504 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4505 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4507 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4510 change
->old_cp
= old_cp
;
4511 change
->new_cp
= new_cp
;
4512 change
->next_change
= next_change
;
4517 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4520 static struct iv_ca_delta
*
4521 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4523 struct iv_ca_delta
*last
;
4531 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4533 last
->next_change
= l2
;
4538 /* Returns candidate by that USE is expressed in IVS. */
4540 static struct cost_pair
*
4541 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4543 return ivs
->cand_for_use
[use
->id
];
4546 /* Reverse the list of changes DELTA, forming the inverse to it. */
4548 static struct iv_ca_delta
*
4549 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4551 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4552 struct cost_pair
*tmp
;
4554 for (act
= delta
; act
; act
= next
)
4556 next
= act
->next_change
;
4557 act
->next_change
= prev
;
4561 act
->old_cp
= act
->new_cp
;
4568 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4569 reverted instead. */
4572 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4573 struct iv_ca_delta
*delta
, bool forward
)
4575 struct cost_pair
*from
, *to
;
4576 struct iv_ca_delta
*act
;
4579 delta
= iv_ca_delta_reverse (delta
);
4581 for (act
= delta
; act
; act
= act
->next_change
)
4585 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4586 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4590 iv_ca_delta_reverse (delta
);
4593 /* Returns true if CAND is used in IVS. */
4596 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4598 return ivs
->n_cand_uses
[cand
->id
] > 0;
4601 /* Returns number of induction variable candidates in the set IVS. */
4604 iv_ca_n_cands (struct iv_ca
*ivs
)
4606 return ivs
->n_cands
;
4609 /* Free the list of changes DELTA. */
4612 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4614 struct iv_ca_delta
*act
, *next
;
4616 for (act
= *delta
; act
; act
= next
)
4618 next
= act
->next_change
;
4625 /* Allocates new iv candidates assignment. */
4627 static struct iv_ca
*
4628 iv_ca_new (struct ivopts_data
*data
)
4630 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4634 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4635 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4636 nw
->cands
= BITMAP_ALLOC (NULL
);
4639 nw
->cand_use_cost
= 0;
4641 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4647 /* Free memory occupied by the set IVS. */
4650 iv_ca_free (struct iv_ca
**ivs
)
4652 free ((*ivs
)->cand_for_use
);
4653 free ((*ivs
)->n_cand_uses
);
4654 BITMAP_FREE ((*ivs
)->cands
);
4655 free ((*ivs
)->n_invariant_uses
);
4660 /* Dumps IVS to FILE. */
4663 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4665 const char *pref
= " invariants ";
4668 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4669 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4671 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4672 if (ivs
->n_invariant_uses
[i
])
4674 fprintf (file
, "%s%d", pref
, i
);
4677 fprintf (file
, "\n");
4680 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4681 new set, and store differences in DELTA. Number of induction variables
4682 in the new set is stored to N_IVS. */
4685 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4686 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4691 struct cost_pair
*old_cp
, *new_cp
;
4694 for (i
= 0; i
< ivs
->upto
; i
++)
4696 use
= iv_use (data
, i
);
4697 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4700 && old_cp
->cand
== cand
)
4703 new_cp
= get_use_iv_cost (data
, use
, cand
);
4707 if (!iv_ca_has_deps (ivs
, new_cp
))
4710 if (!cheaper_cost_pair (new_cp
, old_cp
))
4713 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4716 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4717 cost
= iv_ca_cost (ivs
);
4719 *n_ivs
= iv_ca_n_cands (ivs
);
4720 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4725 /* Try narrowing set IVS by removing CAND. Return the cost of
4726 the new set and store the differences in DELTA. */
4729 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4730 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4734 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4736 struct iv_cand
*cnd
;
4740 for (i
= 0; i
< n_iv_uses (data
); i
++)
4742 use
= iv_use (data
, i
);
4744 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4745 if (old_cp
->cand
!= cand
)
4750 if (data
->consider_all_candidates
)
4752 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4757 cnd
= iv_cand (data
, ci
);
4759 cp
= get_use_iv_cost (data
, use
, cnd
);
4762 if (!iv_ca_has_deps (ivs
, cp
))
4765 if (!cheaper_cost_pair (cp
, new_cp
))
4773 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4778 cnd
= iv_cand (data
, ci
);
4780 cp
= get_use_iv_cost (data
, use
, cnd
);
4783 if (!iv_ca_has_deps (ivs
, cp
))
4786 if (!cheaper_cost_pair (cp
, new_cp
))
4795 iv_ca_delta_free (delta
);
4799 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4802 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4803 cost
= iv_ca_cost (ivs
);
4804 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4809 /* Try optimizing the set of candidates IVS by removing candidates different
4810 from to EXCEPT_CAND from it. Return cost of the new set, and store
4811 differences in DELTA. */
4814 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4815 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4818 struct iv_ca_delta
*act_delta
, *best_delta
;
4819 unsigned i
, best_cost
, acost
;
4820 struct iv_cand
*cand
;
4823 best_cost
= iv_ca_cost (ivs
);
4825 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4827 cand
= iv_cand (data
, i
);
4829 if (cand
== except_cand
)
4832 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4834 if (acost
< best_cost
)
4837 iv_ca_delta_free (&best_delta
);
4838 best_delta
= act_delta
;
4841 iv_ca_delta_free (&act_delta
);
4850 /* Recurse to possibly remove other unnecessary ivs. */
4851 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4852 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4853 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4854 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4858 /* Tries to extend the sets IVS in the best possible way in order
4859 to express the USE. */
4862 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4865 unsigned best_cost
, act_cost
;
4868 struct iv_cand
*cand
;
4869 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4870 struct cost_pair
*cp
;
4872 iv_ca_add_use (data
, ivs
, use
);
4873 best_cost
= iv_ca_cost (ivs
);
4875 cp
= iv_ca_cand_for_use (ivs
, use
);
4878 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4879 iv_ca_set_no_cp (data
, ivs
, use
);
4882 /* First try important candidates. Only if it fails, try the specific ones.
4883 Rationale -- in loops with many variables the best choice often is to use
4884 just one generic biv. If we added here many ivs specific to the uses,
4885 the optimization algorithm later would be likely to get stuck in a local
4886 minimum, thus causing us to create too many ivs. The approach from
4887 few ivs to more seems more likely to be successful -- starting from few
4888 ivs, replacing an expensive use by a specific iv should always be a
4890 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4892 cand
= iv_cand (data
, i
);
4894 if (iv_ca_cand_used_p (ivs
, cand
))
4897 cp
= get_use_iv_cost (data
, use
, cand
);
4901 iv_ca_set_cp (data
, ivs
, use
, cp
);
4902 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4903 iv_ca_set_no_cp (data
, ivs
, use
);
4904 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4906 if (act_cost
< best_cost
)
4908 best_cost
= act_cost
;
4910 iv_ca_delta_free (&best_delta
);
4911 best_delta
= act_delta
;
4914 iv_ca_delta_free (&act_delta
);
4917 if (best_cost
== INFTY
)
4919 for (i
= 0; i
< use
->n_map_members
; i
++)
4921 cp
= use
->cost_map
+ i
;
4926 /* Already tried this. */
4927 if (cand
->important
)
4930 if (iv_ca_cand_used_p (ivs
, cand
))
4934 iv_ca_set_cp (data
, ivs
, use
, cp
);
4935 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4936 iv_ca_set_no_cp (data
, ivs
, use
);
4937 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4940 if (act_cost
< best_cost
)
4942 best_cost
= act_cost
;
4945 iv_ca_delta_free (&best_delta
);
4946 best_delta
= act_delta
;
4949 iv_ca_delta_free (&act_delta
);
4953 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4954 iv_ca_delta_free (&best_delta
);
4956 return (best_cost
!= INFTY
);
4959 /* Finds an initial assignment of candidates to uses. */
4961 static struct iv_ca
*
4962 get_initial_solution (struct ivopts_data
*data
)
4964 struct iv_ca
*ivs
= iv_ca_new (data
);
4967 for (i
= 0; i
< n_iv_uses (data
); i
++)
4968 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4977 /* Tries to improve set of induction variables IVS. */
4980 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4982 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4983 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4984 struct iv_cand
*cand
;
4986 /* Try extending the set of induction variables by one. */
4987 for (i
= 0; i
< n_iv_cands (data
); i
++)
4989 cand
= iv_cand (data
, i
);
4991 if (iv_ca_cand_used_p (ivs
, cand
))
4994 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4998 /* If we successfully added the candidate and the set is small enough,
4999 try optimizing it by removing other candidates. */
5000 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5002 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5003 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5004 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5005 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5008 if (acost
< best_cost
)
5011 iv_ca_delta_free (&best_delta
);
5012 best_delta
= act_delta
;
5015 iv_ca_delta_free (&act_delta
);
5020 /* Try removing the candidates from the set instead. */
5021 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5023 /* Nothing more we can do. */
5028 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5029 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5030 iv_ca_delta_free (&best_delta
);
5034 /* Attempts to find the optimal set of induction variables. We do simple
5035 greedy heuristic -- we try to replace at most one candidate in the selected
5036 solution and remove the unused ivs while this improves the cost. */
5038 static struct iv_ca
*
5039 find_optimal_iv_set (struct ivopts_data
*data
)
5045 /* Get the initial solution. */
5046 set
= get_initial_solution (data
);
5049 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5050 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5054 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5056 fprintf (dump_file
, "Initial set of candidates:\n");
5057 iv_ca_dump (data
, dump_file
, set
);
5060 while (try_improve_iv_set (data
, set
))
5062 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5064 fprintf (dump_file
, "Improved to:\n");
5065 iv_ca_dump (data
, dump_file
, set
);
5069 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5070 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5072 for (i
= 0; i
< n_iv_uses (data
); i
++)
5074 use
= iv_use (data
, i
);
5075 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5081 /* Creates a new induction variable corresponding to CAND. */
5084 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5086 block_stmt_iterator incr_pos
;
5096 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5100 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5105 /* Mark that the iv is preserved. */
5106 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5107 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5109 /* Rewrite the increment so that it uses var_before directly. */
5110 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5115 gimple_add_tmp_var (cand
->var_before
);
5116 add_referenced_tmp_var (cand
->var_before
);
5118 base
= unshare_expr (cand
->iv
->base
);
5120 create_iv (base
, unshare_expr (cand
->iv
->step
),
5121 cand
->var_before
, data
->current_loop
,
5122 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5125 /* Creates new induction variables described in SET. */
5128 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5131 struct iv_cand
*cand
;
5134 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5136 cand
= iv_cand (data
, i
);
5137 create_new_iv (data
, cand
);
5141 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5142 is true, remove also the ssa name defined by the statement. */
5145 remove_statement (tree stmt
, bool including_defined_name
)
5147 if (TREE_CODE (stmt
) == PHI_NODE
)
5149 if (!including_defined_name
)
5151 /* Prevent the ssa name defined by the statement from being removed. */
5152 SET_PHI_RESULT (stmt
, NULL
);
5154 remove_phi_node (stmt
, NULL_TREE
);
5158 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5160 bsi_remove (&bsi
, true);
5164 /* Rewrites USE (definition of iv used in a nonlinear expression)
5165 using candidate CAND. */
5168 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5169 struct iv_use
*use
, struct iv_cand
*cand
)
5172 tree op
, stmts
, tgt
, ass
;
5173 block_stmt_iterator bsi
, pbsi
;
5175 /* An important special case -- if we are asked to express value of
5176 the original iv by itself, just exit; there is no need to
5177 introduce a new computation (that might also need casting the
5178 variable to unsigned and back). */
5179 if (cand
->pos
== IP_ORIGINAL
5180 && cand
->incremented_at
== use
->stmt
)
5182 tree step
, ctype
, utype
;
5183 enum tree_code incr_code
= PLUS_EXPR
;
5185 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5186 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5188 step
= cand
->iv
->step
;
5189 ctype
= TREE_TYPE (step
);
5190 utype
= TREE_TYPE (cand
->var_after
);
5191 if (TREE_CODE (step
) == NEGATE_EXPR
)
5193 incr_code
= MINUS_EXPR
;
5194 step
= TREE_OPERAND (step
, 0);
5197 /* Check whether we may leave the computation unchanged.
5198 This is the case only if it does not rely on other
5199 computations in the loop -- otherwise, the computation
5200 we rely upon may be removed in remove_unused_ivs,
5201 thus leading to ICE. */
5202 op
= TREE_OPERAND (use
->stmt
, 1);
5203 if (TREE_CODE (op
) == PLUS_EXPR
5204 || TREE_CODE (op
) == MINUS_EXPR
)
5206 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5207 op
= TREE_OPERAND (op
, 1);
5208 else if (TREE_CODE (op
) == PLUS_EXPR
5209 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5210 op
= TREE_OPERAND (op
, 0);
5218 && (TREE_CODE (op
) == INTEGER_CST
5219 || operand_equal_p (op
, step
, 0)))
5222 /* Otherwise, add the necessary computations to express
5224 op
= fold_convert (ctype
, cand
->var_before
);
5225 comp
= fold_convert (utype
,
5226 build2 (incr_code
, ctype
, op
,
5227 unshare_expr (step
)));
5230 comp
= get_computation (data
->current_loop
, use
, cand
);
5232 switch (TREE_CODE (use
->stmt
))
5235 tgt
= PHI_RESULT (use
->stmt
);
5237 /* If we should keep the biv, do not replace it. */
5238 if (name_info (data
, tgt
)->preserve_biv
)
5241 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5242 while (!bsi_end_p (pbsi
)
5243 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5251 tgt
= TREE_OPERAND (use
->stmt
, 0);
5252 bsi
= bsi_for_stmt (use
->stmt
);
5259 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5261 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5264 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5265 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5266 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5267 remove_statement (use
->stmt
, false);
5268 SSA_NAME_DEF_STMT (tgt
) = ass
;
5273 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5274 TREE_OPERAND (use
->stmt
, 1) = op
;
5278 /* Replaces ssa name in index IDX by its basic variable. Callback for
5282 idx_remove_ssa_names (tree base
, tree
*idx
,
5283 void *data ATTRIBUTE_UNUSED
)
5287 if (TREE_CODE (*idx
) == SSA_NAME
)
5288 *idx
= SSA_NAME_VAR (*idx
);
5290 if (TREE_CODE (base
) == ARRAY_REF
)
5292 op
= &TREE_OPERAND (base
, 2);
5294 && TREE_CODE (*op
) == SSA_NAME
)
5295 *op
= SSA_NAME_VAR (*op
);
5296 op
= &TREE_OPERAND (base
, 3);
5298 && TREE_CODE (*op
) == SSA_NAME
)
5299 *op
= SSA_NAME_VAR (*op
);
5305 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5308 unshare_and_remove_ssa_names (tree ref
)
5310 ref
= unshare_expr (ref
);
5311 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5316 /* Extract the alias analysis info for the memory reference REF. There are
5317 several ways how this information may be stored and what precisely is
5318 its semantics depending on the type of the reference, but there always is
5319 somewhere hidden one _DECL node that is used to determine the set of
5320 virtual operands for the reference. The code below deciphers this jungle
5321 and extracts this single useful piece of information. */
5324 get_ref_tag (tree ref
, tree orig
)
5326 tree var
= get_base_address (ref
);
5327 tree aref
= NULL_TREE
, tag
, sv
;
5328 HOST_WIDE_INT offset
, size
, maxsize
;
5330 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5332 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5337 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5338 return unshare_expr (sv
);
5343 if (TREE_CODE (var
) == INDIRECT_REF
)
5345 /* In case the base is a dereference of a pointer, first check its name
5346 mem tag, and if it does not have one, use type mem tag. */
5347 var
= TREE_OPERAND (var
, 0);
5348 if (TREE_CODE (var
) != SSA_NAME
)
5351 if (SSA_NAME_PTR_INFO (var
))
5353 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5358 var
= SSA_NAME_VAR (var
);
5359 tag
= var_ann (var
)->type_mem_tag
;
5360 gcc_assert (tag
!= NULL_TREE
);
5368 tag
= var_ann (var
)->type_mem_tag
;
5376 /* Copies the reference information from OLD_REF to NEW_REF. */
5379 copy_ref_info (tree new_ref
, tree old_ref
)
5381 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5382 copy_mem_ref_info (new_ref
, old_ref
);
5385 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5386 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5390 /* Rewrites USE (address that is an iv) using candidate CAND. */
5393 rewrite_use_address (struct ivopts_data
*data
,
5394 struct iv_use
*use
, struct iv_cand
*cand
)
5396 struct affine_tree_combination aff
;
5397 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5400 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5401 unshare_aff_combination (&aff
);
5403 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5404 copy_ref_info (ref
, *use
->op_p
);
5408 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5412 rewrite_use_compare (struct ivopts_data
*data
,
5413 struct iv_use
*use
, struct iv_cand
*cand
)
5416 tree
*op_p
, cond
, op
, stmts
, bound
;
5417 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5418 enum tree_code compare
;
5419 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5424 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5425 tree var_type
= TREE_TYPE (var
);
5427 compare
= iv_elimination_compare (data
, use
);
5428 bound
= fold_convert (var_type
, bound
);
5429 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5433 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5435 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5436 update_stmt (use
->stmt
);
5440 /* The induction variable elimination failed; just express the original
5442 comp
= get_computation (data
->current_loop
, use
, cand
);
5445 op_p
= &TREE_OPERAND (cond
, 0);
5446 if (TREE_CODE (*op_p
) != SSA_NAME
5447 || zero_p (get_iv (data
, *op_p
)->step
))
5448 op_p
= &TREE_OPERAND (cond
, 1);
5450 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5452 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5457 /* Ensure that operand *OP_P may be used at the end of EXIT without
5458 violating loop closed ssa form. */
5461 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
5464 struct loop
*def_loop
;
5467 use
= USE_FROM_PTR (op_p
);
5468 if (TREE_CODE (use
) != SSA_NAME
)
5471 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
5475 def_loop
= def_bb
->loop_father
;
5476 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
5479 /* Try finding a phi node that copies the value out of the loop. */
5480 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
5481 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
5486 /* Create such a phi node. */
5487 tree new_name
= duplicate_ssa_name (use
, NULL
);
5489 phi
= create_phi_node (new_name
, exit
->dest
);
5490 SSA_NAME_DEF_STMT (new_name
) = phi
;
5491 add_phi_arg (phi
, use
, exit
);
5494 SET_USE (op_p
, PHI_RESULT (phi
));
5497 /* Ensure that operands of STMT may be used at the end of EXIT without
5498 violating loop closed ssa form. */
5501 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
5504 use_operand_p use_p
;
5506 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
5507 protect_loop_closed_ssa_form_use (exit
, use_p
);
5510 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5511 so that they are emitted on the correct place, and so that the loop closed
5512 ssa form is preserved. */
5515 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
5517 tree_stmt_iterator tsi
;
5518 block_stmt_iterator bsi
;
5519 tree phi
, stmt
, def
, next
;
5521 if (!single_pred_p (exit
->dest
))
5522 split_loop_exit_edge (exit
);
5524 /* Ensure there is label in exit->dest, so that we can
5526 tree_block_label (exit
->dest
);
5527 bsi
= bsi_after_labels (exit
->dest
);
5529 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
5531 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
5533 tree stmt
= tsi_stmt (tsi
);
5534 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5535 protect_loop_closed_ssa_form (exit
, stmt
);
5540 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5541 protect_loop_closed_ssa_form (exit
, stmts
);
5547 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
5549 next
= PHI_CHAIN (phi
);
5551 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
5553 def
= PHI_RESULT (phi
);
5554 remove_statement (phi
, false);
5555 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
5557 SSA_NAME_DEF_STMT (def
) = stmt
;
5558 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5563 /* Rewrites USE using candidate CAND. */
5566 rewrite_use (struct ivopts_data
*data
,
5567 struct iv_use
*use
, struct iv_cand
*cand
)
5571 case USE_NONLINEAR_EXPR
:
5572 rewrite_use_nonlinear_expr (data
, use
, cand
);
5576 rewrite_use_address (data
, use
, cand
);
5580 rewrite_use_compare (data
, use
, cand
);
5586 mark_new_vars_to_rename (use
->stmt
);
5589 /* Rewrite the uses using the selected induction variables. */
5592 rewrite_uses (struct ivopts_data
*data
)
5595 struct iv_cand
*cand
;
5598 for (i
= 0; i
< n_iv_uses (data
); i
++)
5600 use
= iv_use (data
, i
);
5601 cand
= use
->selected
;
5604 rewrite_use (data
, use
, cand
);
5608 /* Removes the ivs that are not used after rewriting. */
5611 remove_unused_ivs (struct ivopts_data
*data
)
5616 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5618 struct version_info
*info
;
5620 info
= ver_info (data
, j
);
5622 && !zero_p (info
->iv
->step
)
5624 && !info
->iv
->have_use_for
5625 && !info
->preserve_biv
)
5626 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5630 /* Frees data allocated by the optimization of a single loop. */
5633 free_loop_data (struct ivopts_data
*data
)
5639 htab_empty (data
->niters
);
5641 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5643 struct version_info
*info
;
5645 info
= ver_info (data
, i
);
5649 info
->has_nonlin_use
= false;
5650 info
->preserve_biv
= false;
5653 bitmap_clear (data
->relevant
);
5654 bitmap_clear (data
->important_candidates
);
5656 for (i
= 0; i
< n_iv_uses (data
); i
++)
5658 struct iv_use
*use
= iv_use (data
, i
);
5661 BITMAP_FREE (use
->related_cands
);
5662 for (j
= 0; j
< use
->n_map_members
; j
++)
5663 if (use
->cost_map
[j
].depends_on
)
5664 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5665 free (use
->cost_map
);
5668 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5670 for (i
= 0; i
< n_iv_cands (data
); i
++)
5672 struct iv_cand
*cand
= iv_cand (data
, i
);
5676 if (cand
->depends_on
)
5677 BITMAP_FREE (cand
->depends_on
);
5680 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5682 if (data
->version_info_size
< num_ssa_names
)
5684 data
->version_info_size
= 2 * num_ssa_names
;
5685 free (data
->version_info
);
5686 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5689 data
->max_inv_id
= 0;
5691 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5692 SET_DECL_RTL (obj
, NULL_RTX
);
5694 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5697 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5701 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5703 free_loop_data (data
);
5704 free (data
->version_info
);
5705 BITMAP_FREE (data
->relevant
);
5706 BITMAP_FREE (data
->important_candidates
);
5707 htab_delete (data
->niters
);
5709 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5710 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5711 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5714 /* Optimizes the LOOP. Returns true if anything changed. */
5717 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5719 bool changed
= false;
5720 struct iv_ca
*iv_ca
;
5723 data
->current_loop
= loop
;
5725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5727 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5729 exit
= single_dom_exit (loop
);
5732 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5733 exit
->src
->index
, exit
->dest
->index
);
5734 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5735 fprintf (dump_file
, "\n");
5738 fprintf (dump_file
, "\n");
5741 /* For each ssa name determines whether it behaves as an induction variable
5743 if (!find_induction_variables (data
))
5746 /* Finds interesting uses (item 1). */
5747 find_interesting_uses (data
);
5748 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5751 /* Finds candidates for the induction variables (item 2). */
5752 find_iv_candidates (data
);
5754 /* Calculates the costs (item 3, part 1). */
5755 determine_use_iv_costs (data
);
5756 determine_iv_costs (data
);
5757 determine_set_costs (data
);
5759 /* Find the optimal set of induction variables (item 3, part 2). */
5760 iv_ca
= find_optimal_iv_set (data
);
5765 /* Create the new induction variables (item 4, part 1). */
5766 create_new_ivs (data
, iv_ca
);
5767 iv_ca_free (&iv_ca
);
5769 /* Rewrite the uses (item 4, part 2). */
5770 rewrite_uses (data
);
5772 /* Remove the ivs that are unused after rewriting. */
5773 remove_unused_ivs (data
);
5775 /* We have changed the structure of induction variables; it might happen
5776 that definitions in the scev database refer to some of them that were
5781 free_loop_data (data
);
5786 /* Main entry point. Optimizes induction variables in LOOPS. */
5789 tree_ssa_iv_optimize (struct loops
*loops
)
5792 struct ivopts_data data
;
5794 tree_ssa_iv_optimize_init (&data
);
5796 /* Optimize the loops starting with the innermost ones. */
5797 loop
= loops
->tree_root
;
5801 /* Scan the loops, inner ones first. */
5802 while (loop
!= loops
->tree_root
)
5804 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5805 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5807 tree_ssa_iv_optimize_loop (&data
, loop
);
5819 tree_ssa_iv_optimize_finalize (&data
);