1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base
; /* Initial value of the iv. */
105 tree base_object
; /* A memory object to that the induction variable points. */
106 tree step
; /* Step of the iv (constant only). */
107 tree ssa_name
; /* The ssa name with the value. */
108 bool biv_p
; /* Is it a biv? */
109 bool have_use_for
; /* Do we already have a use for it? */
110 unsigned use_id
; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name
; /* The ssa name. */
117 struct iv
*iv
; /* Induction variable description. */
118 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id
; /* Id of an invariant. */
121 bool preserve_biv
; /* For the original biv, whether to preserve it. */
127 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
128 USE_ADDRESS
, /* Use in an address. */
129 USE_COMPARE
/* Use is a compare. */
132 /* The candidate - cost pair. */
135 struct iv_cand
*cand
; /* The candidate. */
136 unsigned cost
; /* The cost. */
137 bitmap depends_on
; /* The list of invariants that have to be
139 tree value
; /* For final value elimination, the expression for
140 the final value of the iv. For iv elimination,
141 the new bound to compare with. */
147 unsigned id
; /* The id of the use. */
148 enum use_type type
; /* Type of the use. */
149 struct iv
*iv
; /* The induction variable it is based on. */
150 tree stmt
; /* Statement in that it occurs. */
151 tree
*op_p
; /* The place where it occurs. */
152 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
155 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
156 struct cost_pair
*cost_map
;
157 /* The costs wrto the iv candidates. */
159 struct iv_cand
*selected
;
160 /* The selected candidate. */
163 /* The position where the iv is computed. */
166 IP_NORMAL
, /* At the end, just before the exit condition. */
167 IP_END
, /* At the end of the latch block. */
168 IP_ORIGINAL
/* The original biv. */
171 /* The induction variable candidate. */
174 unsigned id
; /* The number of the candidate. */
175 bool important
; /* Whether this is an "important" candidate, i.e. such
176 that it should be considered by all uses. */
177 enum iv_position pos
; /* Where it is computed. */
178 tree incremented_at
; /* For original biv, the statement where it is
180 tree var_before
; /* The variable used for it before increment. */
181 tree var_after
; /* The variable used for it after increment. */
182 struct iv
*iv
; /* The value of the candidate. NULL for
183 "pseudocandidate" used to indicate the possibility
184 to replace the final value of an iv by direct
185 computation of the value. */
186 unsigned cost
; /* Cost of the candidate. */
187 bitmap depends_on
; /* The list of invariants that are used in step of the
191 /* The data used by the induction variable optimizations. */
193 typedef struct iv_use
*iv_use_p
;
195 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
197 typedef struct iv_cand
*iv_cand_p
;
198 DEF_VEC_P(iv_cand_p
);
199 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
203 /* The currently optimized loop. */
204 struct loop
*current_loop
;
206 /* Number of registers used in it. */
209 /* Numbers of iterations for all exits of the current loop. */
212 /* The size of version_info array allocated. */
213 unsigned version_info_size
;
215 /* The array of information for the ssa names. */
216 struct version_info
*version_info
;
218 /* The bitmap of indices in version_info whose value was changed. */
221 /* The maximum invariant id. */
224 /* The uses of induction variables. */
225 VEC(iv_use_p
,heap
) *iv_uses
;
227 /* The candidates. */
228 VEC(iv_cand_p
,heap
) *iv_candidates
;
230 /* A bitmap of important candidates. */
231 bitmap important_candidates
;
233 /* Whether to consider just related and important candidates when replacing a
235 bool consider_all_candidates
;
238 /* An assignment of iv candidates to uses. */
242 /* The number of uses covered by the assignment. */
245 /* Number of uses that cannot be expressed by the candidates in the set. */
248 /* Candidate assigned to a use, together with the related costs. */
249 struct cost_pair
**cand_for_use
;
251 /* Number of times each candidate is used. */
252 unsigned *n_cand_uses
;
254 /* The candidates used. */
257 /* The number of candidates in the set. */
260 /* Total number of registers needed. */
263 /* Total cost of expressing uses. */
264 unsigned cand_use_cost
;
266 /* Total cost of candidates. */
269 /* Number of times each invariant is used. */
270 unsigned *n_invariant_uses
;
272 /* Total cost of the assignment. */
276 /* Difference of two iv candidate assignments. */
283 /* An old assignment (for rollback purposes). */
284 struct cost_pair
*old_cp
;
286 /* A new assignment. */
287 struct cost_pair
*new_cp
;
289 /* Next change in the list. */
290 struct iv_ca_delta
*next_change
;
293 /* Bound on number of candidates below that all candidates are considered. */
295 #define CONSIDER_ALL_CANDIDATES_BOUND \
296 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
299 optimizing such a loop would help, and it would take ages). */
301 #define MAX_CONSIDERED_USES \
302 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304 /* If there are at most this number of ivs in the set, try removing unnecessary
305 ivs from the set always. */
307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
308 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
310 /* The list of trees for that the decl_rtl field must be reset is stored
313 static VEC(tree
,heap
) *decl_rtl_to_reset
;
315 /* Number of uses recorded in DATA. */
317 static inline unsigned
318 n_iv_uses (struct ivopts_data
*data
)
320 return VEC_length (iv_use_p
, data
->iv_uses
);
323 /* Ith use recorded in DATA. */
325 static inline struct iv_use
*
326 iv_use (struct ivopts_data
*data
, unsigned i
)
328 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
331 /* Number of candidates recorded in DATA. */
333 static inline unsigned
334 n_iv_cands (struct ivopts_data
*data
)
336 return VEC_length (iv_cand_p
, data
->iv_candidates
);
339 /* Ith candidate recorded in DATA. */
341 static inline struct iv_cand
*
342 iv_cand (struct ivopts_data
*data
, unsigned i
)
344 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
347 /* The single loop exit if it dominates the latch, NULL otherwise. */
350 single_dom_exit (struct loop
*loop
)
352 edge exit
= loop
->single_exit
;
357 if (!just_once_each_iteration_p (loop
, exit
->src
))
363 /* Dumps information about the induction variable IV to FILE. */
365 extern void dump_iv (FILE *, struct iv
*);
367 dump_iv (FILE *file
, struct iv
*iv
)
371 fprintf (file
, "ssa name ");
372 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
373 fprintf (file
, "\n");
376 fprintf (file
, " type ");
377 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
378 fprintf (file
, "\n");
382 fprintf (file
, " base ");
383 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
384 fprintf (file
, "\n");
386 fprintf (file
, " step ");
387 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
388 fprintf (file
, "\n");
392 fprintf (file
, " invariant ");
393 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
394 fprintf (file
, "\n");
399 fprintf (file
, " base object ");
400 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
401 fprintf (file
, "\n");
405 fprintf (file
, " is a biv\n");
408 /* Dumps information about the USE to FILE. */
410 extern void dump_use (FILE *, struct iv_use
*);
412 dump_use (FILE *file
, struct iv_use
*use
)
414 fprintf (file
, "use %d\n", use
->id
);
418 case USE_NONLINEAR_EXPR
:
419 fprintf (file
, " generic\n");
423 fprintf (file
, " address\n");
427 fprintf (file
, " compare\n");
434 fprintf (file
, " in statement ");
435 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
436 fprintf (file
, "\n");
438 fprintf (file
, " at position ");
440 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
441 fprintf (file
, "\n");
443 dump_iv (file
, use
->iv
);
445 if (use
->related_cands
)
447 fprintf (file
, " related candidates ");
448 dump_bitmap (file
, use
->related_cands
);
452 /* Dumps information about the uses to FILE. */
454 extern void dump_uses (FILE *, struct ivopts_data
*);
456 dump_uses (FILE *file
, struct ivopts_data
*data
)
461 for (i
= 0; i
< n_iv_uses (data
); i
++)
463 use
= iv_use (data
, i
);
465 dump_use (file
, use
);
466 fprintf (file
, "\n");
470 /* Dumps information about induction variable candidate CAND to FILE. */
472 extern void dump_cand (FILE *, struct iv_cand
*);
474 dump_cand (FILE *file
, struct iv_cand
*cand
)
476 struct iv
*iv
= cand
->iv
;
478 fprintf (file
, "candidate %d%s\n",
479 cand
->id
, cand
->important
? " (important)" : "");
481 if (cand
->depends_on
)
483 fprintf (file
, " depends on ");
484 dump_bitmap (file
, cand
->depends_on
);
489 fprintf (file
, " final value replacement\n");
496 fprintf (file
, " incremented before exit test\n");
500 fprintf (file
, " incremented at end\n");
504 fprintf (file
, " original biv\n");
511 /* Returns the info for ssa version VER. */
513 static inline struct version_info
*
514 ver_info (struct ivopts_data
*data
, unsigned ver
)
516 return data
->version_info
+ ver
;
519 /* Returns the info for ssa name NAME. */
521 static inline struct version_info
*
522 name_info (struct ivopts_data
*data
, tree name
)
524 return ver_info (data
, SSA_NAME_VERSION (name
));
527 /* Checks whether there exists number X such that X * B = A, counting modulo
531 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
534 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
535 unsigned HOST_WIDE_INT inv
, ex
, val
;
541 /* First divide the whole equation by 2 as long as possible. */
542 while (!(a
& 1) && !(b
& 1))
552 /* If b is still even, a is odd and there is no such x. */
556 /* Find the inverse of b. We compute it as
557 b^(2^(bits - 1) - 1) (mod 2^bits). */
560 for (i
= 0; i
< bits
- 1; i
++)
562 inv
= (inv
* ex
) & mask
;
563 ex
= (ex
* ex
) & mask
;
566 val
= (a
* inv
) & mask
;
568 gcc_assert (((val
* b
) & mask
) == a
);
570 if ((val
>> (bits
- 1)) & 1)
578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
582 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
584 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
588 if (sbb
== loop
->latch
)
594 return stmt
== last_stmt (bb
);
597 /* Returns true if STMT if after the place where the original induction
598 variable CAND is incremented. */
601 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
603 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
604 basic_block stmt_bb
= bb_for_stmt (stmt
);
605 block_stmt_iterator bsi
;
607 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
610 if (stmt_bb
!= cand_bb
)
613 /* Scan the block from the end, since the original ivs are usually
614 incremented at the end of the loop body. */
615 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
617 if (bsi_stmt (bsi
) == cand
->incremented_at
)
619 if (bsi_stmt (bsi
) == stmt
)
624 /* Returns true if STMT if after the place where the induction variable
625 CAND is incremented in LOOP. */
628 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
636 return stmt_after_ip_normal_pos (loop
, stmt
);
639 return stmt_after_ip_original_pos (cand
, stmt
);
646 /* Element of the table in that we cache the numbers of iterations obtained
647 from exits of the loop. */
651 /* The edge for that the number of iterations is cached. */
654 /* True if the # of iterations was successfully determined. */
657 /* Description of # of iterations. */
658 struct tree_niter_desc niter
;
661 /* Hash function for nfe_cache_elt E. */
664 nfe_hash (const void *e
)
666 const struct nfe_cache_elt
*elt
= e
;
668 return htab_hash_pointer (elt
->exit
);
671 /* Equality function for nfe_cache_elt E1 and edge E2. */
674 nfe_eq (const void *e1
, const void *e2
)
676 const struct nfe_cache_elt
*elt1
= e1
;
678 return elt1
->exit
== e2
;
681 /* Returns structure describing number of iterations determined from
682 EXIT of DATA->current_loop, or NULL if something goes wrong. */
684 static struct tree_niter_desc
*
685 niter_for_exit (struct ivopts_data
*data
, edge exit
)
687 struct nfe_cache_elt
*nfe_desc
;
690 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
691 htab_hash_pointer (exit
),
696 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
697 nfe_desc
->exit
= exit
;
698 nfe_desc
->valid_p
= number_of_iterations_exit (data
->current_loop
,
699 exit
, &nfe_desc
->niter
,
706 if (!nfe_desc
->valid_p
)
709 return &nfe_desc
->niter
;
712 /* Returns structure describing number of iterations determined from
713 single dominating exit of DATA->current_loop, or NULL if something
716 static struct tree_niter_desc
*
717 niter_for_single_dom_exit (struct ivopts_data
*data
)
719 edge exit
= single_dom_exit (data
->current_loop
);
724 return niter_for_exit (data
, exit
);
727 /* Initializes data structures used by the iv optimization pass, stored
731 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
733 data
->version_info_size
= 2 * num_ssa_names
;
734 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
735 data
->relevant
= BITMAP_ALLOC (NULL
);
736 data
->important_candidates
= BITMAP_ALLOC (NULL
);
737 data
->max_inv_id
= 0;
738 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
739 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
740 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
741 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
744 /* Returns a memory object to that EXPR points. In case we are able to
745 determine that it does not point to any such object, NULL is returned. */
748 determine_base_object (tree expr
)
750 enum tree_code code
= TREE_CODE (expr
);
751 tree base
, obj
, op0
, op1
;
753 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
762 obj
= TREE_OPERAND (expr
, 0);
763 base
= get_base_address (obj
);
768 if (TREE_CODE (base
) == INDIRECT_REF
)
769 return determine_base_object (TREE_OPERAND (base
, 0));
771 return fold_convert (ptr_type_node
,
772 build_fold_addr_expr (base
));
776 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
777 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
783 return (code
== PLUS_EXPR
785 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
787 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
791 return determine_base_object (TREE_OPERAND (expr
, 0));
794 return fold_convert (ptr_type_node
, expr
);
798 /* Allocates an induction variable with given initial value BASE and step STEP
802 alloc_iv (tree base
, tree step
)
804 struct iv
*iv
= XCNEW (struct iv
);
806 if (step
&& integer_zerop (step
))
810 iv
->base_object
= determine_base_object (base
);
813 iv
->have_use_for
= false;
815 iv
->ssa_name
= NULL_TREE
;
820 /* Sets STEP and BASE for induction variable IV. */
823 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
825 struct version_info
*info
= name_info (data
, iv
);
827 gcc_assert (!info
->iv
);
829 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
830 info
->iv
= alloc_iv (base
, step
);
831 info
->iv
->ssa_name
= iv
;
834 /* Finds induction variable declaration for VAR. */
837 get_iv (struct ivopts_data
*data
, tree var
)
841 if (!name_info (data
, var
)->iv
)
843 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
846 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
847 set_iv (data
, var
, var
, NULL_TREE
);
850 return name_info (data
, var
)->iv
;
853 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
854 not define a simple affine biv with nonzero step. */
857 determine_biv_step (tree phi
)
859 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
860 tree name
= PHI_RESULT (phi
);
863 if (!is_gimple_reg (name
))
866 if (!simple_iv (loop
, phi
, name
, &iv
, true))
869 return (zero_p (iv
.step
) ? NULL_TREE
: iv
.step
);
872 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
875 abnormal_ssa_name_p (tree exp
)
880 if (TREE_CODE (exp
) != SSA_NAME
)
883 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
886 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
887 abnormal phi node. Callback for for_each_index. */
890 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
891 void *data ATTRIBUTE_UNUSED
)
893 if (TREE_CODE (base
) == ARRAY_REF
)
895 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
897 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
901 return !abnormal_ssa_name_p (*index
);
904 /* Returns true if EXPR contains a ssa name that occurs in an
905 abnormal phi node. */
908 contains_abnormal_ssa_name_p (tree expr
)
911 enum tree_code_class
class;
916 code
= TREE_CODE (expr
);
917 class = TREE_CODE_CLASS (code
);
919 if (code
== SSA_NAME
)
920 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
922 if (code
== INTEGER_CST
923 || is_gimple_min_invariant (expr
))
926 if (code
== ADDR_EXPR
)
927 return !for_each_index (&TREE_OPERAND (expr
, 0),
928 idx_contains_abnormal_ssa_name_p
,
935 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
940 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
952 /* Finds basic ivs. */
955 find_bivs (struct ivopts_data
*data
)
957 tree phi
, step
, type
, base
;
959 struct loop
*loop
= data
->current_loop
;
961 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
963 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
966 step
= determine_biv_step (phi
);
970 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
971 base
= expand_simple_operations (base
);
972 if (contains_abnormal_ssa_name_p (base
)
973 || contains_abnormal_ssa_name_p (step
))
976 type
= TREE_TYPE (PHI_RESULT (phi
));
977 base
= fold_convert (type
, base
);
979 step
= fold_convert (type
, step
);
981 set_iv (data
, PHI_RESULT (phi
), base
, step
);
988 /* Marks basic ivs. */
991 mark_bivs (struct ivopts_data
*data
)
994 struct iv
*iv
, *incr_iv
;
995 struct loop
*loop
= data
->current_loop
;
998 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1000 iv
= get_iv (data
, PHI_RESULT (phi
));
1004 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1005 incr_iv
= get_iv (data
, var
);
1009 /* If the increment is in the subloop, ignore it. */
1010 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1011 if (incr_bb
->loop_father
!= data
->current_loop
1012 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1016 incr_iv
->biv_p
= true;
1020 /* Checks whether STMT defines a linear induction variable and stores its
1021 parameters to IV. */
1024 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
1027 struct loop
*loop
= data
->current_loop
;
1029 iv
->base
= NULL_TREE
;
1030 iv
->step
= NULL_TREE
;
1032 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1035 lhs
= TREE_OPERAND (stmt
, 0);
1036 if (TREE_CODE (lhs
) != SSA_NAME
)
1039 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), iv
, true))
1041 iv
->base
= expand_simple_operations (iv
->base
);
1043 if (contains_abnormal_ssa_name_p (iv
->base
)
1044 || contains_abnormal_ssa_name_p (iv
->step
))
1050 /* Finds general ivs in statement STMT. */
1053 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1057 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1060 set_iv (data
, TREE_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
1063 /* Finds general ivs in basic block BB. */
1066 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1068 block_stmt_iterator bsi
;
1070 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1071 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1074 /* Finds general ivs. */
1077 find_givs (struct ivopts_data
*data
)
1079 struct loop
*loop
= data
->current_loop
;
1080 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1083 for (i
= 0; i
< loop
->num_nodes
; i
++)
1084 find_givs_in_bb (data
, body
[i
]);
1088 /* For each ssa name defined in LOOP determines whether it is an induction
1089 variable and if so, its initial value and step. */
1092 find_induction_variables (struct ivopts_data
*data
)
1097 if (!find_bivs (data
))
1103 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1105 struct tree_niter_desc
*niter
;
1107 niter
= niter_for_single_dom_exit (data
);
1111 fprintf (dump_file
, " number of iterations ");
1112 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1113 fprintf (dump_file
, "\n");
1115 fprintf (dump_file
, " may be zero if ");
1116 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1117 fprintf (dump_file
, "\n");
1118 fprintf (dump_file
, "\n");
1121 fprintf (dump_file
, "Induction variables:\n\n");
1123 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1125 if (ver_info (data
, i
)->iv
)
1126 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1133 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1135 static struct iv_use
*
1136 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1137 tree stmt
, enum use_type use_type
)
1139 struct iv_use
*use
= XCNEW (struct iv_use
);
1141 use
->id
= n_iv_uses (data
);
1142 use
->type
= use_type
;
1146 use
->related_cands
= BITMAP_ALLOC (NULL
);
1148 /* To avoid showing ssa name in the dumps, if it was not reset by the
1150 iv
->ssa_name
= NULL_TREE
;
1152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1153 dump_use (dump_file
, use
);
1155 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1160 /* Checks whether OP is a loop-level invariant and if so, records it.
1161 NONLINEAR_USE is true if the invariant is used in a way we do not
1162 handle specially. */
1165 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1168 struct version_info
*info
;
1170 if (TREE_CODE (op
) != SSA_NAME
1171 || !is_gimple_reg (op
))
1174 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1176 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1179 info
= name_info (data
, op
);
1181 info
->has_nonlin_use
|= nonlinear_use
;
1183 info
->inv_id
= ++data
->max_inv_id
;
1184 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1187 /* Checks whether the use OP is interesting and if so, records it. */
1189 static struct iv_use
*
1190 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1197 if (TREE_CODE (op
) != SSA_NAME
)
1200 iv
= get_iv (data
, op
);
1204 if (iv
->have_use_for
)
1206 use
= iv_use (data
, iv
->use_id
);
1208 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1212 if (zero_p (iv
->step
))
1214 record_invariant (data
, op
, true);
1217 iv
->have_use_for
= true;
1219 civ
= XNEW (struct iv
);
1222 stmt
= SSA_NAME_DEF_STMT (op
);
1223 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1224 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1226 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1227 iv
->use_id
= use
->id
;
1232 /* Checks whether the condition *COND_P in STMT is interesting
1233 and if so, records it. */
1236 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1240 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1242 tree zero
= integer_zero_node
;
1244 const_iv
.step
= NULL_TREE
;
1246 if (TREE_CODE (*cond_p
) != SSA_NAME
1247 && !COMPARISON_CLASS_P (*cond_p
))
1250 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1257 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1258 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1261 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1262 iv0
= get_iv (data
, *op0_p
);
1266 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1267 iv1
= get_iv (data
, *op1_p
);
1271 if (/* When comparing with non-invariant value, we may not do any senseful
1272 induction variable elimination. */
1274 /* Eliminating condition based on two ivs would be nontrivial.
1275 ??? TODO -- it is not really important to handle this case. */
1276 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1278 find_interesting_uses_op (data
, *op0_p
);
1279 find_interesting_uses_op (data
, *op1_p
);
1283 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1285 /* If both are invariants, this is a work for unswitching. */
1289 civ
= XNEW (struct iv
);
1290 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1291 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1294 /* Returns true if expression EXPR is obviously invariant in LOOP,
1295 i.e. if all its operands are defined outside of the LOOP. */
1298 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1303 if (is_gimple_min_invariant (expr
))
1306 if (TREE_CODE (expr
) == SSA_NAME
)
1308 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1310 && flow_bb_inside_loop_p (loop
, def_bb
))
1319 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1320 for (i
= 0; i
< len
; i
++)
1321 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1327 /* Cumulates the steps of indices into DATA and replaces their values with the
1328 initial ones. Returns false when the value of the index cannot be determined.
1329 Callback for for_each_index. */
1331 struct ifs_ivopts_data
1333 struct ivopts_data
*ivopts_data
;
1339 idx_find_step (tree base
, tree
*idx
, void *data
)
1341 struct ifs_ivopts_data
*dta
= data
;
1343 tree step
, iv_step
, lbound
, off
;
1344 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1346 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1347 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1350 /* If base is a component ref, require that the offset of the reference
1352 if (TREE_CODE (base
) == COMPONENT_REF
)
1354 off
= component_ref_field_offset (base
);
1355 return expr_invariant_in_loop_p (loop
, off
);
1358 /* If base is array, first check whether we will be able to move the
1359 reference out of the loop (in order to take its address in strength
1360 reduction). In order for this to work we need both lower bound
1361 and step to be loop invariants. */
1362 if (TREE_CODE (base
) == ARRAY_REF
)
1364 step
= array_ref_element_size (base
);
1365 lbound
= array_ref_low_bound (base
);
1367 if (!expr_invariant_in_loop_p (loop
, step
)
1368 || !expr_invariant_in_loop_p (loop
, lbound
))
1372 if (TREE_CODE (*idx
) != SSA_NAME
)
1375 iv
= get_iv (dta
->ivopts_data
, *idx
);
1384 if (TREE_CODE (base
) == ARRAY_REF
)
1386 step
= array_ref_element_size (base
);
1388 /* We only handle addresses whose step is an integer constant. */
1389 if (TREE_CODE (step
) != INTEGER_CST
)
1393 /* The step for pointer arithmetics already is 1 byte. */
1394 step
= build_int_cst (sizetype
, 1);
1396 /* FIXME: convert_step should not be used outside chrec_convert: fix
1397 this by calling chrec_convert. */
1398 iv_step
= convert_step (dta
->ivopts_data
->current_loop
,
1399 sizetype
, iv
->base
, iv
->step
, dta
->stmt
);
1403 /* The index might wrap. */
1407 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1410 *dta
->step_p
= step
;
1412 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1421 idx_record_use (tree base
, tree
*idx
,
1424 find_interesting_uses_op (data
, *idx
);
1425 if (TREE_CODE (base
) == ARRAY_REF
)
1427 find_interesting_uses_op (data
, array_ref_element_size (base
));
1428 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1433 /* Returns true if memory reference REF may be unaligned. */
1436 may_be_unaligned_p (tree ref
)
1440 HOST_WIDE_INT bitsize
;
1441 HOST_WIDE_INT bitpos
;
1443 enum machine_mode mode
;
1444 int unsignedp
, volatilep
;
1445 unsigned base_align
;
1447 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1448 thus they are not misaligned. */
1449 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1452 /* The test below is basically copy of what expr.c:normal_inner_ref
1453 does to check whether the object must be loaded by parts when
1454 STRICT_ALIGNMENT is true. */
1455 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1456 &unsignedp
, &volatilep
, true);
1457 base_type
= TREE_TYPE (base
);
1458 base_align
= TYPE_ALIGN (base_type
);
1461 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1462 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1463 || bitpos
% BITS_PER_UNIT
!= 0))
1469 /* Finds addresses in *OP_P inside STMT. */
1472 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1474 tree base
= *op_p
, step
= NULL
;
1476 struct ifs_ivopts_data ifs_ivopts_data
;
1478 /* Do not play with volatile memory references. A bit too conservative,
1479 perhaps, but safe. */
1480 if (stmt_ann (stmt
)->has_volatile_ops
)
1483 /* Ignore bitfields for now. Not really something terribly complicated
1485 if (TREE_CODE (base
) == BIT_FIELD_REF
1486 || (TREE_CODE (base
) == COMPONENT_REF
1487 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base
, 1))))
1490 if (STRICT_ALIGNMENT
1491 && may_be_unaligned_p (base
))
1494 base
= unshare_expr (base
);
1496 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1498 tree type
= build_pointer_type (TREE_TYPE (base
));
1502 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1504 civ
= get_iv (data
, TMR_BASE (base
));
1508 TMR_BASE (base
) = civ
->base
;
1511 if (TMR_INDEX (base
)
1512 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1514 civ
= get_iv (data
, TMR_INDEX (base
));
1518 TMR_INDEX (base
) = civ
->base
;
1523 if (TMR_STEP (base
))
1524 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1527 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1535 base
= tree_mem_ref_addr (type
, base
);
1539 ifs_ivopts_data
.ivopts_data
= data
;
1540 ifs_ivopts_data
.stmt
= stmt
;
1541 ifs_ivopts_data
.step_p
= &step
;
1542 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1546 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1547 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1549 base
= build_fold_addr_expr (base
);
1552 civ
= alloc_iv (base
, step
);
1553 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1557 for_each_index (op_p
, idx_record_use
, data
);
1560 /* Finds and records invariants used in STMT. */
1563 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1566 use_operand_p use_p
;
1569 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1571 op
= USE_FROM_PTR (use_p
);
1572 record_invariant (data
, op
, false);
1576 /* Finds interesting uses of induction variables in the statement STMT. */
1579 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1584 use_operand_p use_p
;
1586 find_invariants_stmt (data
, stmt
);
1588 if (TREE_CODE (stmt
) == COND_EXPR
)
1590 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1594 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1596 lhs
= TREE_OPERAND (stmt
, 0);
1597 rhs
= TREE_OPERAND (stmt
, 1);
1599 if (TREE_CODE (lhs
) == SSA_NAME
)
1601 /* If the statement defines an induction variable, the uses are not
1602 interesting by themselves. */
1604 iv
= get_iv (data
, lhs
);
1606 if (iv
&& !zero_p (iv
->step
))
1610 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1612 case tcc_comparison
:
1613 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1617 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1618 if (REFERENCE_CLASS_P (lhs
))
1619 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1625 if (REFERENCE_CLASS_P (lhs
)
1626 && is_gimple_val (rhs
))
1628 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1629 find_interesting_uses_op (data
, rhs
);
1633 /* TODO -- we should also handle address uses of type
1635 memory = call (whatever);
1642 if (TREE_CODE (stmt
) == PHI_NODE
1643 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1645 lhs
= PHI_RESULT (stmt
);
1646 iv
= get_iv (data
, lhs
);
1648 if (iv
&& !zero_p (iv
->step
))
1652 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1654 op
= USE_FROM_PTR (use_p
);
1656 if (TREE_CODE (op
) != SSA_NAME
)
1659 iv
= get_iv (data
, op
);
1663 find_interesting_uses_op (data
, op
);
1667 /* Finds interesting uses of induction variables outside of loops
1668 on loop exit edge EXIT. */
1671 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1675 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1677 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1678 find_interesting_uses_op (data
, def
);
1682 /* Finds uses of the induction variables that are interesting. */
1685 find_interesting_uses (struct ivopts_data
*data
)
1688 block_stmt_iterator bsi
;
1690 basic_block
*body
= get_loop_body (data
->current_loop
);
1692 struct version_info
*info
;
1695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1696 fprintf (dump_file
, "Uses:\n\n");
1698 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1703 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1704 if (e
->dest
!= EXIT_BLOCK_PTR
1705 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1706 find_interesting_uses_outside (data
, e
);
1708 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1709 find_interesting_uses_stmt (data
, phi
);
1710 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1711 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1714 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1718 fprintf (dump_file
, "\n");
1720 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1722 info
= ver_info (data
, i
);
1725 fprintf (dump_file
, " ");
1726 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1727 fprintf (dump_file
, " is invariant (%d)%s\n",
1728 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1732 fprintf (dump_file
, "\n");
1738 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1739 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1740 we are at the top-level of the processed address. */
1743 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1744 unsigned HOST_WIDE_INT
*offset
)
1746 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1747 enum tree_code code
;
1748 tree type
, orig_type
= TREE_TYPE (expr
);
1749 unsigned HOST_WIDE_INT off0
, off1
, st
;
1750 tree orig_expr
= expr
;
1754 type
= TREE_TYPE (expr
);
1755 code
= TREE_CODE (expr
);
1761 if (!cst_and_fits_in_hwi (expr
)
1765 *offset
= int_cst_value (expr
);
1766 return build_int_cst_type (orig_type
, 0);
1770 op0
= TREE_OPERAND (expr
, 0);
1771 op1
= TREE_OPERAND (expr
, 1);
1773 op0
= strip_offset_1 (op0
, false, false, &off0
);
1774 op1
= strip_offset_1 (op1
, false, false, &off1
);
1776 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1777 if (op0
== TREE_OPERAND (expr
, 0)
1778 && op1
== TREE_OPERAND (expr
, 1))
1783 else if (zero_p (op0
))
1785 if (code
== PLUS_EXPR
)
1788 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1791 expr
= fold_build2 (code
, type
, op0
, op1
);
1793 return fold_convert (orig_type
, expr
);
1799 step
= array_ref_element_size (expr
);
1800 if (!cst_and_fits_in_hwi (step
))
1803 st
= int_cst_value (step
);
1804 op1
= TREE_OPERAND (expr
, 1);
1805 op1
= strip_offset_1 (op1
, false, false, &off1
);
1806 *offset
= off1
* st
;
1811 /* Strip the component reference completely. */
1812 op0
= TREE_OPERAND (expr
, 0);
1813 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1823 tmp
= component_ref_field_offset (expr
);
1825 && cst_and_fits_in_hwi (tmp
))
1827 /* Strip the component reference completely. */
1828 op0
= TREE_OPERAND (expr
, 0);
1829 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1830 *offset
= off0
+ int_cst_value (tmp
);
1836 op0
= TREE_OPERAND (expr
, 0);
1837 op0
= strip_offset_1 (op0
, true, true, &off0
);
1840 if (op0
== TREE_OPERAND (expr
, 0))
1843 expr
= build_fold_addr_expr (op0
);
1844 return fold_convert (orig_type
, expr
);
1847 inside_addr
= false;
1854 /* Default handling of expressions for that we want to recurse into
1855 the first operand. */
1856 op0
= TREE_OPERAND (expr
, 0);
1857 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1860 if (op0
== TREE_OPERAND (expr
, 0)
1861 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1864 expr
= copy_node (expr
);
1865 TREE_OPERAND (expr
, 0) = op0
;
1867 TREE_OPERAND (expr
, 1) = op1
;
1869 /* Inside address, we might strip the top level component references,
1870 thus changing type of the expression. Handling of ADDR_EXPR
1872 expr
= fold_convert (orig_type
, expr
);
1877 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1880 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1882 return strip_offset_1 (expr
, false, false, offset
);
1885 /* Returns variant of TYPE that can be used as base for different uses.
1886 For integer types, we return unsigned variant of the type, which
1887 avoids problems with overflows. For pointer types, we return void *. */
1890 generic_type_for (tree type
)
1892 if (POINTER_TYPE_P (type
))
1893 return ptr_type_node
;
1895 if (TYPE_UNSIGNED (type
))
1898 return unsigned_type_for (type
);
1901 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1902 the bitmap to that we should store it. */
1904 static struct ivopts_data
*fd_ivopts_data
;
1906 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1908 bitmap
*depends_on
= data
;
1909 struct version_info
*info
;
1911 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1913 info
= name_info (fd_ivopts_data
, *expr_p
);
1915 if (!info
->inv_id
|| info
->has_nonlin_use
)
1919 *depends_on
= BITMAP_ALLOC (NULL
);
1920 bitmap_set_bit (*depends_on
, info
->inv_id
);
1925 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1926 position to POS. If USE is not NULL, the candidate is set as related to
1927 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1928 replacement of the final value of the iv by a direct computation. */
1930 static struct iv_cand
*
1931 add_candidate_1 (struct ivopts_data
*data
,
1932 tree base
, tree step
, bool important
, enum iv_position pos
,
1933 struct iv_use
*use
, tree incremented_at
)
1936 struct iv_cand
*cand
= NULL
;
1937 tree type
, orig_type
;
1941 orig_type
= TREE_TYPE (base
);
1942 type
= generic_type_for (orig_type
);
1943 if (type
!= orig_type
)
1945 base
= fold_convert (type
, base
);
1947 step
= fold_convert (type
, step
);
1951 for (i
= 0; i
< n_iv_cands (data
); i
++)
1953 cand
= iv_cand (data
, i
);
1955 if (cand
->pos
!= pos
)
1958 if (cand
->incremented_at
!= incremented_at
)
1972 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
1975 if (zero_p (cand
->iv
->step
))
1982 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
1987 if (i
== n_iv_cands (data
))
1989 cand
= XCNEW (struct iv_cand
);
1995 cand
->iv
= alloc_iv (base
, step
);
1998 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2000 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2001 cand
->var_after
= cand
->var_before
;
2003 cand
->important
= important
;
2004 cand
->incremented_at
= incremented_at
;
2005 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2008 && TREE_CODE (step
) != INTEGER_CST
)
2010 fd_ivopts_data
= data
;
2011 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2014 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2015 dump_cand (dump_file
, cand
);
2018 if (important
&& !cand
->important
)
2020 cand
->important
= true;
2021 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2022 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2027 bitmap_set_bit (use
->related_cands
, i
);
2028 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2029 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2036 /* Returns true if incrementing the induction variable at the end of the LOOP
2039 The purpose is to avoid splitting latch edge with a biv increment, thus
2040 creating a jump, possibly confusing other optimization passes and leaving
2041 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2042 is not available (so we do not have a better alternative), or if the latch
2043 edge is already nonempty. */
2046 allow_ip_end_pos_p (struct loop
*loop
)
2048 if (!ip_normal_pos (loop
))
2051 if (!empty_block_p (ip_end_pos (loop
)))
2057 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2058 position to POS. If USE is not NULL, the candidate is set as related to
2059 it. The candidate computation is scheduled on all available positions. */
2062 add_candidate (struct ivopts_data
*data
,
2063 tree base
, tree step
, bool important
, struct iv_use
*use
)
2065 if (ip_normal_pos (data
->current_loop
))
2066 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2067 if (ip_end_pos (data
->current_loop
)
2068 && allow_ip_end_pos_p (data
->current_loop
))
2069 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2072 /* Add a standard "0 + 1 * iteration" iv candidate for a
2073 type with SIZE bits. */
2076 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2079 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2080 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2084 /* Adds standard iv candidates. */
2087 add_standard_iv_candidates (struct ivopts_data
*data
)
2089 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2091 /* The same for a double-integer type if it is still fast enough. */
2092 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2093 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2097 /* Adds candidates bases on the old induction variable IV. */
2100 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2103 struct iv_cand
*cand
;
2105 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2107 /* The same, but with initial value zero. */
2108 add_candidate (data
,
2109 build_int_cst (TREE_TYPE (iv
->base
), 0),
2110 iv
->step
, true, NULL
);
2112 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2113 if (TREE_CODE (phi
) == PHI_NODE
)
2115 /* Additionally record the possibility of leaving the original iv
2117 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2118 cand
= add_candidate_1 (data
,
2119 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2120 SSA_NAME_DEF_STMT (def
));
2121 cand
->var_before
= iv
->ssa_name
;
2122 cand
->var_after
= def
;
2126 /* Adds candidates based on the old induction variables. */
2129 add_old_ivs_candidates (struct ivopts_data
*data
)
2135 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2137 iv
= ver_info (data
, i
)->iv
;
2138 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2139 add_old_iv_candidates (data
, iv
);
2143 /* Adds candidates based on the value of the induction variable IV and USE. */
2146 add_iv_value_candidates (struct ivopts_data
*data
,
2147 struct iv
*iv
, struct iv_use
*use
)
2149 unsigned HOST_WIDE_INT offset
;
2152 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2154 /* The same, but with initial value zero. Make such variable important,
2155 since it is generic enough so that possibly many uses may be based
2157 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2158 iv
->step
, true, use
);
2160 /* Third, try removing the constant offset. */
2161 base
= strip_offset (iv
->base
, &offset
);
2163 add_candidate (data
, base
, iv
->step
, false, use
);
2166 /* Adds candidates based on the uses. */
2169 add_derived_ivs_candidates (struct ivopts_data
*data
)
2173 for (i
= 0; i
< n_iv_uses (data
); i
++)
2175 struct iv_use
*use
= iv_use (data
, i
);
2182 case USE_NONLINEAR_EXPR
:
2185 /* Just add the ivs based on the value of the iv used here. */
2186 add_iv_value_candidates (data
, use
->iv
, use
);
2195 /* Record important candidates and add them to related_cands bitmaps
2199 record_important_candidates (struct ivopts_data
*data
)
2204 for (i
= 0; i
< n_iv_cands (data
); i
++)
2206 struct iv_cand
*cand
= iv_cand (data
, i
);
2208 if (cand
->important
)
2209 bitmap_set_bit (data
->important_candidates
, i
);
2212 data
->consider_all_candidates
= (n_iv_cands (data
)
2213 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2215 if (data
->consider_all_candidates
)
2217 /* We will not need "related_cands" bitmaps in this case,
2218 so release them to decrease peak memory consumption. */
2219 for (i
= 0; i
< n_iv_uses (data
); i
++)
2221 use
= iv_use (data
, i
);
2222 BITMAP_FREE (use
->related_cands
);
2227 /* Add important candidates to the related_cands bitmaps. */
2228 for (i
= 0; i
< n_iv_uses (data
); i
++)
2229 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2230 data
->important_candidates
);
2234 /* Finds the candidates for the induction variables. */
2237 find_iv_candidates (struct ivopts_data
*data
)
2239 /* Add commonly used ivs. */
2240 add_standard_iv_candidates (data
);
2242 /* Add old induction variables. */
2243 add_old_ivs_candidates (data
);
2245 /* Add induction variables derived from uses. */
2246 add_derived_ivs_candidates (data
);
2248 /* Record the important candidates. */
2249 record_important_candidates (data
);
2252 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2253 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2254 we allocate a simple list to every use. */
2257 alloc_use_cost_map (struct ivopts_data
*data
)
2259 unsigned i
, size
, s
, j
;
2261 for (i
= 0; i
< n_iv_uses (data
); i
++)
2263 struct iv_use
*use
= iv_use (data
, i
);
2266 if (data
->consider_all_candidates
)
2267 size
= n_iv_cands (data
);
2271 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2276 /* Round up to the power of two, so that moduling by it is fast. */
2277 for (size
= 1; size
< s
; size
<<= 1)
2281 use
->n_map_members
= size
;
2282 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2286 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2287 on invariants DEPENDS_ON and that the value used in expressing it
2291 set_use_iv_cost (struct ivopts_data
*data
,
2292 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2293 bitmap depends_on
, tree value
)
2299 BITMAP_FREE (depends_on
);
2303 if (data
->consider_all_candidates
)
2305 use
->cost_map
[cand
->id
].cand
= cand
;
2306 use
->cost_map
[cand
->id
].cost
= cost
;
2307 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2308 use
->cost_map
[cand
->id
].value
= value
;
2312 /* n_map_members is a power of two, so this computes modulo. */
2313 s
= cand
->id
& (use
->n_map_members
- 1);
2314 for (i
= s
; i
< use
->n_map_members
; i
++)
2315 if (!use
->cost_map
[i
].cand
)
2317 for (i
= 0; i
< s
; i
++)
2318 if (!use
->cost_map
[i
].cand
)
2324 use
->cost_map
[i
].cand
= cand
;
2325 use
->cost_map
[i
].cost
= cost
;
2326 use
->cost_map
[i
].depends_on
= depends_on
;
2327 use
->cost_map
[i
].value
= value
;
2330 /* Gets cost of (USE, CANDIDATE) pair. */
2332 static struct cost_pair
*
2333 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2334 struct iv_cand
*cand
)
2337 struct cost_pair
*ret
;
2342 if (data
->consider_all_candidates
)
2344 ret
= use
->cost_map
+ cand
->id
;
2351 /* n_map_members is a power of two, so this computes modulo. */
2352 s
= cand
->id
& (use
->n_map_members
- 1);
2353 for (i
= s
; i
< use
->n_map_members
; i
++)
2354 if (use
->cost_map
[i
].cand
== cand
)
2355 return use
->cost_map
+ i
;
2357 for (i
= 0; i
< s
; i
++)
2358 if (use
->cost_map
[i
].cand
== cand
)
2359 return use
->cost_map
+ i
;
2364 /* Returns estimate on cost of computing SEQ. */
2372 for (; seq
; seq
= NEXT_INSN (seq
))
2374 set
= single_set (seq
);
2376 cost
+= rtx_cost (set
, SET
);
2384 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2386 produce_memory_decl_rtl (tree obj
, int *regno
)
2391 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2393 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2394 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2397 x
= gen_raw_REG (Pmode
, (*regno
)++);
2399 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2402 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2403 walk_tree. DATA contains the actual fake register number. */
2406 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2408 tree obj
= NULL_TREE
;
2412 switch (TREE_CODE (*expr_p
))
2415 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2416 handled_component_p (*expr_p
);
2417 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2420 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2421 x
= produce_memory_decl_rtl (obj
, regno
);
2426 obj
= SSA_NAME_VAR (*expr_p
);
2427 if (!DECL_RTL_SET_P (obj
))
2428 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2437 if (DECL_RTL_SET_P (obj
))
2440 if (DECL_MODE (obj
) == BLKmode
)
2441 x
= produce_memory_decl_rtl (obj
, regno
);
2443 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2453 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2454 SET_DECL_RTL (obj
, x
);
2460 /* Determines cost of the computation of EXPR. */
2463 computation_cost (tree expr
)
2466 tree type
= TREE_TYPE (expr
);
2468 /* Avoid using hard regs in ways which may be unsupported. */
2469 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2471 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2473 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2477 cost
= seq_cost (seq
);
2479 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2484 /* Returns variable containing the value of candidate CAND at statement AT. */
2487 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2489 if (stmt_after_increment (loop
, cand
, stmt
))
2490 return cand
->var_after
;
2492 return cand
->var_before
;
2495 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2496 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2499 tree_int_cst_sign_bit (tree t
)
2501 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2502 unsigned HOST_WIDE_INT w
;
2504 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2505 w
= TREE_INT_CST_LOW (t
);
2508 w
= TREE_INT_CST_HIGH (t
);
2509 bitno
-= HOST_BITS_PER_WIDE_INT
;
2512 return (w
>> bitno
) & 1;
2515 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2516 return cst. Otherwise return NULL_TREE. */
2519 constant_multiple_of (tree type
, tree top
, tree bot
)
2521 tree res
, mby
, p0
, p1
;
2522 enum tree_code code
;
2528 if (operand_equal_p (top
, bot
, 0))
2529 return build_int_cst (type
, 1);
2531 code
= TREE_CODE (top
);
2535 mby
= TREE_OPERAND (top
, 1);
2536 if (TREE_CODE (mby
) != INTEGER_CST
)
2539 res
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2543 return fold_binary_to_constant (MULT_EXPR
, type
, res
,
2544 fold_convert (type
, mby
));
2548 p0
= constant_multiple_of (type
, TREE_OPERAND (top
, 0), bot
);
2551 p1
= constant_multiple_of (type
, TREE_OPERAND (top
, 1), bot
);
2555 return fold_binary_to_constant (code
, type
, p0
, p1
);
2558 if (TREE_CODE (bot
) != INTEGER_CST
)
2561 bot
= fold_convert (type
, bot
);
2562 top
= fold_convert (type
, top
);
2564 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2565 the result afterwards. */
2566 if (tree_int_cst_sign_bit (bot
))
2569 bot
= fold_unary_to_constant (NEGATE_EXPR
, type
, bot
);
2574 /* Ditto for TOP. */
2575 if (tree_int_cst_sign_bit (top
))
2578 top
= fold_unary_to_constant (NEGATE_EXPR
, type
, top
);
2581 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR
, type
, top
, bot
)))
2584 res
= fold_binary_to_constant (EXACT_DIV_EXPR
, type
, top
, bot
);
2586 res
= fold_unary_to_constant (NEGATE_EXPR
, type
, res
);
2594 /* Sets COMB to CST. */
2597 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2598 unsigned HOST_WIDE_INT cst
)
2600 unsigned prec
= TYPE_PRECISION (type
);
2603 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2606 comb
->rest
= NULL_TREE
;
2607 comb
->offset
= cst
& comb
->mask
;
2610 /* Sets COMB to single element ELT. */
2613 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2615 unsigned prec
= TYPE_PRECISION (type
);
2618 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2621 comb
->elts
[0] = elt
;
2623 comb
->rest
= NULL_TREE
;
2627 /* Scales COMB by SCALE. */
2630 aff_combination_scale (struct affine_tree_combination
*comb
,
2631 unsigned HOST_WIDE_INT scale
)
2640 aff_combination_const (comb
, comb
->type
, 0);
2644 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2645 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2647 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2648 comb
->elts
[j
] = comb
->elts
[i
];
2649 if (comb
->coefs
[j
] != 0)
2656 if (comb
->n
< MAX_AFF_ELTS
)
2658 comb
->coefs
[comb
->n
] = scale
;
2659 comb
->elts
[comb
->n
] = comb
->rest
;
2660 comb
->rest
= NULL_TREE
;
2664 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2665 build_int_cst_type (comb
->type
, scale
));
2669 /* Adds ELT * SCALE to COMB. */
2672 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2673 unsigned HOST_WIDE_INT scale
)
2680 for (i
= 0; i
< comb
->n
; i
++)
2681 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2683 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2688 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2689 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2693 gcc_assert (comb
->n
== MAX_AFF_ELTS
- 1);
2694 comb
->coefs
[comb
->n
] = 1;
2695 comb
->elts
[comb
->n
] = comb
->rest
;
2696 comb
->rest
= NULL_TREE
;
2701 if (comb
->n
< MAX_AFF_ELTS
)
2703 comb
->coefs
[comb
->n
] = scale
;
2704 comb
->elts
[comb
->n
] = elt
;
2710 elt
= fold_convert (comb
->type
, elt
);
2712 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2713 fold_convert (comb
->type
, elt
),
2714 build_int_cst_type (comb
->type
, scale
));
2717 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2722 /* Adds COMB2 to COMB1. */
2725 aff_combination_add (struct affine_tree_combination
*comb1
,
2726 struct affine_tree_combination
*comb2
)
2730 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2731 for (i
= 0; i
< comb2
->n
; i
++)
2732 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2734 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2737 /* Splits EXPR into an affine combination of parts. */
2740 tree_to_aff_combination (tree expr
, tree type
,
2741 struct affine_tree_combination
*comb
)
2743 struct affine_tree_combination tmp
;
2744 enum tree_code code
;
2745 tree cst
, core
, toffset
;
2746 HOST_WIDE_INT bitpos
, bitsize
;
2747 enum machine_mode mode
;
2748 int unsignedp
, volatilep
;
2752 code
= TREE_CODE (expr
);
2756 aff_combination_const (comb
, type
, int_cst_value (expr
));
2761 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2762 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2763 if (code
== MINUS_EXPR
)
2764 aff_combination_scale (&tmp
, -1);
2765 aff_combination_add (comb
, &tmp
);
2769 cst
= TREE_OPERAND (expr
, 1);
2770 if (TREE_CODE (cst
) != INTEGER_CST
)
2772 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2773 aff_combination_scale (comb
, int_cst_value (cst
));
2777 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2778 aff_combination_scale (comb
, -1);
2782 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2783 &toffset
, &mode
, &unsignedp
, &volatilep
,
2785 if (bitpos
% BITS_PER_UNIT
!= 0)
2787 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2788 core
= build_fold_addr_expr (core
);
2789 if (TREE_CODE (core
) == ADDR_EXPR
)
2790 aff_combination_add_elt (comb
, core
, 1);
2793 tree_to_aff_combination (core
, type
, &tmp
);
2794 aff_combination_add (comb
, &tmp
);
2798 tree_to_aff_combination (toffset
, type
, &tmp
);
2799 aff_combination_add (comb
, &tmp
);
2807 aff_combination_elt (comb
, type
, expr
);
2810 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2813 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2814 unsigned HOST_WIDE_INT mask
)
2816 enum tree_code code
;
2819 elt
= fold_convert (type
, elt
);
2826 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2832 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2834 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2838 return fold_build2 (MULT_EXPR
, type
, elt
,
2839 build_int_cst_type (type
, scale
));
2841 if ((scale
| (mask
>> 1)) == mask
)
2843 /* Scale is negative. */
2845 scale
= (-scale
) & mask
;
2850 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2851 build_int_cst_type (type
, scale
));
2852 return fold_build2 (code
, type
, expr
, elt
);
2855 /* Copies the tree elements of COMB to ensure that they are not shared. */
2858 unshare_aff_combination (struct affine_tree_combination
*comb
)
2862 for (i
= 0; i
< comb
->n
; i
++)
2863 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2865 comb
->rest
= unshare_expr (comb
->rest
);
2868 /* Makes tree from the affine combination COMB. */
2871 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2873 tree type
= comb
->type
;
2874 tree expr
= comb
->rest
;
2876 unsigned HOST_WIDE_INT off
, sgn
;
2878 /* Handle the special case produced by get_computation_aff when
2879 the type does not fit in HOST_WIDE_INT. */
2880 if (comb
->n
== 0 && comb
->offset
== 0)
2881 return fold_convert (type
, expr
);
2883 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2885 for (i
= 0; i
< comb
->n
; i
++)
2886 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2889 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2891 /* Offset is negative. */
2892 off
= (-comb
->offset
) & comb
->mask
;
2900 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2904 /* Determines the expression by that USE is expressed from induction variable
2905 CAND at statement AT in LOOP. The expression is stored in a decomposed
2906 form into AFF. Returns false if USE cannot be expressed using CAND. */
2909 get_computation_aff (struct loop
*loop
,
2910 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
2911 struct affine_tree_combination
*aff
)
2913 tree ubase
= use
->iv
->base
;
2914 tree ustep
= use
->iv
->step
;
2915 tree cbase
= cand
->iv
->base
;
2916 tree cstep
= cand
->iv
->step
;
2917 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2921 unsigned HOST_WIDE_INT ustepi
, cstepi
;
2922 HOST_WIDE_INT ratioi
;
2923 struct affine_tree_combination cbase_aff
, expr_aff
;
2924 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
2926 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2928 /* We do not have a precision to express the values of use. */
2932 expr
= var_at_stmt (loop
, cand
, at
);
2934 if (TREE_TYPE (expr
) != ctype
)
2936 /* This may happen with the original ivs. */
2937 expr
= fold_convert (ctype
, expr
);
2940 if (TYPE_UNSIGNED (utype
))
2944 uutype
= unsigned_type_for (utype
);
2945 ubase
= fold_convert (uutype
, ubase
);
2946 ustep
= fold_convert (uutype
, ustep
);
2949 if (uutype
!= ctype
)
2951 expr
= fold_convert (uutype
, expr
);
2952 cbase
= fold_convert (uutype
, cbase
);
2953 cstep
= fold_convert (uutype
, cstep
);
2955 /* If the conversion is not noop, we must take it into account when
2956 considering the value of the step. */
2957 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2961 if (cst_and_fits_in_hwi (cstep_orig
)
2962 && cst_and_fits_in_hwi (ustep_orig
))
2964 ustepi
= int_cst_value (ustep_orig
);
2965 cstepi
= int_cst_value (cstep_orig
);
2967 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
2969 /* TODO maybe consider case when ustep divides cstep and the ratio is
2970 a power of 2 (so that the division is fast to execute)? We would
2971 need to be much more careful with overflows etc. then. */
2975 ratio
= build_int_cst_type (uutype
, ratioi
);
2979 ratio
= constant_multiple_of (uutype
, ustep_orig
, cstep_orig
);
2983 /* Ratioi is only used to detect special cases when the multiplicative
2984 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2985 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2986 to integer_onep/integer_all_onesp, since the former ignores
2988 if (cst_and_fits_in_hwi (ratio
))
2989 ratioi
= int_cst_value (ratio
);
2990 else if (integer_onep (ratio
))
2992 else if (integer_all_onesp (ratio
))
2998 /* We may need to shift the value if we are after the increment. */
2999 if (stmt_after_increment (loop
, cand
, at
))
3000 cbase
= fold_build2 (PLUS_EXPR
, uutype
, cbase
, cstep
);
3002 /* use = ubase - ratio * cbase + ratio * var.
3004 In general case ubase + ratio * (var - cbase) could be better (one less
3005 multiplication), but often it is possible to eliminate redundant parts
3006 of computations from (ubase - ratio * cbase) term, and if it does not
3007 happen, fold is able to apply the distributive law to obtain this form
3010 if (TYPE_PRECISION (uutype
) > HOST_BITS_PER_WIDE_INT
)
3012 /* Let's compute in trees and just return the result in AFF. This case
3013 should not be very common, and fold itself is not that bad either,
3014 so making the aff. functions more complicated to handle this case
3015 is not that urgent. */
3018 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, cbase
);
3019 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3021 else if (ratioi
== -1)
3023 delta
= fold_build2 (PLUS_EXPR
, uutype
, ubase
, cbase
);
3024 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3028 delta
= fold_build2 (MULT_EXPR
, uutype
, cbase
, ratio
);
3029 delta
= fold_build2 (MINUS_EXPR
, uutype
, ubase
, delta
);
3030 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3031 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3042 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3043 possible to compute ratioi. */
3044 gcc_assert (ratioi
);
3046 tree_to_aff_combination (ubase
, uutype
, aff
);
3047 tree_to_aff_combination (cbase
, uutype
, &cbase_aff
);
3048 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3049 aff_combination_scale (&cbase_aff
, -ratioi
);
3050 aff_combination_scale (&expr_aff
, ratioi
);
3051 aff_combination_add (aff
, &cbase_aff
);
3052 aff_combination_add (aff
, &expr_aff
);
3057 /* Determines the expression by that USE is expressed from induction variable
3058 CAND at statement AT in LOOP. The computation is unshared. */
3061 get_computation_at (struct loop
*loop
,
3062 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3064 struct affine_tree_combination aff
;
3065 tree type
= TREE_TYPE (use
->iv
->base
);
3067 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3069 unshare_aff_combination (&aff
);
3070 return fold_convert (type
, aff_combination_to_tree (&aff
));
3073 /* Determines the expression by that USE is expressed from induction variable
3074 CAND in LOOP. The computation is unshared. */
3077 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3079 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3082 /* Returns cost of addition in MODE. */
3085 add_cost (enum machine_mode mode
)
3087 static unsigned costs
[NUM_MACHINE_MODES
];
3095 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3096 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3097 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3102 cost
= seq_cost (seq
);
3108 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3109 fprintf (dump_file
, "Addition in %s costs %d\n",
3110 GET_MODE_NAME (mode
), cost
);
3114 /* Entry in a hashtable of already known costs for multiplication. */
3117 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3118 enum machine_mode mode
; /* In mode. */
3119 unsigned cost
; /* The cost. */
3122 /* Counts hash value for the ENTRY. */
3125 mbc_entry_hash (const void *entry
)
3127 const struct mbc_entry
*e
= entry
;
3129 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3132 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3135 mbc_entry_eq (const void *entry1
, const void *entry2
)
3137 const struct mbc_entry
*e1
= entry1
;
3138 const struct mbc_entry
*e2
= entry2
;
3140 return (e1
->mode
== e2
->mode
3141 && e1
->cst
== e2
->cst
);
3144 /* Returns cost of multiplication by constant CST in MODE. */
3147 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3149 static htab_t costs
;
3150 struct mbc_entry
**cached
, act
;
3155 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3159 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3161 return (*cached
)->cost
;
3163 *cached
= XNEW (struct mbc_entry
);
3164 (*cached
)->mode
= mode
;
3165 (*cached
)->cst
= cst
;
3168 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3169 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3173 cost
= seq_cost (seq
);
3175 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3176 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3177 (int) cst
, GET_MODE_NAME (mode
), cost
);
3179 (*cached
)->cost
= cost
;
3184 /* Returns true if multiplying by RATIO is allowed in address. */
3187 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
)
3189 #define MAX_RATIO 128
3190 static sbitmap valid_mult
;
3194 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3198 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3199 sbitmap_zero (valid_mult
);
3200 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3201 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3203 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3204 if (memory_address_p (Pmode
, addr
))
3205 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3208 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3210 fprintf (dump_file
, " allowed multipliers:");
3211 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3212 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3213 fprintf (dump_file
, " %d", (int) i
);
3214 fprintf (dump_file
, "\n");
3215 fprintf (dump_file
, "\n");
3219 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3222 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3225 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3226 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3227 variable is omitted. The created memory accesses MODE.
3229 TODO -- there must be some better way. This all is quite crude. */
3232 get_address_cost (bool symbol_present
, bool var_present
,
3233 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
3235 static bool initialized
= false;
3236 static HOST_WIDE_INT rat
, off
;
3237 static HOST_WIDE_INT min_offset
, max_offset
;
3238 static unsigned costs
[2][2][2][2];
3239 unsigned cost
, acost
;
3240 rtx seq
, addr
, base
;
3241 bool offset_p
, ratio_p
;
3243 HOST_WIDE_INT s_offset
;
3244 unsigned HOST_WIDE_INT mask
;
3252 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3254 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3255 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3257 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3258 if (!memory_address_p (Pmode
, addr
))
3261 max_offset
= i
>> 1;
3264 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3266 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3267 if (!memory_address_p (Pmode
, addr
))
3270 min_offset
= -(i
>> 1);
3272 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3274 fprintf (dump_file
, "get_address_cost:\n");
3275 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
3276 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
3280 for (i
= 2; i
<= MAX_RATIO
; i
++)
3281 if (multiplier_allowed_in_address_p (i
))
3288 bits
= GET_MODE_BITSIZE (Pmode
);
3289 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3291 if ((offset
>> (bits
- 1) & 1))
3296 offset_p
= (s_offset
!= 0
3297 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
3298 ratio_p
= (ratio
!= 1
3299 && multiplier_allowed_in_address_p (ratio
));
3301 if (ratio
!= 1 && !ratio_p
)
3302 cost
+= multiply_by_cost (ratio
, Pmode
);
3304 if (s_offset
&& !offset_p
&& !symbol_present
)
3306 cost
+= add_cost (Pmode
);
3310 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3313 int old_cse_not_expected
;
3316 addr
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3317 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3319 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, gen_int_mode (rat
, Pmode
));
3322 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3326 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3328 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3329 gen_rtx_fmt_ee (PLUS
, Pmode
,
3331 gen_int_mode (off
, Pmode
)));
3334 base
= gen_int_mode (off
, Pmode
);
3339 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3342 /* To avoid splitting addressing modes, pretend that no cse will
3344 old_cse_not_expected
= cse_not_expected
;
3345 cse_not_expected
= true;
3346 addr
= memory_address (Pmode
, addr
);
3347 cse_not_expected
= old_cse_not_expected
;
3351 acost
= seq_cost (seq
);
3352 acost
+= address_cost (addr
, Pmode
);
3356 costs
[symbol_present
][var_present
][offset_p
][ratio_p
] = acost
;
3359 return cost
+ acost
;
3362 /* Estimates cost of forcing expression EXPR into a variable. */
3365 force_expr_to_var_cost (tree expr
)
3367 static bool costs_initialized
= false;
3368 static unsigned integer_cost
;
3369 static unsigned symbol_cost
;
3370 static unsigned address_cost
;
3372 unsigned cost0
, cost1
, cost
;
3373 enum machine_mode mode
;
3375 if (!costs_initialized
)
3377 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3378 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3379 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3381 tree type
= build_pointer_type (integer_type_node
);
3383 integer_cost
= computation_cost (build_int_cst_type (integer_type_node
,
3386 SET_DECL_RTL (var
, x
);
3387 TREE_STATIC (var
) = 1;
3388 addr
= build1 (ADDR_EXPR
, type
, var
);
3389 symbol_cost
= computation_cost (addr
) + 1;
3392 = computation_cost (build2 (PLUS_EXPR
, type
,
3394 build_int_cst_type (type
, 2000))) + 1;
3395 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3397 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3398 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3399 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3400 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3401 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3402 fprintf (dump_file
, "\n");
3405 costs_initialized
= true;
3410 if (SSA_VAR_P (expr
))
3413 if (TREE_INVARIANT (expr
))
3415 if (TREE_CODE (expr
) == INTEGER_CST
)
3416 return integer_cost
;
3418 if (TREE_CODE (expr
) == ADDR_EXPR
)
3420 tree obj
= TREE_OPERAND (expr
, 0);
3422 if (TREE_CODE (obj
) == VAR_DECL
3423 || TREE_CODE (obj
) == PARM_DECL
3424 || TREE_CODE (obj
) == RESULT_DECL
)
3428 return address_cost
;
3431 switch (TREE_CODE (expr
))
3436 op0
= TREE_OPERAND (expr
, 0);
3437 op1
= TREE_OPERAND (expr
, 1);
3441 if (is_gimple_val (op0
))
3444 cost0
= force_expr_to_var_cost (op0
);
3446 if (is_gimple_val (op1
))
3449 cost1
= force_expr_to_var_cost (op1
);
3454 /* Just an arbitrary value, FIXME. */
3455 return target_spill_cost
;
3458 mode
= TYPE_MODE (TREE_TYPE (expr
));
3459 switch (TREE_CODE (expr
))
3463 cost
= add_cost (mode
);
3467 if (cst_and_fits_in_hwi (op0
))
3468 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3469 else if (cst_and_fits_in_hwi (op1
))
3470 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3472 return target_spill_cost
;
3482 /* Bound the cost by target_spill_cost. The parts of complicated
3483 computations often are either loop invariant or at least can
3484 be shared between several iv uses, so letting this grow without
3485 limits would not give reasonable results. */
3486 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3489 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3490 invariants the computation depends on. */
3493 force_var_cost (struct ivopts_data
*data
,
3494 tree expr
, bitmap
*depends_on
)
3498 fd_ivopts_data
= data
;
3499 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3502 return force_expr_to_var_cost (expr
);
3505 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3506 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3507 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3508 invariants the computation depends on. */
3511 split_address_cost (struct ivopts_data
*data
,
3512 tree addr
, bool *symbol_present
, bool *var_present
,
3513 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3516 HOST_WIDE_INT bitsize
;
3517 HOST_WIDE_INT bitpos
;
3519 enum machine_mode mode
;
3520 int unsignedp
, volatilep
;
3522 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3523 &unsignedp
, &volatilep
, false);
3526 || bitpos
% BITS_PER_UNIT
!= 0
3527 || TREE_CODE (core
) != VAR_DECL
)
3529 *symbol_present
= false;
3530 *var_present
= true;
3531 fd_ivopts_data
= data
;
3532 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3533 return target_spill_cost
;
3536 *offset
+= bitpos
/ BITS_PER_UNIT
;
3537 if (TREE_STATIC (core
)
3538 || DECL_EXTERNAL (core
))
3540 *symbol_present
= true;
3541 *var_present
= false;
3545 *symbol_present
= false;
3546 *var_present
= true;
3550 /* Estimates cost of expressing difference of addresses E1 - E2 as
3551 var + symbol + offset. The value of offset is added to OFFSET,
3552 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3553 part is missing. DEPENDS_ON is a set of the invariants the computation
3557 ptr_difference_cost (struct ivopts_data
*data
,
3558 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3559 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3561 HOST_WIDE_INT diff
= 0;
3564 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3566 if (ptr_difference_const (e1
, e2
, &diff
))
3569 *symbol_present
= false;
3570 *var_present
= false;
3574 if (e2
== integer_zero_node
)
3575 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3576 symbol_present
, var_present
, offset
, depends_on
);
3578 *symbol_present
= false;
3579 *var_present
= true;
3581 cost
= force_var_cost (data
, e1
, depends_on
);
3582 cost
+= force_var_cost (data
, e2
, depends_on
);
3583 cost
+= add_cost (Pmode
);
3588 /* Estimates cost of expressing difference E1 - E2 as
3589 var + symbol + offset. The value of offset is added to OFFSET,
3590 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3591 part is missing. DEPENDS_ON is a set of the invariants the computation
3595 difference_cost (struct ivopts_data
*data
,
3596 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3597 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3600 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3601 unsigned HOST_WIDE_INT off1
, off2
;
3603 e1
= strip_offset (e1
, &off1
);
3604 e2
= strip_offset (e2
, &off2
);
3605 *offset
+= off1
- off2
;
3610 if (TREE_CODE (e1
) == ADDR_EXPR
)
3611 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3613 *symbol_present
= false;
3615 if (operand_equal_p (e1
, e2
, 0))
3617 *var_present
= false;
3620 *var_present
= true;
3622 return force_var_cost (data
, e1
, depends_on
);
3626 cost
= force_var_cost (data
, e2
, depends_on
);
3627 cost
+= multiply_by_cost (-1, mode
);
3632 cost
= force_var_cost (data
, e1
, depends_on
);
3633 cost
+= force_var_cost (data
, e2
, depends_on
);
3634 cost
+= add_cost (mode
);
3639 /* Determines the cost of the computation by that USE is expressed
3640 from induction variable CAND. If ADDRESS_P is true, we just need
3641 to create an address from it, otherwise we want to get it into
3642 register. A set of invariants we depend on is stored in
3643 DEPENDS_ON. AT is the statement at that the value is computed. */
3646 get_computation_cost_at (struct ivopts_data
*data
,
3647 struct iv_use
*use
, struct iv_cand
*cand
,
3648 bool address_p
, bitmap
*depends_on
, tree at
)
3650 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3652 tree utype
= TREE_TYPE (ubase
), ctype
;
3653 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3654 HOST_WIDE_INT ratio
, aratio
;
3655 bool var_present
, symbol_present
;
3656 unsigned cost
= 0, n_sums
;
3660 /* Only consider real candidates. */
3664 cbase
= cand
->iv
->base
;
3665 cstep
= cand
->iv
->step
;
3666 ctype
= TREE_TYPE (cbase
);
3668 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3670 /* We do not have a precision to express the values of use. */
3676 /* Do not try to express address of an object with computation based
3677 on address of a different object. This may cause problems in rtl
3678 level alias analysis (that does not expect this to be happening,
3679 as this is illegal in C), and would be unlikely to be useful
3681 if (use
->iv
->base_object
3682 && cand
->iv
->base_object
3683 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3687 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3689 /* TODO -- add direct handling of this case. */
3693 /* CSTEPI is removed from the offset in case statement is after the
3694 increment. If the step is not constant, we use zero instead.
3695 This is a bit imprecise (there is the extra addition), but
3696 redundancy elimination is likely to transform the code so that
3697 it uses value of the variable before increment anyway,
3698 so it is not that much unrealistic. */
3699 if (cst_and_fits_in_hwi (cstep
))
3700 cstepi
= int_cst_value (cstep
);
3704 if (cst_and_fits_in_hwi (ustep
)
3705 && cst_and_fits_in_hwi (cstep
))
3707 ustepi
= int_cst_value (ustep
);
3709 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3716 rat
= constant_multiple_of (utype
, ustep
, cstep
);
3721 if (cst_and_fits_in_hwi (rat
))
3722 ratio
= int_cst_value (rat
);
3723 else if (integer_onep (rat
))
3725 else if (integer_all_onesp (rat
))
3731 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3732 or ratio == 1, it is better to handle this like
3734 ubase - ratio * cbase + ratio * var
3736 (also holds in the case ratio == -1, TODO. */
3738 if (cst_and_fits_in_hwi (cbase
))
3740 offset
= - ratio
* int_cst_value (cbase
);
3741 cost
+= difference_cost (data
,
3742 ubase
, integer_zero_node
,
3743 &symbol_present
, &var_present
, &offset
,
3746 else if (ratio
== 1)
3748 cost
+= difference_cost (data
,
3750 &symbol_present
, &var_present
, &offset
,
3755 cost
+= force_var_cost (data
, cbase
, depends_on
);
3756 cost
+= add_cost (TYPE_MODE (ctype
));
3757 cost
+= difference_cost (data
,
3758 ubase
, integer_zero_node
,
3759 &symbol_present
, &var_present
, &offset
,
3763 /* If we are after the increment, the value of the candidate is higher by
3765 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3766 offset
-= ratio
* cstepi
;
3768 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3769 (symbol/var/const parts may be omitted). If we are looking for an address,
3770 find the cost of addressing this. */
3772 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3774 /* Otherwise estimate the costs for computing the expression. */
3775 aratio
= ratio
> 0 ? ratio
: -ratio
;
3776 if (!symbol_present
&& !var_present
&& !offset
)
3779 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3785 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3789 /* Symbol + offset should be compile-time computable. */
3790 && (symbol_present
|| offset
))
3793 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3797 /* Just get the expression, expand it and measure the cost. */
3798 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3804 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3806 return computation_cost (comp
);
3810 /* Determines the cost of the computation by that USE is expressed
3811 from induction variable CAND. If ADDRESS_P is true, we just need
3812 to create an address from it, otherwise we want to get it into
3813 register. A set of invariants we depend on is stored in
3817 get_computation_cost (struct ivopts_data
*data
,
3818 struct iv_use
*use
, struct iv_cand
*cand
,
3819 bool address_p
, bitmap
*depends_on
)
3821 return get_computation_cost_at (data
,
3822 use
, cand
, address_p
, depends_on
, use
->stmt
);
3825 /* Determines cost of basing replacement of USE on CAND in a generic
3829 determine_use_iv_cost_generic (struct ivopts_data
*data
,
3830 struct iv_use
*use
, struct iv_cand
*cand
)
3835 /* The simple case first -- if we need to express value of the preserved
3836 original biv, the cost is 0. This also prevents us from counting the
3837 cost of increment twice -- once at this use and once in the cost of
3839 if (cand
->pos
== IP_ORIGINAL
3840 && cand
->incremented_at
== use
->stmt
)
3842 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
3846 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
3847 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3849 return cost
!= INFTY
;
3852 /* Determines cost of basing replacement of USE on CAND in an address. */
3855 determine_use_iv_cost_address (struct ivopts_data
*data
,
3856 struct iv_use
*use
, struct iv_cand
*cand
)
3859 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
3861 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
3863 return cost
!= INFTY
;
3866 /* Computes value of induction variable IV in iteration NITER. */
3869 iv_value (struct iv
*iv
, tree niter
)
3872 tree type
= TREE_TYPE (iv
->base
);
3874 niter
= fold_convert (type
, niter
);
3875 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
3877 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
3880 /* Computes value of candidate CAND at position AT in iteration NITER. */
3883 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
3885 tree val
= iv_value (cand
->iv
, niter
);
3886 tree type
= TREE_TYPE (cand
->iv
->base
);
3888 if (stmt_after_increment (loop
, cand
, at
))
3889 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
3894 /* Returns period of induction variable iv. */
3897 iv_period (struct iv
*iv
)
3899 tree step
= iv
->step
, period
, type
;
3902 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
3904 /* Period of the iv is gcd (step, type range). Since type range is power
3905 of two, it suffices to determine the maximum power of two that divides
3907 pow2div
= num_ending_zeros (step
);
3908 type
= unsigned_type_for (TREE_TYPE (step
));
3910 period
= build_low_bits_mask (type
,
3911 (TYPE_PRECISION (type
)
3912 - tree_low_cst (pow2div
, 1)));
3917 /* Returns the comparison operator used when eliminating the iv USE. */
3919 static enum tree_code
3920 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
3922 struct loop
*loop
= data
->current_loop
;
3926 ex_bb
= bb_for_stmt (use
->stmt
);
3927 exit
= EDGE_SUCC (ex_bb
, 0);
3928 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3929 exit
= EDGE_SUCC (ex_bb
, 1);
3931 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
3934 /* Check whether it is possible to express the condition in USE by comparison
3935 of candidate CAND. If so, store the value compared with to BOUND. */
3938 may_eliminate_iv (struct ivopts_data
*data
,
3939 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
3943 struct tree_niter_desc
*niter
;
3945 tree wider_type
, period
, per_type
;
3946 struct loop
*loop
= data
->current_loop
;
3948 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
3951 /* For now works only for exits that dominate the loop latch. TODO -- extend
3952 for other conditions inside loop body. */
3953 ex_bb
= bb_for_stmt (use
->stmt
);
3954 if (use
->stmt
!= last_stmt (ex_bb
)
3955 || TREE_CODE (use
->stmt
) != COND_EXPR
)
3957 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
3960 exit
= EDGE_SUCC (ex_bb
, 0);
3961 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3962 exit
= EDGE_SUCC (ex_bb
, 1);
3963 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
3966 niter
= niter_for_exit (data
, exit
);
3968 || !zero_p (niter
->may_be_zero
))
3972 nit_type
= TREE_TYPE (nit
);
3974 /* Determine whether we may use the variable to test whether niter iterations
3975 elapsed. This is the case iff the period of the induction variable is
3976 greater than the number of iterations. */
3977 period
= iv_period (cand
->iv
);
3980 per_type
= TREE_TYPE (period
);
3982 wider_type
= TREE_TYPE (period
);
3983 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
3984 wider_type
= per_type
;
3986 wider_type
= nit_type
;
3988 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
3989 fold_convert (wider_type
, period
),
3990 fold_convert (wider_type
, nit
))))
3993 *bound
= cand_value_at (loop
, cand
, use
->stmt
, nit
);
3997 /* Determines cost of basing replacement of USE on CAND in a condition. */
4000 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4001 struct iv_use
*use
, struct iv_cand
*cand
)
4003 tree bound
= NULL_TREE
, op
, cond
;
4004 bitmap depends_on
= NULL
;
4007 /* Only consider real candidates. */
4010 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4014 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4016 cost
= force_var_cost (data
, bound
, &depends_on
);
4018 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4019 return cost
!= INFTY
;
4022 /* The induction variable elimination failed; just express the original
4023 giv. If it is compared with an invariant, note that we cannot get
4025 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4028 if (TREE_CODE (cond
) != SSA_NAME
)
4030 op
= TREE_OPERAND (cond
, 0);
4031 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4032 op
= TREE_OPERAND (cond
, 1);
4033 if (TREE_CODE (op
) == SSA_NAME
)
4035 op
= get_iv (data
, op
)->base
;
4036 fd_ivopts_data
= data
;
4037 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4041 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4042 return cost
!= INFTY
;
4045 /* Determines cost of basing replacement of USE on CAND. Returns false
4046 if USE cannot be based on CAND. */
4049 determine_use_iv_cost (struct ivopts_data
*data
,
4050 struct iv_use
*use
, struct iv_cand
*cand
)
4054 case USE_NONLINEAR_EXPR
:
4055 return determine_use_iv_cost_generic (data
, use
, cand
);
4058 return determine_use_iv_cost_address (data
, use
, cand
);
4061 return determine_use_iv_cost_condition (data
, use
, cand
);
4068 /* Determines costs of basing the use of the iv on an iv candidate. */
4071 determine_use_iv_costs (struct ivopts_data
*data
)
4075 struct iv_cand
*cand
;
4076 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4078 alloc_use_cost_map (data
);
4080 for (i
= 0; i
< n_iv_uses (data
); i
++)
4082 use
= iv_use (data
, i
);
4084 if (data
->consider_all_candidates
)
4086 for (j
= 0; j
< n_iv_cands (data
); j
++)
4088 cand
= iv_cand (data
, j
);
4089 determine_use_iv_cost (data
, use
, cand
);
4096 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4098 cand
= iv_cand (data
, j
);
4099 if (!determine_use_iv_cost (data
, use
, cand
))
4100 bitmap_set_bit (to_clear
, j
);
4103 /* Remove the candidates for that the cost is infinite from
4104 the list of related candidates. */
4105 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4106 bitmap_clear (to_clear
);
4110 BITMAP_FREE (to_clear
);
4112 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4114 fprintf (dump_file
, "Use-candidate costs:\n");
4116 for (i
= 0; i
< n_iv_uses (data
); i
++)
4118 use
= iv_use (data
, i
);
4120 fprintf (dump_file
, "Use %d:\n", i
);
4121 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4122 for (j
= 0; j
< use
->n_map_members
; j
++)
4124 if (!use
->cost_map
[j
].cand
4125 || use
->cost_map
[j
].cost
== INFTY
)
4128 fprintf (dump_file
, " %d\t%d\t",
4129 use
->cost_map
[j
].cand
->id
,
4130 use
->cost_map
[j
].cost
);
4131 if (use
->cost_map
[j
].depends_on
)
4132 bitmap_print (dump_file
,
4133 use
->cost_map
[j
].depends_on
, "","");
4134 fprintf (dump_file
, "\n");
4137 fprintf (dump_file
, "\n");
4139 fprintf (dump_file
, "\n");
4143 /* Determines cost of the candidate CAND. */
4146 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4148 unsigned cost_base
, cost_step
;
4157 /* There are two costs associated with the candidate -- its increment
4158 and its initialization. The second is almost negligible for any loop
4159 that rolls enough, so we take it just very little into account. */
4161 base
= cand
->iv
->base
;
4162 cost_base
= force_var_cost (data
, base
, NULL
);
4163 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4165 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4167 /* Prefer the original iv unless we may gain something by replacing it;
4168 this is not really relevant for artificial ivs created by other
4170 if (cand
->pos
== IP_ORIGINAL
4171 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4174 /* Prefer not to insert statements into latch unless there are some
4175 already (so that we do not create unnecessary jumps). */
4176 if (cand
->pos
== IP_END
4177 && empty_block_p (ip_end_pos (data
->current_loop
)))
4181 /* Determines costs of computation of the candidates. */
4184 determine_iv_costs (struct ivopts_data
*data
)
4188 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4190 fprintf (dump_file
, "Candidate costs:\n");
4191 fprintf (dump_file
, " cand\tcost\n");
4194 for (i
= 0; i
< n_iv_cands (data
); i
++)
4196 struct iv_cand
*cand
= iv_cand (data
, i
);
4198 determine_iv_cost (data
, cand
);
4200 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4201 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4204 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4205 fprintf (dump_file
, "\n");
4208 /* Calculates cost for having SIZE induction variables. */
4211 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4213 return global_cost_for_size (size
, data
->regs_used
, n_iv_uses (data
));
4216 /* For each size of the induction variable set determine the penalty. */
4219 determine_set_costs (struct ivopts_data
*data
)
4223 struct loop
*loop
= data
->current_loop
;
4226 /* We use the following model (definitely improvable, especially the
4227 cost function -- TODO):
4229 We estimate the number of registers available (using MD data), name it A.
4231 We estimate the number of registers used by the loop, name it U. This
4232 number is obtained as the number of loop phi nodes (not counting virtual
4233 registers and bivs) + the number of variables from outside of the loop.
4235 We set a reserve R (free regs that are used for temporary computations,
4236 etc.). For now the reserve is a constant 3.
4238 Let I be the number of induction variables.
4240 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4241 make a lot of ivs without a reason).
4242 -- if A - R < U + I <= A, the cost is I * PRES_COST
4243 -- if U + I > A, the cost is I * PRES_COST and
4244 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4248 fprintf (dump_file
, "Global costs:\n");
4249 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4250 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4251 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4252 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
4256 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4258 op
= PHI_RESULT (phi
);
4260 if (!is_gimple_reg (op
))
4263 if (get_iv (data
, op
))
4269 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4271 struct version_info
*info
= ver_info (data
, j
);
4273 if (info
->inv_id
&& info
->has_nonlin_use
)
4277 data
->regs_used
= n
;
4278 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4279 fprintf (dump_file
, " regs_used %d\n", n
);
4281 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4283 fprintf (dump_file
, " cost for size:\n");
4284 fprintf (dump_file
, " ivs\tcost\n");
4285 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4286 fprintf (dump_file
, " %d\t%d\n", j
,
4287 ivopts_global_cost_for_size (data
, j
));
4288 fprintf (dump_file
, "\n");
4292 /* Returns true if A is a cheaper cost pair than B. */
4295 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4303 if (a
->cost
< b
->cost
)
4306 if (a
->cost
> b
->cost
)
4309 /* In case the costs are the same, prefer the cheaper candidate. */
4310 if (a
->cand
->cost
< b
->cand
->cost
)
4316 /* Computes the cost field of IVS structure. */
4319 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4323 cost
+= ivs
->cand_use_cost
;
4324 cost
+= ivs
->cand_cost
;
4325 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4330 /* Remove invariants in set INVS to set IVS. */
4333 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4341 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4343 ivs
->n_invariant_uses
[iid
]--;
4344 if (ivs
->n_invariant_uses
[iid
] == 0)
4349 /* Set USE not to be expressed by any candidate in IVS. */
4352 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4355 unsigned uid
= use
->id
, cid
;
4356 struct cost_pair
*cp
;
4358 cp
= ivs
->cand_for_use
[uid
];
4364 ivs
->cand_for_use
[uid
] = NULL
;
4365 ivs
->n_cand_uses
[cid
]--;
4367 if (ivs
->n_cand_uses
[cid
] == 0)
4369 bitmap_clear_bit (ivs
->cands
, cid
);
4370 /* Do not count the pseudocandidates. */
4374 ivs
->cand_cost
-= cp
->cand
->cost
;
4376 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4379 ivs
->cand_use_cost
-= cp
->cost
;
4381 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4382 iv_ca_recount_cost (data
, ivs
);
4385 /* Add invariants in set INVS to set IVS. */
4388 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4396 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4398 ivs
->n_invariant_uses
[iid
]++;
4399 if (ivs
->n_invariant_uses
[iid
] == 1)
4404 /* Set cost pair for USE in set IVS to CP. */
4407 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4408 struct iv_use
*use
, struct cost_pair
*cp
)
4410 unsigned uid
= use
->id
, cid
;
4412 if (ivs
->cand_for_use
[uid
] == cp
)
4415 if (ivs
->cand_for_use
[uid
])
4416 iv_ca_set_no_cp (data
, ivs
, use
);
4423 ivs
->cand_for_use
[uid
] = cp
;
4424 ivs
->n_cand_uses
[cid
]++;
4425 if (ivs
->n_cand_uses
[cid
] == 1)
4427 bitmap_set_bit (ivs
->cands
, cid
);
4428 /* Do not count the pseudocandidates. */
4432 ivs
->cand_cost
+= cp
->cand
->cost
;
4434 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4437 ivs
->cand_use_cost
+= cp
->cost
;
4438 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4439 iv_ca_recount_cost (data
, ivs
);
4443 /* Extend set IVS by expressing USE by some of the candidates in it
4447 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4450 struct cost_pair
*best_cp
= NULL
, *cp
;
4454 gcc_assert (ivs
->upto
>= use
->id
);
4456 if (ivs
->upto
== use
->id
)
4462 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4464 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4466 if (cheaper_cost_pair (cp
, best_cp
))
4470 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4473 /* Get cost for assignment IVS. */
4476 iv_ca_cost (struct iv_ca
*ivs
)
4478 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4481 /* Returns true if all dependences of CP are among invariants in IVS. */
4484 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4489 if (!cp
->depends_on
)
4492 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4494 if (ivs
->n_invariant_uses
[i
] == 0)
4501 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4502 it before NEXT_CHANGE. */
4504 static struct iv_ca_delta
*
4505 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4506 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4508 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4511 change
->old_cp
= old_cp
;
4512 change
->new_cp
= new_cp
;
4513 change
->next_change
= next_change
;
4518 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4521 static struct iv_ca_delta
*
4522 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4524 struct iv_ca_delta
*last
;
4532 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4534 last
->next_change
= l2
;
4539 /* Returns candidate by that USE is expressed in IVS. */
4541 static struct cost_pair
*
4542 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4544 return ivs
->cand_for_use
[use
->id
];
4547 /* Reverse the list of changes DELTA, forming the inverse to it. */
4549 static struct iv_ca_delta
*
4550 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4552 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4553 struct cost_pair
*tmp
;
4555 for (act
= delta
; act
; act
= next
)
4557 next
= act
->next_change
;
4558 act
->next_change
= prev
;
4562 act
->old_cp
= act
->new_cp
;
4569 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4570 reverted instead. */
4573 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4574 struct iv_ca_delta
*delta
, bool forward
)
4576 struct cost_pair
*from
, *to
;
4577 struct iv_ca_delta
*act
;
4580 delta
= iv_ca_delta_reverse (delta
);
4582 for (act
= delta
; act
; act
= act
->next_change
)
4586 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4587 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4591 iv_ca_delta_reverse (delta
);
4594 /* Returns true if CAND is used in IVS. */
4597 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4599 return ivs
->n_cand_uses
[cand
->id
] > 0;
4602 /* Returns number of induction variable candidates in the set IVS. */
4605 iv_ca_n_cands (struct iv_ca
*ivs
)
4607 return ivs
->n_cands
;
4610 /* Free the list of changes DELTA. */
4613 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4615 struct iv_ca_delta
*act
, *next
;
4617 for (act
= *delta
; act
; act
= next
)
4619 next
= act
->next_change
;
4626 /* Allocates new iv candidates assignment. */
4628 static struct iv_ca
*
4629 iv_ca_new (struct ivopts_data
*data
)
4631 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4635 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4636 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4637 nw
->cands
= BITMAP_ALLOC (NULL
);
4640 nw
->cand_use_cost
= 0;
4642 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4648 /* Free memory occupied by the set IVS. */
4651 iv_ca_free (struct iv_ca
**ivs
)
4653 free ((*ivs
)->cand_for_use
);
4654 free ((*ivs
)->n_cand_uses
);
4655 BITMAP_FREE ((*ivs
)->cands
);
4656 free ((*ivs
)->n_invariant_uses
);
4661 /* Dumps IVS to FILE. */
4664 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4666 const char *pref
= " invariants ";
4669 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4670 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4672 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4673 if (ivs
->n_invariant_uses
[i
])
4675 fprintf (file
, "%s%d", pref
, i
);
4678 fprintf (file
, "\n");
4681 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4682 new set, and store differences in DELTA. Number of induction variables
4683 in the new set is stored to N_IVS. */
4686 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4687 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4692 struct cost_pair
*old_cp
, *new_cp
;
4695 for (i
= 0; i
< ivs
->upto
; i
++)
4697 use
= iv_use (data
, i
);
4698 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4701 && old_cp
->cand
== cand
)
4704 new_cp
= get_use_iv_cost (data
, use
, cand
);
4708 if (!iv_ca_has_deps (ivs
, new_cp
))
4711 if (!cheaper_cost_pair (new_cp
, old_cp
))
4714 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4717 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4718 cost
= iv_ca_cost (ivs
);
4720 *n_ivs
= iv_ca_n_cands (ivs
);
4721 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4726 /* Try narrowing set IVS by removing CAND. Return the cost of
4727 the new set and store the differences in DELTA. */
4730 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4731 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4735 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4737 struct iv_cand
*cnd
;
4741 for (i
= 0; i
< n_iv_uses (data
); i
++)
4743 use
= iv_use (data
, i
);
4745 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4746 if (old_cp
->cand
!= cand
)
4751 if (data
->consider_all_candidates
)
4753 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4758 cnd
= iv_cand (data
, ci
);
4760 cp
= get_use_iv_cost (data
, use
, cnd
);
4763 if (!iv_ca_has_deps (ivs
, cp
))
4766 if (!cheaper_cost_pair (cp
, new_cp
))
4774 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4779 cnd
= iv_cand (data
, ci
);
4781 cp
= get_use_iv_cost (data
, use
, cnd
);
4784 if (!iv_ca_has_deps (ivs
, cp
))
4787 if (!cheaper_cost_pair (cp
, new_cp
))
4796 iv_ca_delta_free (delta
);
4800 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4803 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4804 cost
= iv_ca_cost (ivs
);
4805 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4810 /* Try optimizing the set of candidates IVS by removing candidates different
4811 from to EXCEPT_CAND from it. Return cost of the new set, and store
4812 differences in DELTA. */
4815 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4816 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
4819 struct iv_ca_delta
*act_delta
, *best_delta
;
4820 unsigned i
, best_cost
, acost
;
4821 struct iv_cand
*cand
;
4824 best_cost
= iv_ca_cost (ivs
);
4826 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4828 cand
= iv_cand (data
, i
);
4830 if (cand
== except_cand
)
4833 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
4835 if (acost
< best_cost
)
4838 iv_ca_delta_free (&best_delta
);
4839 best_delta
= act_delta
;
4842 iv_ca_delta_free (&act_delta
);
4851 /* Recurse to possibly remove other unnecessary ivs. */
4852 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4853 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
4854 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
4855 *delta
= iv_ca_delta_join (best_delta
, *delta
);
4859 /* Tries to extend the sets IVS in the best possible way in order
4860 to express the USE. */
4863 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4866 unsigned best_cost
, act_cost
;
4869 struct iv_cand
*cand
;
4870 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
4871 struct cost_pair
*cp
;
4873 iv_ca_add_use (data
, ivs
, use
);
4874 best_cost
= iv_ca_cost (ivs
);
4876 cp
= iv_ca_cand_for_use (ivs
, use
);
4879 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
4880 iv_ca_set_no_cp (data
, ivs
, use
);
4883 /* First try important candidates. Only if it fails, try the specific ones.
4884 Rationale -- in loops with many variables the best choice often is to use
4885 just one generic biv. If we added here many ivs specific to the uses,
4886 the optimization algorithm later would be likely to get stuck in a local
4887 minimum, thus causing us to create too many ivs. The approach from
4888 few ivs to more seems more likely to be successful -- starting from few
4889 ivs, replacing an expensive use by a specific iv should always be a
4891 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
4893 cand
= iv_cand (data
, i
);
4895 if (iv_ca_cand_used_p (ivs
, cand
))
4898 cp
= get_use_iv_cost (data
, use
, cand
);
4902 iv_ca_set_cp (data
, ivs
, use
, cp
);
4903 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4904 iv_ca_set_no_cp (data
, ivs
, use
);
4905 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
4907 if (act_cost
< best_cost
)
4909 best_cost
= act_cost
;
4911 iv_ca_delta_free (&best_delta
);
4912 best_delta
= act_delta
;
4915 iv_ca_delta_free (&act_delta
);
4918 if (best_cost
== INFTY
)
4920 for (i
= 0; i
< use
->n_map_members
; i
++)
4922 cp
= use
->cost_map
+ i
;
4927 /* Already tried this. */
4928 if (cand
->important
)
4931 if (iv_ca_cand_used_p (ivs
, cand
))
4935 iv_ca_set_cp (data
, ivs
, use
, cp
);
4936 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
4937 iv_ca_set_no_cp (data
, ivs
, use
);
4938 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
4941 if (act_cost
< best_cost
)
4943 best_cost
= act_cost
;
4946 iv_ca_delta_free (&best_delta
);
4947 best_delta
= act_delta
;
4950 iv_ca_delta_free (&act_delta
);
4954 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
4955 iv_ca_delta_free (&best_delta
);
4957 return (best_cost
!= INFTY
);
4960 /* Finds an initial assignment of candidates to uses. */
4962 static struct iv_ca
*
4963 get_initial_solution (struct ivopts_data
*data
)
4965 struct iv_ca
*ivs
= iv_ca_new (data
);
4968 for (i
= 0; i
< n_iv_uses (data
); i
++)
4969 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
4978 /* Tries to improve set of induction variables IVS. */
4981 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4983 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
4984 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
4985 struct iv_cand
*cand
;
4987 /* Try extending the set of induction variables by one. */
4988 for (i
= 0; i
< n_iv_cands (data
); i
++)
4990 cand
= iv_cand (data
, i
);
4992 if (iv_ca_cand_used_p (ivs
, cand
))
4995 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
4999 /* If we successfully added the candidate and the set is small enough,
5000 try optimizing it by removing other candidates. */
5001 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5003 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5004 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5005 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5006 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5009 if (acost
< best_cost
)
5012 iv_ca_delta_free (&best_delta
);
5013 best_delta
= act_delta
;
5016 iv_ca_delta_free (&act_delta
);
5021 /* Try removing the candidates from the set instead. */
5022 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5024 /* Nothing more we can do. */
5029 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5030 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5031 iv_ca_delta_free (&best_delta
);
5035 /* Attempts to find the optimal set of induction variables. We do simple
5036 greedy heuristic -- we try to replace at most one candidate in the selected
5037 solution and remove the unused ivs while this improves the cost. */
5039 static struct iv_ca
*
5040 find_optimal_iv_set (struct ivopts_data
*data
)
5046 /* Get the initial solution. */
5047 set
= get_initial_solution (data
);
5050 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5051 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5055 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5057 fprintf (dump_file
, "Initial set of candidates:\n");
5058 iv_ca_dump (data
, dump_file
, set
);
5061 while (try_improve_iv_set (data
, set
))
5063 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5065 fprintf (dump_file
, "Improved to:\n");
5066 iv_ca_dump (data
, dump_file
, set
);
5070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5071 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5073 for (i
= 0; i
< n_iv_uses (data
); i
++)
5075 use
= iv_use (data
, i
);
5076 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5082 /* Creates a new induction variable corresponding to CAND. */
5085 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5087 block_stmt_iterator incr_pos
;
5097 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5101 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5106 /* Mark that the iv is preserved. */
5107 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5108 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5110 /* Rewrite the increment so that it uses var_before directly. */
5111 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5116 gimple_add_tmp_var (cand
->var_before
);
5117 add_referenced_tmp_var (cand
->var_before
);
5119 base
= unshare_expr (cand
->iv
->base
);
5121 create_iv (base
, unshare_expr (cand
->iv
->step
),
5122 cand
->var_before
, data
->current_loop
,
5123 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5126 /* Creates new induction variables described in SET. */
5129 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5132 struct iv_cand
*cand
;
5135 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5137 cand
= iv_cand (data
, i
);
5138 create_new_iv (data
, cand
);
5142 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5143 is true, remove also the ssa name defined by the statement. */
5146 remove_statement (tree stmt
, bool including_defined_name
)
5148 if (TREE_CODE (stmt
) == PHI_NODE
)
5150 if (!including_defined_name
)
5152 /* Prevent the ssa name defined by the statement from being removed. */
5153 SET_PHI_RESULT (stmt
, NULL
);
5155 remove_phi_node (stmt
, NULL_TREE
);
5159 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5161 bsi_remove (&bsi
, true);
5165 /* Rewrites USE (definition of iv used in a nonlinear expression)
5166 using candidate CAND. */
5169 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5170 struct iv_use
*use
, struct iv_cand
*cand
)
5173 tree op
, stmts
, tgt
, ass
;
5174 block_stmt_iterator bsi
, pbsi
;
5176 /* An important special case -- if we are asked to express value of
5177 the original iv by itself, just exit; there is no need to
5178 introduce a new computation (that might also need casting the
5179 variable to unsigned and back). */
5180 if (cand
->pos
== IP_ORIGINAL
5181 && cand
->incremented_at
== use
->stmt
)
5183 tree step
, ctype
, utype
;
5184 enum tree_code incr_code
= PLUS_EXPR
;
5186 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5187 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5189 step
= cand
->iv
->step
;
5190 ctype
= TREE_TYPE (step
);
5191 utype
= TREE_TYPE (cand
->var_after
);
5192 if (TREE_CODE (step
) == NEGATE_EXPR
)
5194 incr_code
= MINUS_EXPR
;
5195 step
= TREE_OPERAND (step
, 0);
5198 /* Check whether we may leave the computation unchanged.
5199 This is the case only if it does not rely on other
5200 computations in the loop -- otherwise, the computation
5201 we rely upon may be removed in remove_unused_ivs,
5202 thus leading to ICE. */
5203 op
= TREE_OPERAND (use
->stmt
, 1);
5204 if (TREE_CODE (op
) == PLUS_EXPR
5205 || TREE_CODE (op
) == MINUS_EXPR
)
5207 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5208 op
= TREE_OPERAND (op
, 1);
5209 else if (TREE_CODE (op
) == PLUS_EXPR
5210 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5211 op
= TREE_OPERAND (op
, 0);
5219 && (TREE_CODE (op
) == INTEGER_CST
5220 || operand_equal_p (op
, step
, 0)))
5223 /* Otherwise, add the necessary computations to express
5225 op
= fold_convert (ctype
, cand
->var_before
);
5226 comp
= fold_convert (utype
,
5227 build2 (incr_code
, ctype
, op
,
5228 unshare_expr (step
)));
5231 comp
= get_computation (data
->current_loop
, use
, cand
);
5233 switch (TREE_CODE (use
->stmt
))
5236 tgt
= PHI_RESULT (use
->stmt
);
5238 /* If we should keep the biv, do not replace it. */
5239 if (name_info (data
, tgt
)->preserve_biv
)
5242 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5243 while (!bsi_end_p (pbsi
)
5244 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5252 tgt
= TREE_OPERAND (use
->stmt
, 0);
5253 bsi
= bsi_for_stmt (use
->stmt
);
5260 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5262 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5265 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5266 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5267 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5268 remove_statement (use
->stmt
, false);
5269 SSA_NAME_DEF_STMT (tgt
) = ass
;
5274 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5275 TREE_OPERAND (use
->stmt
, 1) = op
;
5279 /* Replaces ssa name in index IDX by its basic variable. Callback for
5283 idx_remove_ssa_names (tree base
, tree
*idx
,
5284 void *data ATTRIBUTE_UNUSED
)
5288 if (TREE_CODE (*idx
) == SSA_NAME
)
5289 *idx
= SSA_NAME_VAR (*idx
);
5291 if (TREE_CODE (base
) == ARRAY_REF
)
5293 op
= &TREE_OPERAND (base
, 2);
5295 && TREE_CODE (*op
) == SSA_NAME
)
5296 *op
= SSA_NAME_VAR (*op
);
5297 op
= &TREE_OPERAND (base
, 3);
5299 && TREE_CODE (*op
) == SSA_NAME
)
5300 *op
= SSA_NAME_VAR (*op
);
5306 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5309 unshare_and_remove_ssa_names (tree ref
)
5311 ref
= unshare_expr (ref
);
5312 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5317 /* Extract the alias analysis info for the memory reference REF. There are
5318 several ways how this information may be stored and what precisely is
5319 its semantics depending on the type of the reference, but there always is
5320 somewhere hidden one _DECL node that is used to determine the set of
5321 virtual operands for the reference. The code below deciphers this jungle
5322 and extracts this single useful piece of information. */
5325 get_ref_tag (tree ref
, tree orig
)
5327 tree var
= get_base_address (ref
);
5328 tree aref
= NULL_TREE
, tag
, sv
;
5329 HOST_WIDE_INT offset
, size
, maxsize
;
5331 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5333 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5338 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5339 return unshare_expr (sv
);
5344 if (TREE_CODE (var
) == INDIRECT_REF
)
5346 /* If the base is a dereference of a pointer, first check its name memory
5347 tag. If it does not have one, use its symbol memory tag. */
5348 var
= TREE_OPERAND (var
, 0);
5349 if (TREE_CODE (var
) != SSA_NAME
)
5352 if (SSA_NAME_PTR_INFO (var
))
5354 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5359 var
= SSA_NAME_VAR (var
);
5360 tag
= var_ann (var
)->symbol_mem_tag
;
5361 gcc_assert (tag
!= NULL_TREE
);
5369 tag
= var_ann (var
)->symbol_mem_tag
;
5377 /* Copies the reference information from OLD_REF to NEW_REF. */
5380 copy_ref_info (tree new_ref
, tree old_ref
)
5382 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5383 copy_mem_ref_info (new_ref
, old_ref
);
5386 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5387 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5391 /* Rewrites USE (address that is an iv) using candidate CAND. */
5394 rewrite_use_address (struct ivopts_data
*data
,
5395 struct iv_use
*use
, struct iv_cand
*cand
)
5397 struct affine_tree_combination aff
;
5398 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5401 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5402 unshare_aff_combination (&aff
);
5404 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5405 copy_ref_info (ref
, *use
->op_p
);
5409 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5413 rewrite_use_compare (struct ivopts_data
*data
,
5414 struct iv_use
*use
, struct iv_cand
*cand
)
5417 tree
*op_p
, cond
, op
, stmts
, bound
;
5418 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5419 enum tree_code compare
;
5420 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5425 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5426 tree var_type
= TREE_TYPE (var
);
5428 compare
= iv_elimination_compare (data
, use
);
5429 bound
= fold_convert (var_type
, bound
);
5430 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5434 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5436 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5437 update_stmt (use
->stmt
);
5441 /* The induction variable elimination failed; just express the original
5443 comp
= get_computation (data
->current_loop
, use
, cand
);
5446 op_p
= &TREE_OPERAND (cond
, 0);
5447 if (TREE_CODE (*op_p
) != SSA_NAME
5448 || zero_p (get_iv (data
, *op_p
)->step
))
5449 op_p
= &TREE_OPERAND (cond
, 1);
5451 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5453 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5458 /* Ensure that operand *OP_P may be used at the end of EXIT without
5459 violating loop closed ssa form. */
5462 protect_loop_closed_ssa_form_use (edge exit
, use_operand_p op_p
)
5465 struct loop
*def_loop
;
5468 use
= USE_FROM_PTR (op_p
);
5469 if (TREE_CODE (use
) != SSA_NAME
)
5472 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (use
));
5476 def_loop
= def_bb
->loop_father
;
5477 if (flow_bb_inside_loop_p (def_loop
, exit
->dest
))
5480 /* Try finding a phi node that copies the value out of the loop. */
5481 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
5482 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == use
)
5487 /* Create such a phi node. */
5488 tree new_name
= duplicate_ssa_name (use
, NULL
);
5490 phi
= create_phi_node (new_name
, exit
->dest
);
5491 SSA_NAME_DEF_STMT (new_name
) = phi
;
5492 add_phi_arg (phi
, use
, exit
);
5495 SET_USE (op_p
, PHI_RESULT (phi
));
5498 /* Ensure that operands of STMT may be used at the end of EXIT without
5499 violating loop closed ssa form. */
5502 protect_loop_closed_ssa_form (edge exit
, tree stmt
)
5505 use_operand_p use_p
;
5507 FOR_EACH_SSA_USE_OPERAND (use_p
, stmt
, iter
, SSA_OP_ALL_USES
)
5508 protect_loop_closed_ssa_form_use (exit
, use_p
);
5511 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5512 so that they are emitted on the correct place, and so that the loop closed
5513 ssa form is preserved. */
5516 compute_phi_arg_on_exit (edge exit
, tree stmts
, tree op
)
5518 tree_stmt_iterator tsi
;
5519 block_stmt_iterator bsi
;
5520 tree phi
, stmt
, def
, next
;
5522 if (!single_pred_p (exit
->dest
))
5523 split_loop_exit_edge (exit
);
5525 /* Ensure there is label in exit->dest, so that we can
5527 tree_block_label (exit
->dest
);
5528 bsi
= bsi_after_labels (exit
->dest
);
5530 if (TREE_CODE (stmts
) == STATEMENT_LIST
)
5532 for (tsi
= tsi_start (stmts
); !tsi_end_p (tsi
); tsi_next (&tsi
))
5534 tree stmt
= tsi_stmt (tsi
);
5535 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5536 protect_loop_closed_ssa_form (exit
, stmt
);
5541 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5542 protect_loop_closed_ssa_form (exit
, stmts
);
5548 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= next
)
5550 next
= PHI_CHAIN (phi
);
5552 if (PHI_ARG_DEF_FROM_EDGE (phi
, exit
) == op
)
5554 def
= PHI_RESULT (phi
);
5555 remove_statement (phi
, false);
5556 stmt
= build2 (MODIFY_EXPR
, TREE_TYPE (op
),
5558 SSA_NAME_DEF_STMT (def
) = stmt
;
5559 bsi_insert_before (&bsi
, stmt
, BSI_SAME_STMT
);
5564 /* Rewrites USE using candidate CAND. */
5567 rewrite_use (struct ivopts_data
*data
,
5568 struct iv_use
*use
, struct iv_cand
*cand
)
5572 case USE_NONLINEAR_EXPR
:
5573 rewrite_use_nonlinear_expr (data
, use
, cand
);
5577 rewrite_use_address (data
, use
, cand
);
5581 rewrite_use_compare (data
, use
, cand
);
5587 mark_new_vars_to_rename (use
->stmt
);
5590 /* Rewrite the uses using the selected induction variables. */
5593 rewrite_uses (struct ivopts_data
*data
)
5596 struct iv_cand
*cand
;
5599 for (i
= 0; i
< n_iv_uses (data
); i
++)
5601 use
= iv_use (data
, i
);
5602 cand
= use
->selected
;
5605 rewrite_use (data
, use
, cand
);
5609 /* Removes the ivs that are not used after rewriting. */
5612 remove_unused_ivs (struct ivopts_data
*data
)
5617 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5619 struct version_info
*info
;
5621 info
= ver_info (data
, j
);
5623 && !zero_p (info
->iv
->step
)
5625 && !info
->iv
->have_use_for
5626 && !info
->preserve_biv
)
5627 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5631 /* Frees data allocated by the optimization of a single loop. */
5634 free_loop_data (struct ivopts_data
*data
)
5640 htab_empty (data
->niters
);
5642 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5644 struct version_info
*info
;
5646 info
= ver_info (data
, i
);
5650 info
->has_nonlin_use
= false;
5651 info
->preserve_biv
= false;
5654 bitmap_clear (data
->relevant
);
5655 bitmap_clear (data
->important_candidates
);
5657 for (i
= 0; i
< n_iv_uses (data
); i
++)
5659 struct iv_use
*use
= iv_use (data
, i
);
5662 BITMAP_FREE (use
->related_cands
);
5663 for (j
= 0; j
< use
->n_map_members
; j
++)
5664 if (use
->cost_map
[j
].depends_on
)
5665 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5666 free (use
->cost_map
);
5669 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5671 for (i
= 0; i
< n_iv_cands (data
); i
++)
5673 struct iv_cand
*cand
= iv_cand (data
, i
);
5677 if (cand
->depends_on
)
5678 BITMAP_FREE (cand
->depends_on
);
5681 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5683 if (data
->version_info_size
< num_ssa_names
)
5685 data
->version_info_size
= 2 * num_ssa_names
;
5686 free (data
->version_info
);
5687 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5690 data
->max_inv_id
= 0;
5692 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5693 SET_DECL_RTL (obj
, NULL_RTX
);
5695 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5698 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5702 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5704 free_loop_data (data
);
5705 free (data
->version_info
);
5706 BITMAP_FREE (data
->relevant
);
5707 BITMAP_FREE (data
->important_candidates
);
5708 htab_delete (data
->niters
);
5710 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5711 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5712 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5715 /* Optimizes the LOOP. Returns true if anything changed. */
5718 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5720 bool changed
= false;
5721 struct iv_ca
*iv_ca
;
5724 data
->current_loop
= loop
;
5726 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5728 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5730 exit
= single_dom_exit (loop
);
5733 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5734 exit
->src
->index
, exit
->dest
->index
);
5735 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5736 fprintf (dump_file
, "\n");
5739 fprintf (dump_file
, "\n");
5742 /* For each ssa name determines whether it behaves as an induction variable
5744 if (!find_induction_variables (data
))
5747 /* Finds interesting uses (item 1). */
5748 find_interesting_uses (data
);
5749 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5752 /* Finds candidates for the induction variables (item 2). */
5753 find_iv_candidates (data
);
5755 /* Calculates the costs (item 3, part 1). */
5756 determine_use_iv_costs (data
);
5757 determine_iv_costs (data
);
5758 determine_set_costs (data
);
5760 /* Find the optimal set of induction variables (item 3, part 2). */
5761 iv_ca
= find_optimal_iv_set (data
);
5766 /* Create the new induction variables (item 4, part 1). */
5767 create_new_ivs (data
, iv_ca
);
5768 iv_ca_free (&iv_ca
);
5770 /* Rewrite the uses (item 4, part 2). */
5771 rewrite_uses (data
);
5773 /* Remove the ivs that are unused after rewriting. */
5774 remove_unused_ivs (data
);
5776 /* We have changed the structure of induction variables; it might happen
5777 that definitions in the scev database refer to some of them that were
5782 free_loop_data (data
);
5787 /* Main entry point. Optimizes induction variables in LOOPS. */
5790 tree_ssa_iv_optimize (struct loops
*loops
)
5793 struct ivopts_data data
;
5795 tree_ssa_iv_optimize_init (&data
);
5797 /* Optimize the loops starting with the innermost ones. */
5798 loop
= loops
->tree_root
;
5802 /* Scan the loops, inner ones first. */
5803 while (loop
!= loops
->tree_root
)
5805 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5806 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5808 tree_ssa_iv_optimize_loop (&data
, loop
);
5820 tree_ssa_iv_optimize_finalize (&data
);