1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
82 #include "tree-pass.h"
84 #include "insn-config.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
104 tree base
; /* Initial value of the iv. */
105 tree base_object
; /* A memory object to that the induction variable points. */
106 tree step
; /* Step of the iv (constant only). */
107 tree ssa_name
; /* The ssa name with the value. */
108 bool biv_p
; /* Is it a biv? */
109 bool have_use_for
; /* Do we already have a use for it? */
110 unsigned use_id
; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
116 tree name
; /* The ssa name. */
117 struct iv
*iv
; /* Induction variable description. */
118 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id
; /* Id of an invariant. */
121 bool preserve_biv
; /* For the original biv, whether to preserve it. */
127 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
128 USE_ADDRESS
, /* Use in an address. */
129 USE_COMPARE
/* Use is a compare. */
132 /* The candidate - cost pair. */
135 struct iv_cand
*cand
; /* The candidate. */
136 unsigned cost
; /* The cost. */
137 bitmap depends_on
; /* The list of invariants that have to be
139 tree value
; /* For final value elimination, the expression for
140 the final value of the iv. For iv elimination,
141 the new bound to compare with. */
147 unsigned id
; /* The id of the use. */
148 enum use_type type
; /* Type of the use. */
149 struct iv
*iv
; /* The induction variable it is based on. */
150 tree stmt
; /* Statement in that it occurs. */
151 tree
*op_p
; /* The place where it occurs. */
152 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
155 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
156 struct cost_pair
*cost_map
;
157 /* The costs wrto the iv candidates. */
159 struct iv_cand
*selected
;
160 /* The selected candidate. */
163 /* The position where the iv is computed. */
166 IP_NORMAL
, /* At the end, just before the exit condition. */
167 IP_END
, /* At the end of the latch block. */
168 IP_ORIGINAL
/* The original biv. */
171 /* The induction variable candidate. */
174 unsigned id
; /* The number of the candidate. */
175 bool important
; /* Whether this is an "important" candidate, i.e. such
176 that it should be considered by all uses. */
177 enum iv_position pos
; /* Where it is computed. */
178 tree incremented_at
; /* For original biv, the statement where it is
180 tree var_before
; /* The variable used for it before increment. */
181 tree var_after
; /* The variable used for it after increment. */
182 struct iv
*iv
; /* The value of the candidate. NULL for
183 "pseudocandidate" used to indicate the possibility
184 to replace the final value of an iv by direct
185 computation of the value. */
186 unsigned cost
; /* Cost of the candidate. */
187 bitmap depends_on
; /* The list of invariants that are used in step of the
191 /* The data used by the induction variable optimizations. */
193 typedef struct iv_use
*iv_use_p
;
195 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
197 typedef struct iv_cand
*iv_cand_p
;
198 DEF_VEC_P(iv_cand_p
);
199 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
203 /* The currently optimized loop. */
204 struct loop
*current_loop
;
206 /* Number of registers used in it. */
209 /* Numbers of iterations for all exits of the current loop. */
212 /* The size of version_info array allocated. */
213 unsigned version_info_size
;
215 /* The array of information for the ssa names. */
216 struct version_info
*version_info
;
218 /* The bitmap of indices in version_info whose value was changed. */
221 /* The maximum invariant id. */
224 /* The uses of induction variables. */
225 VEC(iv_use_p
,heap
) *iv_uses
;
227 /* The candidates. */
228 VEC(iv_cand_p
,heap
) *iv_candidates
;
230 /* A bitmap of important candidates. */
231 bitmap important_candidates
;
233 /* Whether to consider just related and important candidates when replacing a
235 bool consider_all_candidates
;
238 /* An assignment of iv candidates to uses. */
242 /* The number of uses covered by the assignment. */
245 /* Number of uses that cannot be expressed by the candidates in the set. */
248 /* Candidate assigned to a use, together with the related costs. */
249 struct cost_pair
**cand_for_use
;
251 /* Number of times each candidate is used. */
252 unsigned *n_cand_uses
;
254 /* The candidates used. */
257 /* The number of candidates in the set. */
260 /* Total number of registers needed. */
263 /* Total cost of expressing uses. */
264 unsigned cand_use_cost
;
266 /* Total cost of candidates. */
269 /* Number of times each invariant is used. */
270 unsigned *n_invariant_uses
;
272 /* Total cost of the assignment. */
276 /* Difference of two iv candidate assignments. */
283 /* An old assignment (for rollback purposes). */
284 struct cost_pair
*old_cp
;
286 /* A new assignment. */
287 struct cost_pair
*new_cp
;
289 /* Next change in the list. */
290 struct iv_ca_delta
*next_change
;
293 /* Bound on number of candidates below that all candidates are considered. */
295 #define CONSIDER_ALL_CANDIDATES_BOUND \
296 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
299 optimizing such a loop would help, and it would take ages). */
301 #define MAX_CONSIDERED_USES \
302 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304 /* If there are at most this number of ivs in the set, try removing unnecessary
305 ivs from the set always. */
307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
308 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
310 /* The list of trees for that the decl_rtl field must be reset is stored
313 static VEC(tree
,heap
) *decl_rtl_to_reset
;
315 /* Number of uses recorded in DATA. */
317 static inline unsigned
318 n_iv_uses (struct ivopts_data
*data
)
320 return VEC_length (iv_use_p
, data
->iv_uses
);
323 /* Ith use recorded in DATA. */
325 static inline struct iv_use
*
326 iv_use (struct ivopts_data
*data
, unsigned i
)
328 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
331 /* Number of candidates recorded in DATA. */
333 static inline unsigned
334 n_iv_cands (struct ivopts_data
*data
)
336 return VEC_length (iv_cand_p
, data
->iv_candidates
);
339 /* Ith candidate recorded in DATA. */
341 static inline struct iv_cand
*
342 iv_cand (struct ivopts_data
*data
, unsigned i
)
344 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
347 /* The single loop exit if it dominates the latch, NULL otherwise. */
350 single_dom_exit (struct loop
*loop
)
352 edge exit
= single_exit (loop
);
357 if (!just_once_each_iteration_p (loop
, exit
->src
))
363 /* Dumps information about the induction variable IV to FILE. */
365 extern void dump_iv (FILE *, struct iv
*);
367 dump_iv (FILE *file
, struct iv
*iv
)
371 fprintf (file
, "ssa name ");
372 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
373 fprintf (file
, "\n");
376 fprintf (file
, " type ");
377 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
378 fprintf (file
, "\n");
382 fprintf (file
, " base ");
383 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
384 fprintf (file
, "\n");
386 fprintf (file
, " step ");
387 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
388 fprintf (file
, "\n");
392 fprintf (file
, " invariant ");
393 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
394 fprintf (file
, "\n");
399 fprintf (file
, " base object ");
400 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
401 fprintf (file
, "\n");
405 fprintf (file
, " is a biv\n");
408 /* Dumps information about the USE to FILE. */
410 extern void dump_use (FILE *, struct iv_use
*);
412 dump_use (FILE *file
, struct iv_use
*use
)
414 fprintf (file
, "use %d\n", use
->id
);
418 case USE_NONLINEAR_EXPR
:
419 fprintf (file
, " generic\n");
423 fprintf (file
, " address\n");
427 fprintf (file
, " compare\n");
434 fprintf (file
, " in statement ");
435 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
436 fprintf (file
, "\n");
438 fprintf (file
, " at position ");
440 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
441 fprintf (file
, "\n");
443 dump_iv (file
, use
->iv
);
445 if (use
->related_cands
)
447 fprintf (file
, " related candidates ");
448 dump_bitmap (file
, use
->related_cands
);
452 /* Dumps information about the uses to FILE. */
454 extern void dump_uses (FILE *, struct ivopts_data
*);
456 dump_uses (FILE *file
, struct ivopts_data
*data
)
461 for (i
= 0; i
< n_iv_uses (data
); i
++)
463 use
= iv_use (data
, i
);
465 dump_use (file
, use
);
466 fprintf (file
, "\n");
470 /* Dumps information about induction variable candidate CAND to FILE. */
472 extern void dump_cand (FILE *, struct iv_cand
*);
474 dump_cand (FILE *file
, struct iv_cand
*cand
)
476 struct iv
*iv
= cand
->iv
;
478 fprintf (file
, "candidate %d%s\n",
479 cand
->id
, cand
->important
? " (important)" : "");
481 if (cand
->depends_on
)
483 fprintf (file
, " depends on ");
484 dump_bitmap (file
, cand
->depends_on
);
489 fprintf (file
, " final value replacement\n");
496 fprintf (file
, " incremented before exit test\n");
500 fprintf (file
, " incremented at end\n");
504 fprintf (file
, " original biv\n");
511 /* Returns the info for ssa version VER. */
513 static inline struct version_info
*
514 ver_info (struct ivopts_data
*data
, unsigned ver
)
516 return data
->version_info
+ ver
;
519 /* Returns the info for ssa name NAME. */
521 static inline struct version_info
*
522 name_info (struct ivopts_data
*data
, tree name
)
524 return ver_info (data
, SSA_NAME_VERSION (name
));
527 /* Checks whether there exists number X such that X * B = A, counting modulo
531 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
534 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
535 unsigned HOST_WIDE_INT inv
, ex
, val
;
541 /* First divide the whole equation by 2 as long as possible. */
542 while (!(a
& 1) && !(b
& 1))
552 /* If b is still even, a is odd and there is no such x. */
556 /* Find the inverse of b. We compute it as
557 b^(2^(bits - 1) - 1) (mod 2^bits). */
560 for (i
= 0; i
< bits
- 1; i
++)
562 inv
= (inv
* ex
) & mask
;
563 ex
= (ex
* ex
) & mask
;
566 val
= (a
* inv
) & mask
;
568 gcc_assert (((val
* b
) & mask
) == a
);
570 if ((val
>> (bits
- 1)) & 1)
578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
582 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
584 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
588 if (sbb
== loop
->latch
)
594 return stmt
== last_stmt (bb
);
597 /* Returns true if STMT if after the place where the original induction
598 variable CAND is incremented. */
601 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
603 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
604 basic_block stmt_bb
= bb_for_stmt (stmt
);
605 block_stmt_iterator bsi
;
607 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
610 if (stmt_bb
!= cand_bb
)
613 /* Scan the block from the end, since the original ivs are usually
614 incremented at the end of the loop body. */
615 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
617 if (bsi_stmt (bsi
) == cand
->incremented_at
)
619 if (bsi_stmt (bsi
) == stmt
)
624 /* Returns true if STMT if after the place where the induction variable
625 CAND is incremented in LOOP. */
628 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
636 return stmt_after_ip_normal_pos (loop
, stmt
);
639 return stmt_after_ip_original_pos (cand
, stmt
);
646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
649 abnormal_ssa_name_p (tree exp
)
654 if (TREE_CODE (exp
) != SSA_NAME
)
657 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
661 abnormal phi node. Callback for for_each_index. */
664 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
665 void *data ATTRIBUTE_UNUSED
)
667 if (TREE_CODE (base
) == ARRAY_REF
)
669 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
671 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
675 return !abnormal_ssa_name_p (*index
);
678 /* Returns true if EXPR contains a ssa name that occurs in an
679 abnormal phi node. */
682 contains_abnormal_ssa_name_p (tree expr
)
685 enum tree_code_class
class;
690 code
= TREE_CODE (expr
);
691 class = TREE_CODE_CLASS (code
);
693 if (code
== SSA_NAME
)
694 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
696 if (code
== INTEGER_CST
697 || is_gimple_min_invariant (expr
))
700 if (code
== ADDR_EXPR
)
701 return !for_each_index (&TREE_OPERAND (expr
, 0),
702 idx_contains_abnormal_ssa_name_p
,
709 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
714 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
726 /* Element of the table in that we cache the numbers of iterations obtained
727 from exits of the loop. */
731 /* The edge for that the number of iterations is cached. */
734 /* Number of iterations corresponding to this exit, or NULL if it cannot be
739 /* Hash function for nfe_cache_elt E. */
742 nfe_hash (const void *e
)
744 const struct nfe_cache_elt
*elt
= e
;
746 return htab_hash_pointer (elt
->exit
);
749 /* Equality function for nfe_cache_elt E1 and edge E2. */
752 nfe_eq (const void *e1
, const void *e2
)
754 const struct nfe_cache_elt
*elt1
= e1
;
756 return elt1
->exit
== e2
;
759 /* Returns tree describing number of iterations determined from
760 EXIT of DATA->current_loop, or NULL if something goes wrong. */
763 niter_for_exit (struct ivopts_data
*data
, edge exit
)
765 struct nfe_cache_elt
*nfe_desc
;
766 struct tree_niter_desc desc
;
769 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
770 htab_hash_pointer (exit
),
775 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
776 nfe_desc
->exit
= exit
;
778 /* Try to determine number of iterations. We must know it
779 unconditionally (i.e., without possibility of # of iterations
780 being zero). Also, we cannot safely work with ssa names that
781 appear in phi nodes on abnormal edges, so that we do not create
782 overlapping life ranges for them (PR 27283). */
783 if (number_of_iterations_exit (data
->current_loop
,
785 && zero_p (desc
.may_be_zero
)
786 && !contains_abnormal_ssa_name_p (desc
.niter
))
787 nfe_desc
->niter
= desc
.niter
;
789 nfe_desc
->niter
= NULL_TREE
;
794 return nfe_desc
->niter
;
797 /* Returns tree describing number of iterations determined from
798 single dominating exit of DATA->current_loop, or NULL if something
802 niter_for_single_dom_exit (struct ivopts_data
*data
)
804 edge exit
= single_dom_exit (data
->current_loop
);
809 return niter_for_exit (data
, exit
);
812 /* Initializes data structures used by the iv optimization pass, stored
816 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
818 data
->version_info_size
= 2 * num_ssa_names
;
819 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
820 data
->relevant
= BITMAP_ALLOC (NULL
);
821 data
->important_candidates
= BITMAP_ALLOC (NULL
);
822 data
->max_inv_id
= 0;
823 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
824 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
825 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
826 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
829 /* Returns a memory object to that EXPR points. In case we are able to
830 determine that it does not point to any such object, NULL is returned. */
833 determine_base_object (tree expr
)
835 enum tree_code code
= TREE_CODE (expr
);
836 tree base
, obj
, op0
, op1
;
838 /* If this is a pointer casted to any type, we need to determine
839 the base object for the pointer; so handle conversions before
840 throwing away non-pointer expressions. */
841 if (TREE_CODE (expr
) == NOP_EXPR
842 || TREE_CODE (expr
) == CONVERT_EXPR
)
843 return determine_base_object (TREE_OPERAND (expr
, 0));
845 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
854 obj
= TREE_OPERAND (expr
, 0);
855 base
= get_base_address (obj
);
860 if (TREE_CODE (base
) == INDIRECT_REF
)
861 return determine_base_object (TREE_OPERAND (base
, 0));
863 return fold_convert (ptr_type_node
,
864 build_fold_addr_expr (base
));
868 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
869 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
875 return (code
== PLUS_EXPR
877 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
879 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
882 return fold_convert (ptr_type_node
, expr
);
886 /* Allocates an induction variable with given initial value BASE and step STEP
890 alloc_iv (tree base
, tree step
)
892 struct iv
*iv
= XCNEW (struct iv
);
894 if (step
&& integer_zerop (step
))
898 iv
->base_object
= determine_base_object (base
);
901 iv
->have_use_for
= false;
903 iv
->ssa_name
= NULL_TREE
;
908 /* Sets STEP and BASE for induction variable IV. */
911 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
913 struct version_info
*info
= name_info (data
, iv
);
915 gcc_assert (!info
->iv
);
917 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
918 info
->iv
= alloc_iv (base
, step
);
919 info
->iv
->ssa_name
= iv
;
922 /* Finds induction variable declaration for VAR. */
925 get_iv (struct ivopts_data
*data
, tree var
)
929 if (!name_info (data
, var
)->iv
)
931 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
934 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
935 set_iv (data
, var
, var
, NULL_TREE
);
938 return name_info (data
, var
)->iv
;
941 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
942 not define a simple affine biv with nonzero step. */
945 determine_biv_step (tree phi
)
947 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
948 tree name
= PHI_RESULT (phi
);
951 if (!is_gimple_reg (name
))
954 if (!simple_iv (loop
, phi
, name
, &iv
, true))
957 return (zero_p (iv
.step
) ? NULL_TREE
: iv
.step
);
960 /* Finds basic ivs. */
963 find_bivs (struct ivopts_data
*data
)
965 tree phi
, step
, type
, base
;
967 struct loop
*loop
= data
->current_loop
;
969 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
971 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
974 step
= determine_biv_step (phi
);
978 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
979 base
= expand_simple_operations (base
);
980 if (contains_abnormal_ssa_name_p (base
)
981 || contains_abnormal_ssa_name_p (step
))
984 type
= TREE_TYPE (PHI_RESULT (phi
));
985 base
= fold_convert (type
, base
);
987 step
= fold_convert (type
, step
);
989 set_iv (data
, PHI_RESULT (phi
), base
, step
);
996 /* Marks basic ivs. */
999 mark_bivs (struct ivopts_data
*data
)
1002 struct iv
*iv
, *incr_iv
;
1003 struct loop
*loop
= data
->current_loop
;
1004 basic_block incr_bb
;
1006 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1008 iv
= get_iv (data
, PHI_RESULT (phi
));
1012 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1013 incr_iv
= get_iv (data
, var
);
1017 /* If the increment is in the subloop, ignore it. */
1018 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1019 if (incr_bb
->loop_father
!= data
->current_loop
1020 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1024 incr_iv
->biv_p
= true;
1028 /* Checks whether STMT defines a linear induction variable and stores its
1029 parameters to IV. */
1032 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
1035 struct loop
*loop
= data
->current_loop
;
1037 iv
->base
= NULL_TREE
;
1038 iv
->step
= NULL_TREE
;
1040 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1043 lhs
= TREE_OPERAND (stmt
, 0);
1044 if (TREE_CODE (lhs
) != SSA_NAME
)
1047 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), iv
, true))
1049 iv
->base
= expand_simple_operations (iv
->base
);
1051 if (contains_abnormal_ssa_name_p (iv
->base
)
1052 || contains_abnormal_ssa_name_p (iv
->step
))
1058 /* Finds general ivs in statement STMT. */
1061 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1065 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1068 set_iv (data
, TREE_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
1071 /* Finds general ivs in basic block BB. */
1074 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1076 block_stmt_iterator bsi
;
1078 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1079 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1082 /* Finds general ivs. */
1085 find_givs (struct ivopts_data
*data
)
1087 struct loop
*loop
= data
->current_loop
;
1088 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1091 for (i
= 0; i
< loop
->num_nodes
; i
++)
1092 find_givs_in_bb (data
, body
[i
]);
1096 /* For each ssa name defined in LOOP determines whether it is an induction
1097 variable and if so, its initial value and step. */
1100 find_induction_variables (struct ivopts_data
*data
)
1105 if (!find_bivs (data
))
1111 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1113 tree niter
= niter_for_single_dom_exit (data
);
1117 fprintf (dump_file
, " number of iterations ");
1118 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1119 fprintf (dump_file
, "\n\n");
1122 fprintf (dump_file
, "Induction variables:\n\n");
1124 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1126 if (ver_info (data
, i
)->iv
)
1127 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1134 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1136 static struct iv_use
*
1137 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1138 tree stmt
, enum use_type use_type
)
1140 struct iv_use
*use
= XCNEW (struct iv_use
);
1142 use
->id
= n_iv_uses (data
);
1143 use
->type
= use_type
;
1147 use
->related_cands
= BITMAP_ALLOC (NULL
);
1149 /* To avoid showing ssa name in the dumps, if it was not reset by the
1151 iv
->ssa_name
= NULL_TREE
;
1153 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1154 dump_use (dump_file
, use
);
1156 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1161 /* Checks whether OP is a loop-level invariant and if so, records it.
1162 NONLINEAR_USE is true if the invariant is used in a way we do not
1163 handle specially. */
1166 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1169 struct version_info
*info
;
1171 if (TREE_CODE (op
) != SSA_NAME
1172 || !is_gimple_reg (op
))
1175 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1177 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1180 info
= name_info (data
, op
);
1182 info
->has_nonlin_use
|= nonlinear_use
;
1184 info
->inv_id
= ++data
->max_inv_id
;
1185 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1188 /* Checks whether the use OP is interesting and if so, records it. */
1190 static struct iv_use
*
1191 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1198 if (TREE_CODE (op
) != SSA_NAME
)
1201 iv
= get_iv (data
, op
);
1205 if (iv
->have_use_for
)
1207 use
= iv_use (data
, iv
->use_id
);
1209 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1213 if (zero_p (iv
->step
))
1215 record_invariant (data
, op
, true);
1218 iv
->have_use_for
= true;
1220 civ
= XNEW (struct iv
);
1223 stmt
= SSA_NAME_DEF_STMT (op
);
1224 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1225 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1227 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1228 iv
->use_id
= use
->id
;
1233 /* Checks whether the condition *COND_P in STMT is interesting
1234 and if so, records it. */
1237 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1241 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1243 tree zero
= integer_zero_node
;
1245 const_iv
.step
= NULL_TREE
;
1247 if (TREE_CODE (*cond_p
) != SSA_NAME
1248 && !COMPARISON_CLASS_P (*cond_p
))
1251 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1258 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1259 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1262 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1263 iv0
= get_iv (data
, *op0_p
);
1267 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1268 iv1
= get_iv (data
, *op1_p
);
1272 if (/* When comparing with non-invariant value, we may not do any senseful
1273 induction variable elimination. */
1275 /* Eliminating condition based on two ivs would be nontrivial.
1276 ??? TODO -- it is not really important to handle this case. */
1277 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1279 find_interesting_uses_op (data
, *op0_p
);
1280 find_interesting_uses_op (data
, *op1_p
);
1284 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1286 /* If both are invariants, this is a work for unswitching. */
1290 civ
= XNEW (struct iv
);
1291 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1292 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1295 /* Returns true if expression EXPR is obviously invariant in LOOP,
1296 i.e. if all its operands are defined outside of the LOOP. */
1299 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1304 if (is_gimple_min_invariant (expr
))
1307 if (TREE_CODE (expr
) == SSA_NAME
)
1309 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1311 && flow_bb_inside_loop_p (loop
, def_bb
))
1320 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1321 for (i
= 0; i
< len
; i
++)
1322 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1328 /* Cumulates the steps of indices into DATA and replaces their values with the
1329 initial ones. Returns false when the value of the index cannot be determined.
1330 Callback for for_each_index. */
1332 struct ifs_ivopts_data
1334 struct ivopts_data
*ivopts_data
;
1340 idx_find_step (tree base
, tree
*idx
, void *data
)
1342 struct ifs_ivopts_data
*dta
= data
;
1344 tree step
, iv_base
, iv_step
, lbound
, off
;
1345 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1347 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1348 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1351 /* If base is a component ref, require that the offset of the reference
1353 if (TREE_CODE (base
) == COMPONENT_REF
)
1355 off
= component_ref_field_offset (base
);
1356 return expr_invariant_in_loop_p (loop
, off
);
1359 /* If base is array, first check whether we will be able to move the
1360 reference out of the loop (in order to take its address in strength
1361 reduction). In order for this to work we need both lower bound
1362 and step to be loop invariants. */
1363 if (TREE_CODE (base
) == ARRAY_REF
)
1365 step
= array_ref_element_size (base
);
1366 lbound
= array_ref_low_bound (base
);
1368 if (!expr_invariant_in_loop_p (loop
, step
)
1369 || !expr_invariant_in_loop_p (loop
, lbound
))
1373 if (TREE_CODE (*idx
) != SSA_NAME
)
1376 iv
= get_iv (dta
->ivopts_data
, *idx
);
1380 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1381 *&x[0], which is not folded and does not trigger the
1382 ARRAY_REF path below. */
1388 if (TREE_CODE (base
) == ARRAY_REF
)
1390 step
= array_ref_element_size (base
);
1392 /* We only handle addresses whose step is an integer constant. */
1393 if (TREE_CODE (step
) != INTEGER_CST
)
1397 /* The step for pointer arithmetics already is 1 byte. */
1398 step
= build_int_cst (sizetype
, 1);
1402 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1403 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1406 /* The index might wrap. */
1410 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1413 *dta
->step_p
= step
;
1415 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1420 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1421 object is passed to it in DATA. */
1424 idx_record_use (tree base
, tree
*idx
,
1427 find_interesting_uses_op (data
, *idx
);
1428 if (TREE_CODE (base
) == ARRAY_REF
)
1430 find_interesting_uses_op (data
, array_ref_element_size (base
));
1431 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1436 /* Returns true if memory reference REF may be unaligned. */
1439 may_be_unaligned_p (tree ref
)
1443 HOST_WIDE_INT bitsize
;
1444 HOST_WIDE_INT bitpos
;
1446 enum machine_mode mode
;
1447 int unsignedp
, volatilep
;
1448 unsigned base_align
;
1450 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1451 thus they are not misaligned. */
1452 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1455 /* The test below is basically copy of what expr.c:normal_inner_ref
1456 does to check whether the object must be loaded by parts when
1457 STRICT_ALIGNMENT is true. */
1458 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1459 &unsignedp
, &volatilep
, true);
1460 base_type
= TREE_TYPE (base
);
1461 base_align
= TYPE_ALIGN (base_type
);
1464 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1465 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1466 || bitpos
% BITS_PER_UNIT
!= 0))
1472 /* Return true if EXPR may be non-addressable. */
1475 may_be_nonaddressable_p (tree expr
)
1477 switch (TREE_CODE (expr
))
1480 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1481 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1484 case ARRAY_RANGE_REF
:
1485 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1487 case VIEW_CONVERT_EXPR
:
1488 /* This kind of view-conversions may wrap non-addressable objects
1489 and make them look addressable. After some processing the
1490 non-addressability may be uncovered again, causing ADDR_EXPRs
1491 of inappropriate objects to be built. */
1492 return AGGREGATE_TYPE_P (TREE_TYPE (expr
))
1493 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr
, 0)));
1502 /* Finds addresses in *OP_P inside STMT. */
1505 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1507 tree base
= *op_p
, step
= NULL
;
1509 struct ifs_ivopts_data ifs_ivopts_data
;
1511 /* Do not play with volatile memory references. A bit too conservative,
1512 perhaps, but safe. */
1513 if (stmt_ann (stmt
)->has_volatile_ops
)
1516 /* Ignore bitfields for now. Not really something terribly complicated
1518 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1521 if (may_be_nonaddressable_p (base
))
1524 if (STRICT_ALIGNMENT
1525 && may_be_unaligned_p (base
))
1528 base
= unshare_expr (base
);
1530 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1532 tree type
= build_pointer_type (TREE_TYPE (base
));
1536 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1538 civ
= get_iv (data
, TMR_BASE (base
));
1542 TMR_BASE (base
) = civ
->base
;
1545 if (TMR_INDEX (base
)
1546 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1548 civ
= get_iv (data
, TMR_INDEX (base
));
1552 TMR_INDEX (base
) = civ
->base
;
1557 if (TMR_STEP (base
))
1558 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1561 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1569 base
= tree_mem_ref_addr (type
, base
);
1573 ifs_ivopts_data
.ivopts_data
= data
;
1574 ifs_ivopts_data
.stmt
= stmt
;
1575 ifs_ivopts_data
.step_p
= &step
;
1576 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1580 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1581 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1583 base
= build_fold_addr_expr (base
);
1585 /* Substituting bases of IVs into the base expression might
1586 have caused folding opportunities. */
1587 if (TREE_CODE (base
) == ADDR_EXPR
)
1589 tree
*ref
= &TREE_OPERAND (base
, 0);
1590 while (handled_component_p (*ref
))
1591 ref
= &TREE_OPERAND (*ref
, 0);
1592 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1593 *ref
= fold_indirect_ref (*ref
);
1597 civ
= alloc_iv (base
, step
);
1598 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1602 for_each_index (op_p
, idx_record_use
, data
);
1605 /* Finds and records invariants used in STMT. */
1608 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1611 use_operand_p use_p
;
1614 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1616 op
= USE_FROM_PTR (use_p
);
1617 record_invariant (data
, op
, false);
1621 /* Finds interesting uses of induction variables in the statement STMT. */
1624 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1629 use_operand_p use_p
;
1631 find_invariants_stmt (data
, stmt
);
1633 if (TREE_CODE (stmt
) == COND_EXPR
)
1635 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1639 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1641 lhs
= TREE_OPERAND (stmt
, 0);
1642 rhs
= TREE_OPERAND (stmt
, 1);
1644 if (TREE_CODE (lhs
) == SSA_NAME
)
1646 /* If the statement defines an induction variable, the uses are not
1647 interesting by themselves. */
1649 iv
= get_iv (data
, lhs
);
1651 if (iv
&& !zero_p (iv
->step
))
1655 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1657 case tcc_comparison
:
1658 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1662 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1663 if (REFERENCE_CLASS_P (lhs
))
1664 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1670 if (REFERENCE_CLASS_P (lhs
)
1671 && is_gimple_val (rhs
))
1673 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1674 find_interesting_uses_op (data
, rhs
);
1678 /* TODO -- we should also handle address uses of type
1680 memory = call (whatever);
1687 if (TREE_CODE (stmt
) == PHI_NODE
1688 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1690 lhs
= PHI_RESULT (stmt
);
1691 iv
= get_iv (data
, lhs
);
1693 if (iv
&& !zero_p (iv
->step
))
1697 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1699 op
= USE_FROM_PTR (use_p
);
1701 if (TREE_CODE (op
) != SSA_NAME
)
1704 iv
= get_iv (data
, op
);
1708 find_interesting_uses_op (data
, op
);
1712 /* Finds interesting uses of induction variables outside of loops
1713 on loop exit edge EXIT. */
1716 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1720 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1722 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1723 find_interesting_uses_op (data
, def
);
1727 /* Finds uses of the induction variables that are interesting. */
1730 find_interesting_uses (struct ivopts_data
*data
)
1733 block_stmt_iterator bsi
;
1735 basic_block
*body
= get_loop_body (data
->current_loop
);
1737 struct version_info
*info
;
1740 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1741 fprintf (dump_file
, "Uses:\n\n");
1743 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1748 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1749 if (e
->dest
!= EXIT_BLOCK_PTR
1750 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1751 find_interesting_uses_outside (data
, e
);
1753 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1754 find_interesting_uses_stmt (data
, phi
);
1755 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1756 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1759 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1763 fprintf (dump_file
, "\n");
1765 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1767 info
= ver_info (data
, i
);
1770 fprintf (dump_file
, " ");
1771 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1772 fprintf (dump_file
, " is invariant (%d)%s\n",
1773 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1777 fprintf (dump_file
, "\n");
1783 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1784 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1785 we are at the top-level of the processed address. */
1788 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1789 unsigned HOST_WIDE_INT
*offset
)
1791 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1792 enum tree_code code
;
1793 tree type
, orig_type
= TREE_TYPE (expr
);
1794 unsigned HOST_WIDE_INT off0
, off1
, st
;
1795 tree orig_expr
= expr
;
1799 type
= TREE_TYPE (expr
);
1800 code
= TREE_CODE (expr
);
1806 if (!cst_and_fits_in_hwi (expr
)
1810 *offset
= int_cst_value (expr
);
1811 return build_int_cst (orig_type
, 0);
1815 op0
= TREE_OPERAND (expr
, 0);
1816 op1
= TREE_OPERAND (expr
, 1);
1818 op0
= strip_offset_1 (op0
, false, false, &off0
);
1819 op1
= strip_offset_1 (op1
, false, false, &off1
);
1821 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1822 if (op0
== TREE_OPERAND (expr
, 0)
1823 && op1
== TREE_OPERAND (expr
, 1))
1828 else if (zero_p (op0
))
1830 if (code
== PLUS_EXPR
)
1833 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1836 expr
= fold_build2 (code
, type
, op0
, op1
);
1838 return fold_convert (orig_type
, expr
);
1844 step
= array_ref_element_size (expr
);
1845 if (!cst_and_fits_in_hwi (step
))
1848 st
= int_cst_value (step
);
1849 op1
= TREE_OPERAND (expr
, 1);
1850 op1
= strip_offset_1 (op1
, false, false, &off1
);
1851 *offset
= off1
* st
;
1856 /* Strip the component reference completely. */
1857 op0
= TREE_OPERAND (expr
, 0);
1858 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1868 tmp
= component_ref_field_offset (expr
);
1870 && cst_and_fits_in_hwi (tmp
))
1872 /* Strip the component reference completely. */
1873 op0
= TREE_OPERAND (expr
, 0);
1874 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1875 *offset
= off0
+ int_cst_value (tmp
);
1881 op0
= TREE_OPERAND (expr
, 0);
1882 op0
= strip_offset_1 (op0
, true, true, &off0
);
1885 if (op0
== TREE_OPERAND (expr
, 0))
1888 expr
= build_fold_addr_expr (op0
);
1889 return fold_convert (orig_type
, expr
);
1892 inside_addr
= false;
1899 /* Default handling of expressions for that we want to recurse into
1900 the first operand. */
1901 op0
= TREE_OPERAND (expr
, 0);
1902 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1905 if (op0
== TREE_OPERAND (expr
, 0)
1906 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1909 expr
= copy_node (expr
);
1910 TREE_OPERAND (expr
, 0) = op0
;
1912 TREE_OPERAND (expr
, 1) = op1
;
1914 /* Inside address, we might strip the top level component references,
1915 thus changing type of the expression. Handling of ADDR_EXPR
1917 expr
= fold_convert (orig_type
, expr
);
1922 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1925 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1927 return strip_offset_1 (expr
, false, false, offset
);
1930 /* Returns variant of TYPE that can be used as base for different uses.
1931 We return unsigned type with the same precision, which avoids problems
1935 generic_type_for (tree type
)
1937 if (POINTER_TYPE_P (type
))
1938 return unsigned_type_for (type
);
1940 if (TYPE_UNSIGNED (type
))
1943 return unsigned_type_for (type
);
1946 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1947 the bitmap to that we should store it. */
1949 static struct ivopts_data
*fd_ivopts_data
;
1951 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1953 bitmap
*depends_on
= data
;
1954 struct version_info
*info
;
1956 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1958 info
= name_info (fd_ivopts_data
, *expr_p
);
1960 if (!info
->inv_id
|| info
->has_nonlin_use
)
1964 *depends_on
= BITMAP_ALLOC (NULL
);
1965 bitmap_set_bit (*depends_on
, info
->inv_id
);
1970 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1971 position to POS. If USE is not NULL, the candidate is set as related to
1972 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1973 replacement of the final value of the iv by a direct computation. */
1975 static struct iv_cand
*
1976 add_candidate_1 (struct ivopts_data
*data
,
1977 tree base
, tree step
, bool important
, enum iv_position pos
,
1978 struct iv_use
*use
, tree incremented_at
)
1981 struct iv_cand
*cand
= NULL
;
1982 tree type
, orig_type
;
1986 orig_type
= TREE_TYPE (base
);
1987 type
= generic_type_for (orig_type
);
1988 if (type
!= orig_type
)
1990 base
= fold_convert (type
, base
);
1992 step
= fold_convert (type
, step
);
1996 for (i
= 0; i
< n_iv_cands (data
); i
++)
1998 cand
= iv_cand (data
, i
);
2000 if (cand
->pos
!= pos
)
2003 if (cand
->incremented_at
!= incremented_at
)
2017 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
2020 if (zero_p (cand
->iv
->step
))
2027 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
2032 if (i
== n_iv_cands (data
))
2034 cand
= XCNEW (struct iv_cand
);
2040 cand
->iv
= alloc_iv (base
, step
);
2043 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2045 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2046 cand
->var_after
= cand
->var_before
;
2048 cand
->important
= important
;
2049 cand
->incremented_at
= incremented_at
;
2050 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2053 && TREE_CODE (step
) != INTEGER_CST
)
2055 fd_ivopts_data
= data
;
2056 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2059 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2060 dump_cand (dump_file
, cand
);
2063 if (important
&& !cand
->important
)
2065 cand
->important
= true;
2066 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2067 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2072 bitmap_set_bit (use
->related_cands
, i
);
2073 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2074 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2081 /* Returns true if incrementing the induction variable at the end of the LOOP
2084 The purpose is to avoid splitting latch edge with a biv increment, thus
2085 creating a jump, possibly confusing other optimization passes and leaving
2086 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2087 is not available (so we do not have a better alternative), or if the latch
2088 edge is already nonempty. */
2091 allow_ip_end_pos_p (struct loop
*loop
)
2093 if (!ip_normal_pos (loop
))
2096 if (!empty_block_p (ip_end_pos (loop
)))
2102 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2103 position to POS. If USE is not NULL, the candidate is set as related to
2104 it. The candidate computation is scheduled on all available positions. */
2107 add_candidate (struct ivopts_data
*data
,
2108 tree base
, tree step
, bool important
, struct iv_use
*use
)
2110 if (ip_normal_pos (data
->current_loop
))
2111 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2112 if (ip_end_pos (data
->current_loop
)
2113 && allow_ip_end_pos_p (data
->current_loop
))
2114 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2117 /* Add a standard "0 + 1 * iteration" iv candidate for a
2118 type with SIZE bits. */
2121 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2124 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2125 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2129 /* Adds standard iv candidates. */
2132 add_standard_iv_candidates (struct ivopts_data
*data
)
2134 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2136 /* The same for a double-integer type if it is still fast enough. */
2137 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2138 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2142 /* Adds candidates bases on the old induction variable IV. */
2145 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2148 struct iv_cand
*cand
;
2150 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2152 /* The same, but with initial value zero. */
2153 add_candidate (data
,
2154 build_int_cst (TREE_TYPE (iv
->base
), 0),
2155 iv
->step
, true, NULL
);
2157 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2158 if (TREE_CODE (phi
) == PHI_NODE
)
2160 /* Additionally record the possibility of leaving the original iv
2162 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2163 cand
= add_candidate_1 (data
,
2164 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2165 SSA_NAME_DEF_STMT (def
));
2166 cand
->var_before
= iv
->ssa_name
;
2167 cand
->var_after
= def
;
2171 /* Adds candidates based on the old induction variables. */
2174 add_old_ivs_candidates (struct ivopts_data
*data
)
2180 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2182 iv
= ver_info (data
, i
)->iv
;
2183 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2184 add_old_iv_candidates (data
, iv
);
2188 /* Adds candidates based on the value of the induction variable IV and USE. */
2191 add_iv_value_candidates (struct ivopts_data
*data
,
2192 struct iv
*iv
, struct iv_use
*use
)
2194 unsigned HOST_WIDE_INT offset
;
2197 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2199 /* The same, but with initial value zero. Make such variable important,
2200 since it is generic enough so that possibly many uses may be based
2202 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2203 iv
->step
, true, use
);
2205 /* Third, try removing the constant offset. */
2206 base
= strip_offset (iv
->base
, &offset
);
2208 add_candidate (data
, base
, iv
->step
, false, use
);
2211 /* Adds candidates based on the uses. */
2214 add_derived_ivs_candidates (struct ivopts_data
*data
)
2218 for (i
= 0; i
< n_iv_uses (data
); i
++)
2220 struct iv_use
*use
= iv_use (data
, i
);
2227 case USE_NONLINEAR_EXPR
:
2230 /* Just add the ivs based on the value of the iv used here. */
2231 add_iv_value_candidates (data
, use
->iv
, use
);
2240 /* Record important candidates and add them to related_cands bitmaps
2244 record_important_candidates (struct ivopts_data
*data
)
2249 for (i
= 0; i
< n_iv_cands (data
); i
++)
2251 struct iv_cand
*cand
= iv_cand (data
, i
);
2253 if (cand
->important
)
2254 bitmap_set_bit (data
->important_candidates
, i
);
2257 data
->consider_all_candidates
= (n_iv_cands (data
)
2258 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2260 if (data
->consider_all_candidates
)
2262 /* We will not need "related_cands" bitmaps in this case,
2263 so release them to decrease peak memory consumption. */
2264 for (i
= 0; i
< n_iv_uses (data
); i
++)
2266 use
= iv_use (data
, i
);
2267 BITMAP_FREE (use
->related_cands
);
2272 /* Add important candidates to the related_cands bitmaps. */
2273 for (i
= 0; i
< n_iv_uses (data
); i
++)
2274 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2275 data
->important_candidates
);
2279 /* Finds the candidates for the induction variables. */
2282 find_iv_candidates (struct ivopts_data
*data
)
2284 /* Add commonly used ivs. */
2285 add_standard_iv_candidates (data
);
2287 /* Add old induction variables. */
2288 add_old_ivs_candidates (data
);
2290 /* Add induction variables derived from uses. */
2291 add_derived_ivs_candidates (data
);
2293 /* Record the important candidates. */
2294 record_important_candidates (data
);
2297 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2298 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2299 we allocate a simple list to every use. */
2302 alloc_use_cost_map (struct ivopts_data
*data
)
2304 unsigned i
, size
, s
, j
;
2306 for (i
= 0; i
< n_iv_uses (data
); i
++)
2308 struct iv_use
*use
= iv_use (data
, i
);
2311 if (data
->consider_all_candidates
)
2312 size
= n_iv_cands (data
);
2316 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2321 /* Round up to the power of two, so that moduling by it is fast. */
2322 for (size
= 1; size
< s
; size
<<= 1)
2326 use
->n_map_members
= size
;
2327 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2331 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2332 on invariants DEPENDS_ON and that the value used in expressing it
2336 set_use_iv_cost (struct ivopts_data
*data
,
2337 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2338 bitmap depends_on
, tree value
)
2344 BITMAP_FREE (depends_on
);
2348 if (data
->consider_all_candidates
)
2350 use
->cost_map
[cand
->id
].cand
= cand
;
2351 use
->cost_map
[cand
->id
].cost
= cost
;
2352 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2353 use
->cost_map
[cand
->id
].value
= value
;
2357 /* n_map_members is a power of two, so this computes modulo. */
2358 s
= cand
->id
& (use
->n_map_members
- 1);
2359 for (i
= s
; i
< use
->n_map_members
; i
++)
2360 if (!use
->cost_map
[i
].cand
)
2362 for (i
= 0; i
< s
; i
++)
2363 if (!use
->cost_map
[i
].cand
)
2369 use
->cost_map
[i
].cand
= cand
;
2370 use
->cost_map
[i
].cost
= cost
;
2371 use
->cost_map
[i
].depends_on
= depends_on
;
2372 use
->cost_map
[i
].value
= value
;
2375 /* Gets cost of (USE, CANDIDATE) pair. */
2377 static struct cost_pair
*
2378 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2379 struct iv_cand
*cand
)
2382 struct cost_pair
*ret
;
2387 if (data
->consider_all_candidates
)
2389 ret
= use
->cost_map
+ cand
->id
;
2396 /* n_map_members is a power of two, so this computes modulo. */
2397 s
= cand
->id
& (use
->n_map_members
- 1);
2398 for (i
= s
; i
< use
->n_map_members
; i
++)
2399 if (use
->cost_map
[i
].cand
== cand
)
2400 return use
->cost_map
+ i
;
2402 for (i
= 0; i
< s
; i
++)
2403 if (use
->cost_map
[i
].cand
== cand
)
2404 return use
->cost_map
+ i
;
2409 /* Returns estimate on cost of computing SEQ. */
2417 for (; seq
; seq
= NEXT_INSN (seq
))
2419 set
= single_set (seq
);
2421 cost
+= rtx_cost (set
, SET
);
2429 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2431 produce_memory_decl_rtl (tree obj
, int *regno
)
2436 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2438 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2439 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2442 x
= gen_raw_REG (Pmode
, (*regno
)++);
2444 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2447 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2448 walk_tree. DATA contains the actual fake register number. */
2451 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2453 tree obj
= NULL_TREE
;
2457 switch (TREE_CODE (*expr_p
))
2460 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2461 handled_component_p (*expr_p
);
2462 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2465 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2466 x
= produce_memory_decl_rtl (obj
, regno
);
2471 obj
= SSA_NAME_VAR (*expr_p
);
2472 if (!DECL_RTL_SET_P (obj
))
2473 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2482 if (DECL_RTL_SET_P (obj
))
2485 if (DECL_MODE (obj
) == BLKmode
)
2486 x
= produce_memory_decl_rtl (obj
, regno
);
2488 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2498 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2499 SET_DECL_RTL (obj
, x
);
2505 /* Determines cost of the computation of EXPR. */
2508 computation_cost (tree expr
)
2511 tree type
= TREE_TYPE (expr
);
2513 /* Avoid using hard regs in ways which may be unsupported. */
2514 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2516 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2518 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2522 cost
= seq_cost (seq
);
2524 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2529 /* Returns variable containing the value of candidate CAND at statement AT. */
2532 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2534 if (stmt_after_increment (loop
, cand
, stmt
))
2535 return cand
->var_after
;
2537 return cand
->var_before
;
2540 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2541 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2544 tree_int_cst_sign_bit (tree t
)
2546 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2547 unsigned HOST_WIDE_INT w
;
2549 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2550 w
= TREE_INT_CST_LOW (t
);
2553 w
= TREE_INT_CST_HIGH (t
);
2554 bitno
-= HOST_BITS_PER_WIDE_INT
;
2557 return (w
>> bitno
) & 1;
2560 /* If we can prove that TOP = cst * BOT for some constant cst,
2561 store cst to MUL and return true. Otherwise return false.
2562 The returned value is always sign-extended, regardless of the
2563 signedness of TOP and BOT. */
2566 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
2569 enum tree_code code
;
2570 double_int res
, p0
, p1
;
2571 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2576 if (operand_equal_p (top
, bot
, 0))
2578 *mul
= double_int_one
;
2582 code
= TREE_CODE (top
);
2586 mby
= TREE_OPERAND (top
, 1);
2587 if (TREE_CODE (mby
) != INTEGER_CST
)
2590 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2593 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
2599 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2600 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2603 if (code
== MINUS_EXPR
)
2604 p1
= double_int_neg (p1
);
2605 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
2609 if (TREE_CODE (bot
) != INTEGER_CST
)
2612 p0
= double_int_sext (tree_to_double_int (bot
), precision
);
2613 p1
= double_int_sext (tree_to_double_int (top
), precision
);
2614 if (double_int_zero_p (p1
))
2616 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
2618 return double_int_zero_p (res
);
2625 /* Sets COMB to CST. */
2628 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2629 unsigned HOST_WIDE_INT cst
)
2631 unsigned prec
= TYPE_PRECISION (type
);
2634 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2637 comb
->rest
= NULL_TREE
;
2638 comb
->offset
= cst
& comb
->mask
;
2641 /* Sets COMB to single element ELT. */
2644 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2646 unsigned prec
= TYPE_PRECISION (type
);
2649 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2652 comb
->elts
[0] = elt
;
2654 comb
->rest
= NULL_TREE
;
2658 /* Scales COMB by SCALE. */
2661 aff_combination_scale (struct affine_tree_combination
*comb
,
2662 unsigned HOST_WIDE_INT scale
)
2671 aff_combination_const (comb
, comb
->type
, 0);
2675 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2676 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2678 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2679 comb
->elts
[j
] = comb
->elts
[i
];
2680 if (comb
->coefs
[j
] != 0)
2687 if (comb
->n
< MAX_AFF_ELTS
)
2689 comb
->coefs
[comb
->n
] = scale
;
2690 comb
->elts
[comb
->n
] = comb
->rest
;
2691 comb
->rest
= NULL_TREE
;
2695 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2696 build_int_cst_type (comb
->type
, scale
));
2700 /* Adds ELT * SCALE to COMB. */
2703 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2704 unsigned HOST_WIDE_INT scale
)
2711 for (i
= 0; i
< comb
->n
; i
++)
2712 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2714 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2719 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2720 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2724 gcc_assert (comb
->n
== MAX_AFF_ELTS
- 1);
2725 comb
->coefs
[comb
->n
] = 1;
2726 comb
->elts
[comb
->n
] = comb
->rest
;
2727 comb
->rest
= NULL_TREE
;
2732 if (comb
->n
< MAX_AFF_ELTS
)
2734 comb
->coefs
[comb
->n
] = scale
;
2735 comb
->elts
[comb
->n
] = elt
;
2741 elt
= fold_convert (comb
->type
, elt
);
2743 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2744 fold_convert (comb
->type
, elt
),
2745 build_int_cst_type (comb
->type
, scale
));
2748 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2753 /* Adds COMB2 to COMB1. */
2756 aff_combination_add (struct affine_tree_combination
*comb1
,
2757 struct affine_tree_combination
*comb2
)
2761 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2762 for (i
= 0; i
< comb2
->n
; i
++)
2763 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2765 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2768 /* Convert COMB to TYPE. */
2771 aff_combination_convert (tree type
, struct affine_tree_combination
*comb
)
2773 unsigned prec
= TYPE_PRECISION (type
);
2776 /* If the precision of both types is the same, it suffices to change the type
2777 of the whole combination -- the elements are allowed to have another type
2778 equivalent wrto STRIP_NOPS. */
2779 if (prec
== TYPE_PRECISION (comb
->type
))
2785 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2786 comb
->offset
= comb
->offset
& comb
->mask
;
2788 /* The type of the elements can be different from comb->type only as
2789 much as what STRIP_NOPS would remove. We can just directly cast
2791 for (i
= 0; i
< comb
->n
; i
++)
2792 comb
->elts
[i
] = fold_convert (type
, comb
->elts
[i
]);
2794 comb
->rest
= fold_convert (type
, comb
->rest
);
2799 /* Splits EXPR into an affine combination of parts. */
2802 tree_to_aff_combination (tree expr
, tree type
,
2803 struct affine_tree_combination
*comb
)
2805 struct affine_tree_combination tmp
;
2806 enum tree_code code
;
2807 tree cst
, core
, toffset
;
2808 HOST_WIDE_INT bitpos
, bitsize
;
2809 enum machine_mode mode
;
2810 int unsignedp
, volatilep
;
2814 code
= TREE_CODE (expr
);
2818 aff_combination_const (comb
, type
, int_cst_value (expr
));
2823 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2824 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2825 if (code
== MINUS_EXPR
)
2826 aff_combination_scale (&tmp
, -1);
2827 aff_combination_add (comb
, &tmp
);
2831 cst
= TREE_OPERAND (expr
, 1);
2832 if (TREE_CODE (cst
) != INTEGER_CST
)
2834 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2835 aff_combination_scale (comb
, int_cst_value (cst
));
2839 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2840 aff_combination_scale (comb
, -1);
2844 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2845 &toffset
, &mode
, &unsignedp
, &volatilep
,
2847 if (bitpos
% BITS_PER_UNIT
!= 0)
2849 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2850 core
= build_fold_addr_expr (core
);
2851 if (TREE_CODE (core
) == ADDR_EXPR
)
2852 aff_combination_add_elt (comb
, core
, 1);
2855 tree_to_aff_combination (core
, type
, &tmp
);
2856 aff_combination_add (comb
, &tmp
);
2860 tree_to_aff_combination (toffset
, type
, &tmp
);
2861 aff_combination_add (comb
, &tmp
);
2869 aff_combination_elt (comb
, type
, expr
);
2872 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2875 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2876 unsigned HOST_WIDE_INT mask
)
2878 enum tree_code code
;
2881 elt
= fold_convert (type
, elt
);
2888 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2894 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2896 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2900 return fold_build2 (MULT_EXPR
, type
, elt
,
2901 build_int_cst_type (type
, scale
));
2903 if ((scale
| (mask
>> 1)) == mask
)
2905 /* Scale is negative. */
2907 scale
= (-scale
) & mask
;
2912 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2913 build_int_cst_type (type
, scale
));
2914 return fold_build2 (code
, type
, expr
, elt
);
2917 /* Copies the tree elements of COMB to ensure that they are not shared. */
2920 unshare_aff_combination (struct affine_tree_combination
*comb
)
2924 for (i
= 0; i
< comb
->n
; i
++)
2925 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2927 comb
->rest
= unshare_expr (comb
->rest
);
2930 /* Makes tree from the affine combination COMB. */
2933 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2935 tree type
= comb
->type
;
2936 tree expr
= comb
->rest
;
2938 unsigned HOST_WIDE_INT off
, sgn
;
2940 if (comb
->n
== 0 && comb
->offset
== 0)
2944 /* Handle the special case produced by get_computation_aff when
2945 the type does not fit in HOST_WIDE_INT. */
2946 return fold_convert (type
, expr
);
2949 return build_int_cst (type
, 0);
2952 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2954 for (i
= 0; i
< comb
->n
; i
++)
2955 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2958 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2960 /* Offset is negative. */
2961 off
= (-comb
->offset
) & comb
->mask
;
2969 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2973 /* Folds EXPR using the affine expressions framework. */
2976 fold_affine_expr (tree expr
)
2978 tree type
= TREE_TYPE (expr
);
2979 struct affine_tree_combination comb
;
2981 if (TYPE_PRECISION (type
) > HOST_BITS_PER_WIDE_INT
)
2984 tree_to_aff_combination (expr
, type
, &comb
);
2985 return aff_combination_to_tree (&comb
);
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989 same precision that is at least as wide as the precision of TYPE, stores
2990 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2994 determine_common_wider_type (tree
*a
, tree
*b
)
2996 tree wider_type
= NULL
;
2998 tree atype
= TREE_TYPE (*a
);
3000 if ((TREE_CODE (*a
) == NOP_EXPR
3001 || TREE_CODE (*a
) == CONVERT_EXPR
))
3003 suba
= TREE_OPERAND (*a
, 0);
3004 wider_type
= TREE_TYPE (suba
);
3005 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3011 if ((TREE_CODE (*b
) == NOP_EXPR
3012 || TREE_CODE (*b
) == CONVERT_EXPR
))
3014 subb
= TREE_OPERAND (*b
, 0);
3015 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3026 /* Determines the expression by that USE is expressed from induction variable
3027 CAND at statement AT in LOOP. The expression is stored in a decomposed
3028 form into AFF. Returns false if USE cannot be expressed using CAND. */
3031 get_computation_aff (struct loop
*loop
,
3032 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
3033 struct affine_tree_combination
*aff
)
3035 tree ubase
= use
->iv
->base
;
3036 tree ustep
= use
->iv
->step
;
3037 tree cbase
= cand
->iv
->base
;
3038 tree cstep
= cand
->iv
->step
;
3039 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3044 unsigned HOST_WIDE_INT ustepi
, cstepi
;
3045 HOST_WIDE_INT ratioi
;
3046 struct affine_tree_combination cbase_aff
, expr_aff
;
3047 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
3050 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3052 /* We do not have a precision to express the values of use. */
3056 expr
= var_at_stmt (loop
, cand
, at
);
3058 if (TREE_TYPE (expr
) != ctype
)
3060 /* This may happen with the original ivs. */
3061 expr
= fold_convert (ctype
, expr
);
3064 if (TYPE_UNSIGNED (utype
))
3068 uutype
= unsigned_type_for (utype
);
3069 ubase
= fold_convert (uutype
, ubase
);
3070 ustep
= fold_convert (uutype
, ustep
);
3073 if (uutype
!= ctype
)
3075 expr
= fold_convert (uutype
, expr
);
3076 cbase
= fold_convert (uutype
, cbase
);
3077 cstep
= fold_convert (uutype
, cstep
);
3079 /* If the conversion is not noop, we must take it into account when
3080 considering the value of the step. */
3081 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3085 if (cst_and_fits_in_hwi (cstep_orig
)
3086 && cst_and_fits_in_hwi (ustep_orig
))
3088 ustepi
= int_cst_value (ustep_orig
);
3089 cstepi
= int_cst_value (cstep_orig
);
3091 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
3093 /* TODO maybe consider case when ustep divides cstep and the ratio is
3094 a power of 2 (so that the division is fast to execute)? We would
3095 need to be much more careful with overflows etc. then. */
3099 ratio
= build_int_cst_type (uutype
, ratioi
);
3103 if (!constant_multiple_of (ustep_orig
, cstep_orig
, &rat
))
3105 ratio
= double_int_to_tree (uutype
, rat
);
3107 /* Ratioi is only used to detect special cases when the multiplicative
3108 factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3110 if (double_int_fits_in_shwi_p (rat
))
3111 ratioi
= double_int_to_shwi (rat
);
3116 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3117 type, we achieve better folding by computing their difference in this
3118 wider type, and cast the result to UUTYPE. We do not need to worry about
3119 overflows, as all the arithmetics will in the end be performed in UUTYPE
3121 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3123 /* We may need to shift the value if we are after the increment. */
3124 if (stmt_after_increment (loop
, cand
, at
))
3126 if (uutype
!= common_type
)
3127 cstep
= fold_convert (common_type
, cstep
);
3128 cbase
= fold_build2 (PLUS_EXPR
, common_type
, cbase
, cstep
);
3131 /* use = ubase - ratio * cbase + ratio * var.
3133 In general case ubase + ratio * (var - cbase) could be better (one less
3134 multiplication), but often it is possible to eliminate redundant parts
3135 of computations from (ubase - ratio * cbase) term, and if it does not
3136 happen, fold is able to apply the distributive law to obtain this form
3139 if (TYPE_PRECISION (common_type
) > HOST_BITS_PER_WIDE_INT
)
3141 /* Let's compute in trees and just return the result in AFF. This case
3142 should not be very common, and fold itself is not that bad either,
3143 so making the aff. functions more complicated to handle this case
3144 is not that urgent. */
3147 delta
= fold_build2 (MINUS_EXPR
, common_type
, ubase
, cbase
);
3148 if (uutype
!= common_type
)
3149 delta
= fold_convert (uutype
, delta
);
3150 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3152 else if (ratioi
== -1)
3154 delta
= fold_build2 (PLUS_EXPR
, common_type
, ubase
, cbase
);
3155 if (uutype
!= common_type
)
3156 delta
= fold_convert (uutype
, delta
);
3157 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3161 delta
= fold_build2 (MULT_EXPR
, common_type
, cbase
, ratio
);
3162 delta
= fold_build2 (MINUS_EXPR
, common_type
, ubase
, delta
);
3163 if (uutype
!= common_type
)
3164 delta
= fold_convert (uutype
, delta
);
3165 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3166 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3177 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3178 possible to compute ratioi. */
3179 gcc_assert (ratioi
);
3181 tree_to_aff_combination (ubase
, common_type
, aff
);
3182 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3183 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3184 aff_combination_scale (&cbase_aff
, -ratioi
);
3185 aff_combination_scale (&expr_aff
, ratioi
);
3186 aff_combination_add (aff
, &cbase_aff
);
3187 if (common_type
!= uutype
)
3188 aff_combination_convert (uutype
, aff
);
3189 aff_combination_add (aff
, &expr_aff
);
3194 /* Determines the expression by that USE is expressed from induction variable
3195 CAND at statement AT in LOOP. The computation is unshared. */
3198 get_computation_at (struct loop
*loop
,
3199 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3201 struct affine_tree_combination aff
;
3202 tree type
= TREE_TYPE (use
->iv
->base
);
3204 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3206 unshare_aff_combination (&aff
);
3207 return fold_convert (type
, aff_combination_to_tree (&aff
));
3210 /* Determines the expression by that USE is expressed from induction variable
3211 CAND in LOOP. The computation is unshared. */
3214 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3216 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3219 /* Returns cost of addition in MODE. */
3222 add_cost (enum machine_mode mode
)
3224 static unsigned costs
[NUM_MACHINE_MODES
];
3232 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3233 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3234 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3239 cost
= seq_cost (seq
);
3245 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3246 fprintf (dump_file
, "Addition in %s costs %d\n",
3247 GET_MODE_NAME (mode
), cost
);
3251 /* Entry in a hashtable of already known costs for multiplication. */
3254 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3255 enum machine_mode mode
; /* In mode. */
3256 unsigned cost
; /* The cost. */
3259 /* Counts hash value for the ENTRY. */
3262 mbc_entry_hash (const void *entry
)
3264 const struct mbc_entry
*e
= entry
;
3266 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3269 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3272 mbc_entry_eq (const void *entry1
, const void *entry2
)
3274 const struct mbc_entry
*e1
= entry1
;
3275 const struct mbc_entry
*e2
= entry2
;
3277 return (e1
->mode
== e2
->mode
3278 && e1
->cst
== e2
->cst
);
3281 /* Returns cost of multiplication by constant CST in MODE. */
3284 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3286 static htab_t costs
;
3287 struct mbc_entry
**cached
, act
;
3292 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3296 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3298 return (*cached
)->cost
;
3300 *cached
= XNEW (struct mbc_entry
);
3301 (*cached
)->mode
= mode
;
3302 (*cached
)->cst
= cst
;
3305 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3306 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3310 cost
= seq_cost (seq
);
3312 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3313 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3314 (int) cst
, GET_MODE_NAME (mode
), cost
);
3316 (*cached
)->cost
= cost
;
3321 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3322 validity for a memory reference accessing memory of mode MODE. */
3325 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
)
3327 #define MAX_RATIO 128
3328 static sbitmap valid_mult
[MAX_MACHINE_MODE
];
3330 if (!valid_mult
[mode
])
3332 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3336 valid_mult
[mode
] = sbitmap_alloc (2 * MAX_RATIO
+ 1);
3337 sbitmap_zero (valid_mult
[mode
]);
3338 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3339 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3341 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3342 if (memory_address_p (mode
, addr
))
3343 SET_BIT (valid_mult
[mode
], i
+ MAX_RATIO
);
3346 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3348 fprintf (dump_file
, " allowed multipliers:");
3349 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3350 if (TEST_BIT (valid_mult
[mode
], i
+ MAX_RATIO
))
3351 fprintf (dump_file
, " %d", (int) i
);
3352 fprintf (dump_file
, "\n");
3353 fprintf (dump_file
, "\n");
3357 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3360 return TEST_BIT (valid_mult
[mode
], ratio
+ MAX_RATIO
);
3363 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3364 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3365 variable is omitted. Compute the cost for a memory reference that accesses
3366 a memory location of mode MEM_MODE.
3368 TODO -- there must be some better way. This all is quite crude. */
3371 get_address_cost (bool symbol_present
, bool var_present
,
3372 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3373 enum machine_mode mem_mode
)
3375 static bool initialized
[MAX_MACHINE_MODE
];
3376 static HOST_WIDE_INT rat
[MAX_MACHINE_MODE
], off
[MAX_MACHINE_MODE
];
3377 static HOST_WIDE_INT min_offset
[MAX_MACHINE_MODE
], max_offset
[MAX_MACHINE_MODE
];
3378 static unsigned costs
[MAX_MACHINE_MODE
][2][2][2][2];
3379 unsigned cost
, acost
;
3380 bool offset_p
, ratio_p
;
3381 HOST_WIDE_INT s_offset
;
3382 unsigned HOST_WIDE_INT mask
;
3385 if (!initialized
[mem_mode
])
3388 HOST_WIDE_INT start
= BIGGEST_ALIGNMENT
/ BITS_PER_UNIT
;
3389 int old_cse_not_expected
;
3390 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3391 rtx seq
, addr
, base
;
3394 initialized
[mem_mode
] = true;
3396 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3398 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3399 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3401 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3402 if (!memory_address_p (mem_mode
, addr
))
3405 max_offset
[mem_mode
] = i
== start
? 0 : i
>> 1;
3406 off
[mem_mode
] = max_offset
[mem_mode
];
3408 for (i
= start
; i
<= 1 << 20; i
<<= 1)
3410 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3411 if (!memory_address_p (mem_mode
, addr
))
3414 min_offset
[mem_mode
] = i
== start
? 0 : -(i
>> 1);
3416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3418 fprintf (dump_file
, "get_address_cost:\n");
3419 fprintf (dump_file
, " min offset %s %d\n",
3420 GET_MODE_NAME (mem_mode
),
3421 (int) min_offset
[mem_mode
]);
3422 fprintf (dump_file
, " max offset %s %d\n",
3423 GET_MODE_NAME (mem_mode
),
3424 (int) max_offset
[mem_mode
]);
3428 for (i
= 2; i
<= MAX_RATIO
; i
++)
3429 if (multiplier_allowed_in_address_p (i
, mem_mode
))
3435 /* Compute the cost of various addressing modes. */
3437 reg0
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3438 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3440 for (i
= 0; i
< 16; i
++)
3443 var_p
= (i
>> 1) & 1;
3444 off_p
= (i
>> 2) & 1;
3445 rat_p
= (i
>> 3) & 1;
3449 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
,
3450 gen_int_mode (rat
[mem_mode
], Pmode
));
3453 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3457 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3459 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3460 gen_rtx_fmt_ee (PLUS
, Pmode
,
3462 gen_int_mode (off
[mem_mode
],
3466 base
= gen_int_mode (off
[mem_mode
], Pmode
);
3471 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3474 /* To avoid splitting addressing modes, pretend that no cse will
3476 old_cse_not_expected
= cse_not_expected
;
3477 cse_not_expected
= true;
3478 addr
= memory_address (mem_mode
, addr
);
3479 cse_not_expected
= old_cse_not_expected
;
3483 acost
= seq_cost (seq
);
3484 acost
+= address_cost (addr
, mem_mode
);
3488 costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
] = acost
;
3491 /* On some targets, it is quite expensive to load symbol to a register,
3492 which makes addresses that contain symbols look much more expensive.
3493 However, the symbol will have to be loaded in any case before the
3494 loop (and quite likely we have it in register already), so it does not
3495 make much sense to penalize them too heavily. So make some final
3496 tweaks for the SYMBOL_PRESENT modes:
3498 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3499 var is cheaper, use this mode with small penalty.
3500 If VAR_PRESENT is true, try whether the mode with
3501 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3502 if this is the case, use it. */
3503 add_c
= add_cost (Pmode
);
3504 for (i
= 0; i
< 8; i
++)
3507 off_p
= (i
>> 1) & 1;
3508 rat_p
= (i
>> 2) & 1;
3510 acost
= costs
[mem_mode
][0][1][off_p
][rat_p
] + 1;
3514 if (acost
< costs
[mem_mode
][1][var_p
][off_p
][rat_p
])
3515 costs
[mem_mode
][1][var_p
][off_p
][rat_p
] = acost
;
3518 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3520 fprintf (dump_file
, "Address costs:\n");
3522 for (i
= 0; i
< 16; i
++)
3525 var_p
= (i
>> 1) & 1;
3526 off_p
= (i
>> 2) & 1;
3527 rat_p
= (i
>> 3) & 1;
3529 fprintf (dump_file
, " ");
3531 fprintf (dump_file
, "sym + ");
3533 fprintf (dump_file
, "var + ");
3535 fprintf (dump_file
, "cst + ");
3537 fprintf (dump_file
, "rat * ");
3539 acost
= costs
[mem_mode
][sym_p
][var_p
][off_p
][rat_p
];
3540 fprintf (dump_file
, "index costs %d\n", acost
);
3542 fprintf (dump_file
, "\n");
3546 bits
= GET_MODE_BITSIZE (Pmode
);
3547 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3549 if ((offset
>> (bits
- 1) & 1))
3554 offset_p
= (s_offset
!= 0
3555 && min_offset
[mem_mode
] <= s_offset
3556 && s_offset
<= max_offset
[mem_mode
]);
3557 ratio_p
= (ratio
!= 1
3558 && multiplier_allowed_in_address_p (ratio
, mem_mode
));
3560 if (ratio
!= 1 && !ratio_p
)
3561 cost
+= multiply_by_cost (ratio
, Pmode
);
3563 if (s_offset
&& !offset_p
&& !symbol_present
)
3564 cost
+= add_cost (Pmode
);
3566 acost
= costs
[mem_mode
][symbol_present
][var_present
][offset_p
][ratio_p
];
3567 return cost
+ acost
;
3570 /* Estimates cost of forcing expression EXPR into a variable. */
3573 force_expr_to_var_cost (tree expr
)
3575 static bool costs_initialized
= false;
3576 static unsigned integer_cost
;
3577 static unsigned symbol_cost
;
3578 static unsigned address_cost
;
3580 unsigned cost0
, cost1
, cost
;
3581 enum machine_mode mode
;
3583 if (!costs_initialized
)
3585 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3586 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3587 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3589 tree type
= build_pointer_type (integer_type_node
);
3591 integer_cost
= computation_cost (build_int_cst (integer_type_node
,
3594 SET_DECL_RTL (var
, x
);
3595 TREE_STATIC (var
) = 1;
3596 addr
= build1 (ADDR_EXPR
, type
, var
);
3597 symbol_cost
= computation_cost (addr
) + 1;
3600 = computation_cost (build2 (PLUS_EXPR
, type
,
3602 build_int_cst (type
, 2000))) + 1;
3603 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3605 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3606 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3607 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3608 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3609 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3610 fprintf (dump_file
, "\n");
3613 costs_initialized
= true;
3618 if (SSA_VAR_P (expr
))
3621 if (TREE_INVARIANT (expr
))
3623 if (TREE_CODE (expr
) == INTEGER_CST
)
3624 return integer_cost
;
3626 if (TREE_CODE (expr
) == ADDR_EXPR
)
3628 tree obj
= TREE_OPERAND (expr
, 0);
3630 if (TREE_CODE (obj
) == VAR_DECL
3631 || TREE_CODE (obj
) == PARM_DECL
3632 || TREE_CODE (obj
) == RESULT_DECL
)
3636 return address_cost
;
3639 switch (TREE_CODE (expr
))
3644 op0
= TREE_OPERAND (expr
, 0);
3645 op1
= TREE_OPERAND (expr
, 1);
3649 if (is_gimple_val (op0
))
3652 cost0
= force_expr_to_var_cost (op0
);
3654 if (is_gimple_val (op1
))
3657 cost1
= force_expr_to_var_cost (op1
);
3662 /* Just an arbitrary value, FIXME. */
3663 return target_spill_cost
;
3666 mode
= TYPE_MODE (TREE_TYPE (expr
));
3667 switch (TREE_CODE (expr
))
3671 cost
= add_cost (mode
);
3675 if (cst_and_fits_in_hwi (op0
))
3676 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3677 else if (cst_and_fits_in_hwi (op1
))
3678 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3680 return target_spill_cost
;
3690 /* Bound the cost by target_spill_cost. The parts of complicated
3691 computations often are either loop invariant or at least can
3692 be shared between several iv uses, so letting this grow without
3693 limits would not give reasonable results. */
3694 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3697 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3698 invariants the computation depends on. */
3701 force_var_cost (struct ivopts_data
*data
,
3702 tree expr
, bitmap
*depends_on
)
3706 fd_ivopts_data
= data
;
3707 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3710 return force_expr_to_var_cost (expr
);
3713 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3714 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3715 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3716 invariants the computation depends on. */
3719 split_address_cost (struct ivopts_data
*data
,
3720 tree addr
, bool *symbol_present
, bool *var_present
,
3721 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3724 HOST_WIDE_INT bitsize
;
3725 HOST_WIDE_INT bitpos
;
3727 enum machine_mode mode
;
3728 int unsignedp
, volatilep
;
3730 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3731 &unsignedp
, &volatilep
, false);
3734 || bitpos
% BITS_PER_UNIT
!= 0
3735 || TREE_CODE (core
) != VAR_DECL
)
3737 *symbol_present
= false;
3738 *var_present
= true;
3739 fd_ivopts_data
= data
;
3740 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3741 return target_spill_cost
;
3744 *offset
+= bitpos
/ BITS_PER_UNIT
;
3745 if (TREE_STATIC (core
)
3746 || DECL_EXTERNAL (core
))
3748 *symbol_present
= true;
3749 *var_present
= false;
3753 *symbol_present
= false;
3754 *var_present
= true;
3758 /* Estimates cost of expressing difference of addresses E1 - E2 as
3759 var + symbol + offset. The value of offset is added to OFFSET,
3760 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3761 part is missing. DEPENDS_ON is a set of the invariants the computation
3765 ptr_difference_cost (struct ivopts_data
*data
,
3766 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3767 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3769 HOST_WIDE_INT diff
= 0;
3772 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3774 if (ptr_difference_const (e1
, e2
, &diff
))
3777 *symbol_present
= false;
3778 *var_present
= false;
3782 if (e2
== integer_zero_node
)
3783 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3784 symbol_present
, var_present
, offset
, depends_on
);
3786 *symbol_present
= false;
3787 *var_present
= true;
3789 cost
= force_var_cost (data
, e1
, depends_on
);
3790 cost
+= force_var_cost (data
, e2
, depends_on
);
3791 cost
+= add_cost (Pmode
);
3796 /* Estimates cost of expressing difference E1 - E2 as
3797 var + symbol + offset. The value of offset is added to OFFSET,
3798 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3799 part is missing. DEPENDS_ON is a set of the invariants the computation
3803 difference_cost (struct ivopts_data
*data
,
3804 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3805 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3808 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3809 unsigned HOST_WIDE_INT off1
, off2
;
3811 e1
= strip_offset (e1
, &off1
);
3812 e2
= strip_offset (e2
, &off2
);
3813 *offset
+= off1
- off2
;
3818 if (TREE_CODE (e1
) == ADDR_EXPR
)
3819 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3821 *symbol_present
= false;
3823 if (operand_equal_p (e1
, e2
, 0))
3825 *var_present
= false;
3828 *var_present
= true;
3830 return force_var_cost (data
, e1
, depends_on
);
3834 cost
= force_var_cost (data
, e2
, depends_on
);
3835 cost
+= multiply_by_cost (-1, mode
);
3840 cost
= force_var_cost (data
, e1
, depends_on
);
3841 cost
+= force_var_cost (data
, e2
, depends_on
);
3842 cost
+= add_cost (mode
);
3847 /* Determines the cost of the computation by that USE is expressed
3848 from induction variable CAND. If ADDRESS_P is true, we just need
3849 to create an address from it, otherwise we want to get it into
3850 register. A set of invariants we depend on is stored in
3851 DEPENDS_ON. AT is the statement at that the value is computed. */
3854 get_computation_cost_at (struct ivopts_data
*data
,
3855 struct iv_use
*use
, struct iv_cand
*cand
,
3856 bool address_p
, bitmap
*depends_on
, tree at
)
3858 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3860 tree utype
= TREE_TYPE (ubase
), ctype
;
3861 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3862 HOST_WIDE_INT ratio
, aratio
;
3863 bool var_present
, symbol_present
;
3864 unsigned cost
= 0, n_sums
;
3868 /* Only consider real candidates. */
3872 cbase
= cand
->iv
->base
;
3873 cstep
= cand
->iv
->step
;
3874 ctype
= TREE_TYPE (cbase
);
3876 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3878 /* We do not have a precision to express the values of use. */
3884 /* Do not try to express address of an object with computation based
3885 on address of a different object. This may cause problems in rtl
3886 level alias analysis (that does not expect this to be happening,
3887 as this is illegal in C), and would be unlikely to be useful
3889 if (use
->iv
->base_object
3890 && cand
->iv
->base_object
3891 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3895 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3897 /* TODO -- add direct handling of this case. */
3901 /* CSTEPI is removed from the offset in case statement is after the
3902 increment. If the step is not constant, we use zero instead.
3903 This is a bit imprecise (there is the extra addition), but
3904 redundancy elimination is likely to transform the code so that
3905 it uses value of the variable before increment anyway,
3906 so it is not that much unrealistic. */
3907 if (cst_and_fits_in_hwi (cstep
))
3908 cstepi
= int_cst_value (cstep
);
3912 if (cst_and_fits_in_hwi (ustep
)
3913 && cst_and_fits_in_hwi (cstep
))
3915 ustepi
= int_cst_value (ustep
);
3917 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3924 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3927 if (double_int_fits_in_shwi_p (rat
))
3928 ratio
= double_int_to_shwi (rat
);
3933 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3934 or ratio == 1, it is better to handle this like
3936 ubase - ratio * cbase + ratio * var
3938 (also holds in the case ratio == -1, TODO. */
3940 if (cst_and_fits_in_hwi (cbase
))
3942 offset
= - ratio
* int_cst_value (cbase
);
3943 cost
+= difference_cost (data
,
3944 ubase
, integer_zero_node
,
3945 &symbol_present
, &var_present
, &offset
,
3948 else if (ratio
== 1)
3950 cost
+= difference_cost (data
,
3952 &symbol_present
, &var_present
, &offset
,
3957 cost
+= force_var_cost (data
, cbase
, depends_on
);
3958 cost
+= add_cost (TYPE_MODE (ctype
));
3959 cost
+= difference_cost (data
,
3960 ubase
, integer_zero_node
,
3961 &symbol_present
, &var_present
, &offset
,
3965 /* If we are after the increment, the value of the candidate is higher by
3967 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3968 offset
-= ratio
* cstepi
;
3970 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3971 (symbol/var/const parts may be omitted). If we are looking for an address,
3972 find the cost of addressing this. */
3974 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
,
3975 TYPE_MODE (TREE_TYPE (*use
->op_p
)));
3977 /* Otherwise estimate the costs for computing the expression. */
3978 aratio
= ratio
> 0 ? ratio
: -ratio
;
3979 if (!symbol_present
&& !var_present
&& !offset
)
3982 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3988 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3992 /* Symbol + offset should be compile-time computable. */
3993 && (symbol_present
|| offset
))
3996 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
4000 /* Just get the expression, expand it and measure the cost. */
4001 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4007 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
4009 return computation_cost (comp
);
4013 /* Determines the cost of the computation by that USE is expressed
4014 from induction variable CAND. If ADDRESS_P is true, we just need
4015 to create an address from it, otherwise we want to get it into
4016 register. A set of invariants we depend on is stored in
4020 get_computation_cost (struct ivopts_data
*data
,
4021 struct iv_use
*use
, struct iv_cand
*cand
,
4022 bool address_p
, bitmap
*depends_on
)
4024 return get_computation_cost_at (data
,
4025 use
, cand
, address_p
, depends_on
, use
->stmt
);
4028 /* Determines cost of basing replacement of USE on CAND in a generic
4032 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4033 struct iv_use
*use
, struct iv_cand
*cand
)
4038 /* The simple case first -- if we need to express value of the preserved
4039 original biv, the cost is 0. This also prevents us from counting the
4040 cost of increment twice -- once at this use and once in the cost of
4042 if (cand
->pos
== IP_ORIGINAL
4043 && cand
->incremented_at
== use
->stmt
)
4045 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
4049 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4050 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
4052 return cost
!= INFTY
;
4055 /* Determines cost of basing replacement of USE on CAND in an address. */
4058 determine_use_iv_cost_address (struct ivopts_data
*data
,
4059 struct iv_use
*use
, struct iv_cand
*cand
)
4062 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
4064 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
4066 return cost
!= INFTY
;
4069 /* Computes value of induction variable IV in iteration NITER. */
4072 iv_value (struct iv
*iv
, tree niter
)
4075 tree type
= TREE_TYPE (iv
->base
);
4077 niter
= fold_convert (type
, niter
);
4078 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
4080 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
4083 /* Computes value of candidate CAND at position AT in iteration NITER. */
4086 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
4088 tree val
= iv_value (cand
->iv
, niter
);
4089 tree type
= TREE_TYPE (cand
->iv
->base
);
4091 if (stmt_after_increment (loop
, cand
, at
))
4092 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
4097 /* Returns period of induction variable iv. */
4100 iv_period (struct iv
*iv
)
4102 tree step
= iv
->step
, period
, type
;
4105 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4107 /* Period of the iv is gcd (step, type range). Since type range is power
4108 of two, it suffices to determine the maximum power of two that divides
4110 pow2div
= num_ending_zeros (step
);
4111 type
= unsigned_type_for (TREE_TYPE (step
));
4113 period
= build_low_bits_mask (type
,
4114 (TYPE_PRECISION (type
)
4115 - tree_low_cst (pow2div
, 1)));
4120 /* Returns the comparison operator used when eliminating the iv USE. */
4122 static enum tree_code
4123 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4125 struct loop
*loop
= data
->current_loop
;
4129 ex_bb
= bb_for_stmt (use
->stmt
);
4130 exit
= EDGE_SUCC (ex_bb
, 0);
4131 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4132 exit
= EDGE_SUCC (ex_bb
, 1);
4134 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4137 /* Check whether it is possible to express the condition in USE by comparison
4138 of candidate CAND. If so, store the value compared with to BOUND. */
4141 may_eliminate_iv (struct ivopts_data
*data
,
4142 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
4147 tree wider_type
, period
, per_type
;
4148 struct loop
*loop
= data
->current_loop
;
4150 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4153 /* For now works only for exits that dominate the loop latch. TODO -- extend
4154 for other conditions inside loop body. */
4155 ex_bb
= bb_for_stmt (use
->stmt
);
4156 if (use
->stmt
!= last_stmt (ex_bb
)
4157 || TREE_CODE (use
->stmt
) != COND_EXPR
)
4159 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4162 exit
= EDGE_SUCC (ex_bb
, 0);
4163 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4164 exit
= EDGE_SUCC (ex_bb
, 1);
4165 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4168 nit
= niter_for_exit (data
, exit
);
4172 nit_type
= TREE_TYPE (nit
);
4174 /* Determine whether we may use the variable to test whether niter iterations
4175 elapsed. This is the case iff the period of the induction variable is
4176 greater than the number of iterations. */
4177 period
= iv_period (cand
->iv
);
4180 per_type
= TREE_TYPE (period
);
4182 wider_type
= TREE_TYPE (period
);
4183 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
4184 wider_type
= per_type
;
4186 wider_type
= nit_type
;
4188 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
4189 fold_convert (wider_type
, period
),
4190 fold_convert (wider_type
, nit
))))
4193 *bound
= fold_affine_expr (cand_value_at (loop
, cand
, use
->stmt
, nit
));
4197 /* Determines cost of basing replacement of USE on CAND in a condition. */
4200 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4201 struct iv_use
*use
, struct iv_cand
*cand
)
4203 tree bound
= NULL_TREE
, op
, cond
;
4204 bitmap depends_on
= NULL
;
4207 /* Only consider real candidates. */
4210 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4214 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4216 cost
= force_var_cost (data
, bound
, &depends_on
);
4218 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4219 return cost
!= INFTY
;
4222 /* The induction variable elimination failed; just express the original
4223 giv. If it is compared with an invariant, note that we cannot get
4225 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4228 if (TREE_CODE (cond
) != SSA_NAME
)
4230 op
= TREE_OPERAND (cond
, 0);
4231 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4232 op
= TREE_OPERAND (cond
, 1);
4233 if (TREE_CODE (op
) == SSA_NAME
)
4235 op
= get_iv (data
, op
)->base
;
4236 fd_ivopts_data
= data
;
4237 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4241 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4242 return cost
!= INFTY
;
4245 /* Determines cost of basing replacement of USE on CAND. Returns false
4246 if USE cannot be based on CAND. */
4249 determine_use_iv_cost (struct ivopts_data
*data
,
4250 struct iv_use
*use
, struct iv_cand
*cand
)
4254 case USE_NONLINEAR_EXPR
:
4255 return determine_use_iv_cost_generic (data
, use
, cand
);
4258 return determine_use_iv_cost_address (data
, use
, cand
);
4261 return determine_use_iv_cost_condition (data
, use
, cand
);
4268 /* Determines costs of basing the use of the iv on an iv candidate. */
4271 determine_use_iv_costs (struct ivopts_data
*data
)
4275 struct iv_cand
*cand
;
4276 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4278 alloc_use_cost_map (data
);
4280 for (i
= 0; i
< n_iv_uses (data
); i
++)
4282 use
= iv_use (data
, i
);
4284 if (data
->consider_all_candidates
)
4286 for (j
= 0; j
< n_iv_cands (data
); j
++)
4288 cand
= iv_cand (data
, j
);
4289 determine_use_iv_cost (data
, use
, cand
);
4296 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4298 cand
= iv_cand (data
, j
);
4299 if (!determine_use_iv_cost (data
, use
, cand
))
4300 bitmap_set_bit (to_clear
, j
);
4303 /* Remove the candidates for that the cost is infinite from
4304 the list of related candidates. */
4305 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4306 bitmap_clear (to_clear
);
4310 BITMAP_FREE (to_clear
);
4312 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4314 fprintf (dump_file
, "Use-candidate costs:\n");
4316 for (i
= 0; i
< n_iv_uses (data
); i
++)
4318 use
= iv_use (data
, i
);
4320 fprintf (dump_file
, "Use %d:\n", i
);
4321 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4322 for (j
= 0; j
< use
->n_map_members
; j
++)
4324 if (!use
->cost_map
[j
].cand
4325 || use
->cost_map
[j
].cost
== INFTY
)
4328 fprintf (dump_file
, " %d\t%d\t",
4329 use
->cost_map
[j
].cand
->id
,
4330 use
->cost_map
[j
].cost
);
4331 if (use
->cost_map
[j
].depends_on
)
4332 bitmap_print (dump_file
,
4333 use
->cost_map
[j
].depends_on
, "","");
4334 fprintf (dump_file
, "\n");
4337 fprintf (dump_file
, "\n");
4339 fprintf (dump_file
, "\n");
4343 /* Determines cost of the candidate CAND. */
4346 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4348 unsigned cost_base
, cost_step
;
4357 /* There are two costs associated with the candidate -- its increment
4358 and its initialization. The second is almost negligible for any loop
4359 that rolls enough, so we take it just very little into account. */
4361 base
= cand
->iv
->base
;
4362 cost_base
= force_var_cost (data
, base
, NULL
);
4363 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4365 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4367 /* Prefer the original iv unless we may gain something by replacing it;
4368 this is not really relevant for artificial ivs created by other
4370 if (cand
->pos
== IP_ORIGINAL
4371 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4374 /* Prefer not to insert statements into latch unless there are some
4375 already (so that we do not create unnecessary jumps). */
4376 if (cand
->pos
== IP_END
4377 && empty_block_p (ip_end_pos (data
->current_loop
)))
4381 /* Determines costs of computation of the candidates. */
4384 determine_iv_costs (struct ivopts_data
*data
)
4388 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4390 fprintf (dump_file
, "Candidate costs:\n");
4391 fprintf (dump_file
, " cand\tcost\n");
4394 for (i
= 0; i
< n_iv_cands (data
); i
++)
4396 struct iv_cand
*cand
= iv_cand (data
, i
);
4398 determine_iv_cost (data
, cand
);
4400 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4401 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4404 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4405 fprintf (dump_file
, "\n");
4408 /* Calculates cost for having SIZE induction variables. */
4411 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4413 return global_cost_for_size (size
, data
->regs_used
, n_iv_uses (data
));
4416 /* For each size of the induction variable set determine the penalty. */
4419 determine_set_costs (struct ivopts_data
*data
)
4423 struct loop
*loop
= data
->current_loop
;
4426 /* We use the following model (definitely improvable, especially the
4427 cost function -- TODO):
4429 We estimate the number of registers available (using MD data), name it A.
4431 We estimate the number of registers used by the loop, name it U. This
4432 number is obtained as the number of loop phi nodes (not counting virtual
4433 registers and bivs) + the number of variables from outside of the loop.
4435 We set a reserve R (free regs that are used for temporary computations,
4436 etc.). For now the reserve is a constant 3.
4438 Let I be the number of induction variables.
4440 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4441 make a lot of ivs without a reason).
4442 -- if A - R < U + I <= A, the cost is I * PRES_COST
4443 -- if U + I > A, the cost is I * PRES_COST and
4444 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4446 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4448 fprintf (dump_file
, "Global costs:\n");
4449 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4450 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4451 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4452 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
4456 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4458 op
= PHI_RESULT (phi
);
4460 if (!is_gimple_reg (op
))
4463 if (get_iv (data
, op
))
4469 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4471 struct version_info
*info
= ver_info (data
, j
);
4473 if (info
->inv_id
&& info
->has_nonlin_use
)
4477 data
->regs_used
= n
;
4478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4479 fprintf (dump_file
, " regs_used %d\n", n
);
4481 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4483 fprintf (dump_file
, " cost for size:\n");
4484 fprintf (dump_file
, " ivs\tcost\n");
4485 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4486 fprintf (dump_file
, " %d\t%d\n", j
,
4487 ivopts_global_cost_for_size (data
, j
));
4488 fprintf (dump_file
, "\n");
4492 /* Returns true if A is a cheaper cost pair than B. */
4495 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4503 if (a
->cost
< b
->cost
)
4506 if (a
->cost
> b
->cost
)
4509 /* In case the costs are the same, prefer the cheaper candidate. */
4510 if (a
->cand
->cost
< b
->cand
->cost
)
4516 /* Computes the cost field of IVS structure. */
4519 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4523 cost
+= ivs
->cand_use_cost
;
4524 cost
+= ivs
->cand_cost
;
4525 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4530 /* Remove invariants in set INVS to set IVS. */
4533 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4541 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4543 ivs
->n_invariant_uses
[iid
]--;
4544 if (ivs
->n_invariant_uses
[iid
] == 0)
4549 /* Set USE not to be expressed by any candidate in IVS. */
4552 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4555 unsigned uid
= use
->id
, cid
;
4556 struct cost_pair
*cp
;
4558 cp
= ivs
->cand_for_use
[uid
];
4564 ivs
->cand_for_use
[uid
] = NULL
;
4565 ivs
->n_cand_uses
[cid
]--;
4567 if (ivs
->n_cand_uses
[cid
] == 0)
4569 bitmap_clear_bit (ivs
->cands
, cid
);
4570 /* Do not count the pseudocandidates. */
4574 ivs
->cand_cost
-= cp
->cand
->cost
;
4576 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4579 ivs
->cand_use_cost
-= cp
->cost
;
4581 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4582 iv_ca_recount_cost (data
, ivs
);
4585 /* Add invariants in set INVS to set IVS. */
4588 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4596 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4598 ivs
->n_invariant_uses
[iid
]++;
4599 if (ivs
->n_invariant_uses
[iid
] == 1)
4604 /* Set cost pair for USE in set IVS to CP. */
4607 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4608 struct iv_use
*use
, struct cost_pair
*cp
)
4610 unsigned uid
= use
->id
, cid
;
4612 if (ivs
->cand_for_use
[uid
] == cp
)
4615 if (ivs
->cand_for_use
[uid
])
4616 iv_ca_set_no_cp (data
, ivs
, use
);
4623 ivs
->cand_for_use
[uid
] = cp
;
4624 ivs
->n_cand_uses
[cid
]++;
4625 if (ivs
->n_cand_uses
[cid
] == 1)
4627 bitmap_set_bit (ivs
->cands
, cid
);
4628 /* Do not count the pseudocandidates. */
4632 ivs
->cand_cost
+= cp
->cand
->cost
;
4634 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4637 ivs
->cand_use_cost
+= cp
->cost
;
4638 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4639 iv_ca_recount_cost (data
, ivs
);
4643 /* Extend set IVS by expressing USE by some of the candidates in it
4647 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4650 struct cost_pair
*best_cp
= NULL
, *cp
;
4654 gcc_assert (ivs
->upto
>= use
->id
);
4656 if (ivs
->upto
== use
->id
)
4662 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4664 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4666 if (cheaper_cost_pair (cp
, best_cp
))
4670 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4673 /* Get cost for assignment IVS. */
4676 iv_ca_cost (struct iv_ca
*ivs
)
4678 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4681 /* Returns true if all dependences of CP are among invariants in IVS. */
4684 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4689 if (!cp
->depends_on
)
4692 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4694 if (ivs
->n_invariant_uses
[i
] == 0)
4701 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4702 it before NEXT_CHANGE. */
4704 static struct iv_ca_delta
*
4705 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4706 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4708 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4711 change
->old_cp
= old_cp
;
4712 change
->new_cp
= new_cp
;
4713 change
->next_change
= next_change
;
4718 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4721 static struct iv_ca_delta
*
4722 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4724 struct iv_ca_delta
*last
;
4732 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4734 last
->next_change
= l2
;
4739 /* Returns candidate by that USE is expressed in IVS. */
4741 static struct cost_pair
*
4742 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4744 return ivs
->cand_for_use
[use
->id
];
4747 /* Reverse the list of changes DELTA, forming the inverse to it. */
4749 static struct iv_ca_delta
*
4750 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4752 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4753 struct cost_pair
*tmp
;
4755 for (act
= delta
; act
; act
= next
)
4757 next
= act
->next_change
;
4758 act
->next_change
= prev
;
4762 act
->old_cp
= act
->new_cp
;
4769 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4770 reverted instead. */
4773 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4774 struct iv_ca_delta
*delta
, bool forward
)
4776 struct cost_pair
*from
, *to
;
4777 struct iv_ca_delta
*act
;
4780 delta
= iv_ca_delta_reverse (delta
);
4782 for (act
= delta
; act
; act
= act
->next_change
)
4786 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4787 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4791 iv_ca_delta_reverse (delta
);
4794 /* Returns true if CAND is used in IVS. */
4797 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4799 return ivs
->n_cand_uses
[cand
->id
] > 0;
4802 /* Returns number of induction variable candidates in the set IVS. */
4805 iv_ca_n_cands (struct iv_ca
*ivs
)
4807 return ivs
->n_cands
;
4810 /* Free the list of changes DELTA. */
4813 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4815 struct iv_ca_delta
*act
, *next
;
4817 for (act
= *delta
; act
; act
= next
)
4819 next
= act
->next_change
;
4826 /* Allocates new iv candidates assignment. */
4828 static struct iv_ca
*
4829 iv_ca_new (struct ivopts_data
*data
)
4831 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4835 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4836 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4837 nw
->cands
= BITMAP_ALLOC (NULL
);
4840 nw
->cand_use_cost
= 0;
4842 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4848 /* Free memory occupied by the set IVS. */
4851 iv_ca_free (struct iv_ca
**ivs
)
4853 free ((*ivs
)->cand_for_use
);
4854 free ((*ivs
)->n_cand_uses
);
4855 BITMAP_FREE ((*ivs
)->cands
);
4856 free ((*ivs
)->n_invariant_uses
);
4861 /* Dumps IVS to FILE. */
4864 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4866 const char *pref
= " invariants ";
4869 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4870 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4872 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4873 if (ivs
->n_invariant_uses
[i
])
4875 fprintf (file
, "%s%d", pref
, i
);
4878 fprintf (file
, "\n");
4881 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4882 new set, and store differences in DELTA. Number of induction variables
4883 in the new set is stored to N_IVS. */
4886 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4887 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4892 struct cost_pair
*old_cp
, *new_cp
;
4895 for (i
= 0; i
< ivs
->upto
; i
++)
4897 use
= iv_use (data
, i
);
4898 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4901 && old_cp
->cand
== cand
)
4904 new_cp
= get_use_iv_cost (data
, use
, cand
);
4908 if (!iv_ca_has_deps (ivs
, new_cp
))
4911 if (!cheaper_cost_pair (new_cp
, old_cp
))
4914 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4917 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4918 cost
= iv_ca_cost (ivs
);
4920 *n_ivs
= iv_ca_n_cands (ivs
);
4921 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4926 /* Try narrowing set IVS by removing CAND. Return the cost of
4927 the new set and store the differences in DELTA. */
4930 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4931 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4935 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4937 struct iv_cand
*cnd
;
4941 for (i
= 0; i
< n_iv_uses (data
); i
++)
4943 use
= iv_use (data
, i
);
4945 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4946 if (old_cp
->cand
!= cand
)
4951 if (data
->consider_all_candidates
)
4953 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4958 cnd
= iv_cand (data
, ci
);
4960 cp
= get_use_iv_cost (data
, use
, cnd
);
4963 if (!iv_ca_has_deps (ivs
, cp
))
4966 if (!cheaper_cost_pair (cp
, new_cp
))
4974 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4979 cnd
= iv_cand (data
, ci
);
4981 cp
= get_use_iv_cost (data
, use
, cnd
);
4984 if (!iv_ca_has_deps (ivs
, cp
))
4987 if (!cheaper_cost_pair (cp
, new_cp
))
4996 iv_ca_delta_free (delta
);
5000 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5003 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5004 cost
= iv_ca_cost (ivs
);
5005 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5010 /* Try optimizing the set of candidates IVS by removing candidates different
5011 from to EXCEPT_CAND from it. Return cost of the new set, and store
5012 differences in DELTA. */
5015 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5016 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5019 struct iv_ca_delta
*act_delta
, *best_delta
;
5020 unsigned i
, best_cost
, acost
;
5021 struct iv_cand
*cand
;
5024 best_cost
= iv_ca_cost (ivs
);
5026 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5028 cand
= iv_cand (data
, i
);
5030 if (cand
== except_cand
)
5033 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5035 if (acost
< best_cost
)
5038 iv_ca_delta_free (&best_delta
);
5039 best_delta
= act_delta
;
5042 iv_ca_delta_free (&act_delta
);
5051 /* Recurse to possibly remove other unnecessary ivs. */
5052 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5053 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5054 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5055 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5059 /* Tries to extend the sets IVS in the best possible way in order
5060 to express the USE. */
5063 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5066 unsigned best_cost
, act_cost
;
5069 struct iv_cand
*cand
;
5070 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5071 struct cost_pair
*cp
;
5073 iv_ca_add_use (data
, ivs
, use
);
5074 best_cost
= iv_ca_cost (ivs
);
5076 cp
= iv_ca_cand_for_use (ivs
, use
);
5079 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5080 iv_ca_set_no_cp (data
, ivs
, use
);
5083 /* First try important candidates. Only if it fails, try the specific ones.
5084 Rationale -- in loops with many variables the best choice often is to use
5085 just one generic biv. If we added here many ivs specific to the uses,
5086 the optimization algorithm later would be likely to get stuck in a local
5087 minimum, thus causing us to create too many ivs. The approach from
5088 few ivs to more seems more likely to be successful -- starting from few
5089 ivs, replacing an expensive use by a specific iv should always be a
5091 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5093 cand
= iv_cand (data
, i
);
5095 if (iv_ca_cand_used_p (ivs
, cand
))
5098 cp
= get_use_iv_cost (data
, use
, cand
);
5102 iv_ca_set_cp (data
, ivs
, use
, cp
);
5103 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5104 iv_ca_set_no_cp (data
, ivs
, use
);
5105 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5107 if (act_cost
< best_cost
)
5109 best_cost
= act_cost
;
5111 iv_ca_delta_free (&best_delta
);
5112 best_delta
= act_delta
;
5115 iv_ca_delta_free (&act_delta
);
5118 if (best_cost
== INFTY
)
5120 for (i
= 0; i
< use
->n_map_members
; i
++)
5122 cp
= use
->cost_map
+ i
;
5127 /* Already tried this. */
5128 if (cand
->important
)
5131 if (iv_ca_cand_used_p (ivs
, cand
))
5135 iv_ca_set_cp (data
, ivs
, use
, cp
);
5136 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5137 iv_ca_set_no_cp (data
, ivs
, use
);
5138 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5141 if (act_cost
< best_cost
)
5143 best_cost
= act_cost
;
5146 iv_ca_delta_free (&best_delta
);
5147 best_delta
= act_delta
;
5150 iv_ca_delta_free (&act_delta
);
5154 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5155 iv_ca_delta_free (&best_delta
);
5157 return (best_cost
!= INFTY
);
5160 /* Finds an initial assignment of candidates to uses. */
5162 static struct iv_ca
*
5163 get_initial_solution (struct ivopts_data
*data
)
5165 struct iv_ca
*ivs
= iv_ca_new (data
);
5168 for (i
= 0; i
< n_iv_uses (data
); i
++)
5169 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
5178 /* Tries to improve set of induction variables IVS. */
5181 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5183 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
5184 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5185 struct iv_cand
*cand
;
5187 /* Try extending the set of induction variables by one. */
5188 for (i
= 0; i
< n_iv_cands (data
); i
++)
5190 cand
= iv_cand (data
, i
);
5192 if (iv_ca_cand_used_p (ivs
, cand
))
5195 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5199 /* If we successfully added the candidate and the set is small enough,
5200 try optimizing it by removing other candidates. */
5201 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5203 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5204 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5205 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5206 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5209 if (acost
< best_cost
)
5212 iv_ca_delta_free (&best_delta
);
5213 best_delta
= act_delta
;
5216 iv_ca_delta_free (&act_delta
);
5221 /* Try removing the candidates from the set instead. */
5222 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5224 /* Nothing more we can do. */
5229 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5230 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5231 iv_ca_delta_free (&best_delta
);
5235 /* Attempts to find the optimal set of induction variables. We do simple
5236 greedy heuristic -- we try to replace at most one candidate in the selected
5237 solution and remove the unused ivs while this improves the cost. */
5239 static struct iv_ca
*
5240 find_optimal_iv_set (struct ivopts_data
*data
)
5246 /* Get the initial solution. */
5247 set
= get_initial_solution (data
);
5250 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5251 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5255 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5257 fprintf (dump_file
, "Initial set of candidates:\n");
5258 iv_ca_dump (data
, dump_file
, set
);
5261 while (try_improve_iv_set (data
, set
))
5263 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5265 fprintf (dump_file
, "Improved to:\n");
5266 iv_ca_dump (data
, dump_file
, set
);
5270 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5271 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5273 for (i
= 0; i
< n_iv_uses (data
); i
++)
5275 use
= iv_use (data
, i
);
5276 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5282 /* Creates a new induction variable corresponding to CAND. */
5285 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5287 block_stmt_iterator incr_pos
;
5297 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5301 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5306 /* Mark that the iv is preserved. */
5307 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5308 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5310 /* Rewrite the increment so that it uses var_before directly. */
5311 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5316 gimple_add_tmp_var (cand
->var_before
);
5317 add_referenced_var (cand
->var_before
);
5319 base
= unshare_expr (cand
->iv
->base
);
5321 create_iv (base
, unshare_expr (cand
->iv
->step
),
5322 cand
->var_before
, data
->current_loop
,
5323 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5326 /* Creates new induction variables described in SET. */
5329 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5332 struct iv_cand
*cand
;
5335 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5337 cand
= iv_cand (data
, i
);
5338 create_new_iv (data
, cand
);
5342 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5343 is true, remove also the ssa name defined by the statement. */
5346 remove_statement (tree stmt
, bool including_defined_name
)
5348 if (TREE_CODE (stmt
) == PHI_NODE
)
5350 if (!including_defined_name
)
5352 /* Prevent the ssa name defined by the statement from being removed. */
5353 SET_PHI_RESULT (stmt
, NULL
);
5355 remove_phi_node (stmt
, NULL_TREE
);
5359 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5361 bsi_remove (&bsi
, true);
5365 /* Rewrites USE (definition of iv used in a nonlinear expression)
5366 using candidate CAND. */
5369 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5370 struct iv_use
*use
, struct iv_cand
*cand
)
5373 tree op
, stmts
, tgt
, ass
;
5374 block_stmt_iterator bsi
, pbsi
;
5376 /* An important special case -- if we are asked to express value of
5377 the original iv by itself, just exit; there is no need to
5378 introduce a new computation (that might also need casting the
5379 variable to unsigned and back). */
5380 if (cand
->pos
== IP_ORIGINAL
5381 && cand
->incremented_at
== use
->stmt
)
5383 tree step
, ctype
, utype
;
5384 enum tree_code incr_code
= PLUS_EXPR
;
5386 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5387 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5389 step
= cand
->iv
->step
;
5390 ctype
= TREE_TYPE (step
);
5391 utype
= TREE_TYPE (cand
->var_after
);
5392 if (TREE_CODE (step
) == NEGATE_EXPR
)
5394 incr_code
= MINUS_EXPR
;
5395 step
= TREE_OPERAND (step
, 0);
5398 /* Check whether we may leave the computation unchanged.
5399 This is the case only if it does not rely on other
5400 computations in the loop -- otherwise, the computation
5401 we rely upon may be removed in remove_unused_ivs,
5402 thus leading to ICE. */
5403 op
= TREE_OPERAND (use
->stmt
, 1);
5404 if (TREE_CODE (op
) == PLUS_EXPR
5405 || TREE_CODE (op
) == MINUS_EXPR
)
5407 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5408 op
= TREE_OPERAND (op
, 1);
5409 else if (TREE_CODE (op
) == PLUS_EXPR
5410 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5411 op
= TREE_OPERAND (op
, 0);
5419 && (TREE_CODE (op
) == INTEGER_CST
5420 || operand_equal_p (op
, step
, 0)))
5423 /* Otherwise, add the necessary computations to express
5425 op
= fold_convert (ctype
, cand
->var_before
);
5426 comp
= fold_convert (utype
,
5427 build2 (incr_code
, ctype
, op
,
5428 unshare_expr (step
)));
5431 comp
= get_computation (data
->current_loop
, use
, cand
);
5433 switch (TREE_CODE (use
->stmt
))
5436 tgt
= PHI_RESULT (use
->stmt
);
5438 /* If we should keep the biv, do not replace it. */
5439 if (name_info (data
, tgt
)->preserve_biv
)
5442 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5443 while (!bsi_end_p (pbsi
)
5444 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5452 tgt
= TREE_OPERAND (use
->stmt
, 0);
5453 bsi
= bsi_for_stmt (use
->stmt
);
5460 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5462 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5465 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5466 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5467 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5468 remove_statement (use
->stmt
, false);
5469 SSA_NAME_DEF_STMT (tgt
) = ass
;
5474 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5475 TREE_OPERAND (use
->stmt
, 1) = op
;
5479 /* Replaces ssa name in index IDX by its basic variable. Callback for
5483 idx_remove_ssa_names (tree base
, tree
*idx
,
5484 void *data ATTRIBUTE_UNUSED
)
5488 if (TREE_CODE (*idx
) == SSA_NAME
)
5489 *idx
= SSA_NAME_VAR (*idx
);
5491 if (TREE_CODE (base
) == ARRAY_REF
)
5493 op
= &TREE_OPERAND (base
, 2);
5495 && TREE_CODE (*op
) == SSA_NAME
)
5496 *op
= SSA_NAME_VAR (*op
);
5497 op
= &TREE_OPERAND (base
, 3);
5499 && TREE_CODE (*op
) == SSA_NAME
)
5500 *op
= SSA_NAME_VAR (*op
);
5506 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5509 unshare_and_remove_ssa_names (tree ref
)
5511 ref
= unshare_expr (ref
);
5512 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5517 /* Extract the alias analysis info for the memory reference REF. There are
5518 several ways how this information may be stored and what precisely is
5519 its semantics depending on the type of the reference, but there always is
5520 somewhere hidden one _DECL node that is used to determine the set of
5521 virtual operands for the reference. The code below deciphers this jungle
5522 and extracts this single useful piece of information. */
5525 get_ref_tag (tree ref
, tree orig
)
5527 tree var
= get_base_address (ref
);
5528 tree aref
= NULL_TREE
, tag
, sv
;
5529 HOST_WIDE_INT offset
, size
, maxsize
;
5531 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5533 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5538 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5539 return unshare_expr (sv
);
5544 if (TREE_CODE (var
) == INDIRECT_REF
)
5546 /* If the base is a dereference of a pointer, first check its name memory
5547 tag. If it does not have one, use its symbol memory tag. */
5548 var
= TREE_OPERAND (var
, 0);
5549 if (TREE_CODE (var
) != SSA_NAME
)
5552 if (SSA_NAME_PTR_INFO (var
))
5554 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5559 var
= SSA_NAME_VAR (var
);
5560 tag
= var_ann (var
)->symbol_mem_tag
;
5561 gcc_assert (tag
!= NULL_TREE
);
5569 tag
= var_ann (var
)->symbol_mem_tag
;
5577 /* Copies the reference information from OLD_REF to NEW_REF. */
5580 copy_ref_info (tree new_ref
, tree old_ref
)
5582 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5583 copy_mem_ref_info (new_ref
, old_ref
);
5586 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5587 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5591 /* Rewrites USE (address that is an iv) using candidate CAND. */
5594 rewrite_use_address (struct ivopts_data
*data
,
5595 struct iv_use
*use
, struct iv_cand
*cand
)
5597 struct affine_tree_combination aff
;
5598 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5601 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5602 unshare_aff_combination (&aff
);
5604 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5605 copy_ref_info (ref
, *use
->op_p
);
5609 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5613 rewrite_use_compare (struct ivopts_data
*data
,
5614 struct iv_use
*use
, struct iv_cand
*cand
)
5617 tree
*op_p
, cond
, op
, stmts
, bound
;
5618 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5619 enum tree_code compare
;
5620 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5625 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5626 tree var_type
= TREE_TYPE (var
);
5628 compare
= iv_elimination_compare (data
, use
);
5629 bound
= fold_convert (var_type
, bound
);
5630 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5634 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5636 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5637 update_stmt (use
->stmt
);
5641 /* The induction variable elimination failed; just express the original
5643 comp
= get_computation (data
->current_loop
, use
, cand
);
5646 op_p
= &TREE_OPERAND (cond
, 0);
5647 if (TREE_CODE (*op_p
) != SSA_NAME
5648 || zero_p (get_iv (data
, *op_p
)->step
))
5649 op_p
= &TREE_OPERAND (cond
, 1);
5651 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5653 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5658 /* Rewrites USE using candidate CAND. */
5661 rewrite_use (struct ivopts_data
*data
,
5662 struct iv_use
*use
, struct iv_cand
*cand
)
5666 case USE_NONLINEAR_EXPR
:
5667 rewrite_use_nonlinear_expr (data
, use
, cand
);
5671 rewrite_use_address (data
, use
, cand
);
5675 rewrite_use_compare (data
, use
, cand
);
5681 mark_new_vars_to_rename (use
->stmt
);
5684 /* Rewrite the uses using the selected induction variables. */
5687 rewrite_uses (struct ivopts_data
*data
)
5690 struct iv_cand
*cand
;
5693 for (i
= 0; i
< n_iv_uses (data
); i
++)
5695 use
= iv_use (data
, i
);
5696 cand
= use
->selected
;
5699 rewrite_use (data
, use
, cand
);
5703 /* Removes the ivs that are not used after rewriting. */
5706 remove_unused_ivs (struct ivopts_data
*data
)
5711 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5713 struct version_info
*info
;
5715 info
= ver_info (data
, j
);
5717 && !zero_p (info
->iv
->step
)
5719 && !info
->iv
->have_use_for
5720 && !info
->preserve_biv
)
5721 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5725 /* Frees data allocated by the optimization of a single loop. */
5728 free_loop_data (struct ivopts_data
*data
)
5734 htab_empty (data
->niters
);
5736 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5738 struct version_info
*info
;
5740 info
= ver_info (data
, i
);
5744 info
->has_nonlin_use
= false;
5745 info
->preserve_biv
= false;
5748 bitmap_clear (data
->relevant
);
5749 bitmap_clear (data
->important_candidates
);
5751 for (i
= 0; i
< n_iv_uses (data
); i
++)
5753 struct iv_use
*use
= iv_use (data
, i
);
5756 BITMAP_FREE (use
->related_cands
);
5757 for (j
= 0; j
< use
->n_map_members
; j
++)
5758 if (use
->cost_map
[j
].depends_on
)
5759 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5760 free (use
->cost_map
);
5763 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5765 for (i
= 0; i
< n_iv_cands (data
); i
++)
5767 struct iv_cand
*cand
= iv_cand (data
, i
);
5771 if (cand
->depends_on
)
5772 BITMAP_FREE (cand
->depends_on
);
5775 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5777 if (data
->version_info_size
< num_ssa_names
)
5779 data
->version_info_size
= 2 * num_ssa_names
;
5780 free (data
->version_info
);
5781 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5784 data
->max_inv_id
= 0;
5786 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5787 SET_DECL_RTL (obj
, NULL_RTX
);
5789 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5792 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5796 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5798 free_loop_data (data
);
5799 free (data
->version_info
);
5800 BITMAP_FREE (data
->relevant
);
5801 BITMAP_FREE (data
->important_candidates
);
5802 htab_delete (data
->niters
);
5804 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5805 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5806 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5809 /* Optimizes the LOOP. Returns true if anything changed. */
5812 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5814 bool changed
= false;
5815 struct iv_ca
*iv_ca
;
5818 data
->current_loop
= loop
;
5820 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5822 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5824 exit
= single_dom_exit (loop
);
5827 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5828 exit
->src
->index
, exit
->dest
->index
);
5829 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5830 fprintf (dump_file
, "\n");
5833 fprintf (dump_file
, "\n");
5836 /* For each ssa name determines whether it behaves as an induction variable
5838 if (!find_induction_variables (data
))
5841 /* Finds interesting uses (item 1). */
5842 find_interesting_uses (data
);
5843 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5846 /* Finds candidates for the induction variables (item 2). */
5847 find_iv_candidates (data
);
5849 /* Calculates the costs (item 3, part 1). */
5850 determine_use_iv_costs (data
);
5851 determine_iv_costs (data
);
5852 determine_set_costs (data
);
5854 /* Find the optimal set of induction variables (item 3, part 2). */
5855 iv_ca
= find_optimal_iv_set (data
);
5860 /* Create the new induction variables (item 4, part 1). */
5861 create_new_ivs (data
, iv_ca
);
5862 iv_ca_free (&iv_ca
);
5864 /* Rewrite the uses (item 4, part 2). */
5865 rewrite_uses (data
);
5867 /* Remove the ivs that are unused after rewriting. */
5868 remove_unused_ivs (data
);
5870 /* We have changed the structure of induction variables; it might happen
5871 that definitions in the scev database refer to some of them that were
5876 free_loop_data (data
);
5881 /* Main entry point. Optimizes induction variables in loops. */
5884 tree_ssa_iv_optimize (void)
5887 struct ivopts_data data
;
5889 tree_ssa_iv_optimize_init (&data
);
5891 /* Optimize the loops starting with the innermost ones. */
5892 loop
= current_loops
->tree_root
;
5896 /* Scan the loops, inner ones first. */
5897 while (loop
!= current_loops
->tree_root
)
5899 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5900 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5902 tree_ssa_iv_optimize_loop (&data
, loop
);
5914 tree_ssa_iv_optimize_finalize (&data
);