1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
71 #include "hard-reg-set.h"
72 #include "basic-block.h"
74 #include "diagnostic.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
81 #include "tree-pass.h"
83 #include "insn-config.h"
86 #include "tree-chrec.h"
87 #include "tree-scalar-evolution.h"
90 #include "langhooks.h"
92 /* The infinite cost. */
93 #define INFTY 10000000
95 /* The expected number of loop iterations. TODO -- use profiling instead of
97 #define AVG_LOOP_NITER(LOOP) 5
100 /* Representation of the induction variable. */
103 tree base
; /* Initial value of the iv. */
104 tree base_object
; /* A memory object to that the induction variable points. */
105 tree step
; /* Step of the iv (constant only). */
106 tree ssa_name
; /* The ssa name with the value. */
107 bool biv_p
; /* Is it a biv? */
108 bool have_use_for
; /* Do we already have a use for it? */
109 unsigned use_id
; /* The identifier in the use if it is the case. */
112 /* Per-ssa version information (induction variable descriptions, etc.). */
115 tree name
; /* The ssa name. */
116 struct iv
*iv
; /* Induction variable description. */
117 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
118 an expression that is not an induction variable. */
119 unsigned inv_id
; /* Id of an invariant. */
120 bool preserve_biv
; /* For the original biv, whether to preserve it. */
126 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
127 USE_ADDRESS
, /* Use in an address. */
128 USE_COMPARE
/* Use is a compare. */
131 /* The candidate - cost pair. */
134 struct iv_cand
*cand
; /* The candidate. */
135 unsigned cost
; /* The cost. */
136 bitmap depends_on
; /* The list of invariants that have to be
138 tree value
; /* For final value elimination, the expression for
139 the final value of the iv. For iv elimination,
140 the new bound to compare with. */
146 unsigned id
; /* The id of the use. */
147 enum use_type type
; /* Type of the use. */
148 struct iv
*iv
; /* The induction variable it is based on. */
149 tree stmt
; /* Statement in that it occurs. */
150 tree
*op_p
; /* The place where it occurs. */
151 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
154 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
155 struct cost_pair
*cost_map
;
156 /* The costs wrto the iv candidates. */
158 struct iv_cand
*selected
;
159 /* The selected candidate. */
162 /* The position where the iv is computed. */
165 IP_NORMAL
, /* At the end, just before the exit condition. */
166 IP_END
, /* At the end of the latch block. */
167 IP_ORIGINAL
/* The original biv. */
170 /* The induction variable candidate. */
173 unsigned id
; /* The number of the candidate. */
174 bool important
; /* Whether this is an "important" candidate, i.e. such
175 that it should be considered by all uses. */
176 enum iv_position pos
; /* Where it is computed. */
177 tree incremented_at
; /* For original biv, the statement where it is
179 tree var_before
; /* The variable used for it before increment. */
180 tree var_after
; /* The variable used for it after increment. */
181 struct iv
*iv
; /* The value of the candidate. NULL for
182 "pseudocandidate" used to indicate the possibility
183 to replace the final value of an iv by direct
184 computation of the value. */
185 unsigned cost
; /* Cost of the candidate. */
186 bitmap depends_on
; /* The list of invariants that are used in step of the
190 /* The data used by the induction variable optimizations. */
192 typedef struct iv_use
*iv_use_p
;
194 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
196 typedef struct iv_cand
*iv_cand_p
;
197 DEF_VEC_P(iv_cand_p
);
198 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
202 /* The currently optimized loop. */
203 struct loop
*current_loop
;
205 /* Number of registers used in it. */
208 /* Numbers of iterations for all exits of the current loop. */
211 /* The size of version_info array allocated. */
212 unsigned version_info_size
;
214 /* The array of information for the ssa names. */
215 struct version_info
*version_info
;
217 /* The bitmap of indices in version_info whose value was changed. */
220 /* The maximum invariant id. */
223 /* The uses of induction variables. */
224 VEC(iv_use_p
,heap
) *iv_uses
;
226 /* The candidates. */
227 VEC(iv_cand_p
,heap
) *iv_candidates
;
229 /* A bitmap of important candidates. */
230 bitmap important_candidates
;
232 /* Whether to consider just related and important candidates when replacing a
234 bool consider_all_candidates
;
237 /* An assignment of iv candidates to uses. */
241 /* The number of uses covered by the assignment. */
244 /* Number of uses that cannot be expressed by the candidates in the set. */
247 /* Candidate assigned to a use, together with the related costs. */
248 struct cost_pair
**cand_for_use
;
250 /* Number of times each candidate is used. */
251 unsigned *n_cand_uses
;
253 /* The candidates used. */
256 /* The number of candidates in the set. */
259 /* Total number of registers needed. */
262 /* Total cost of expressing uses. */
263 unsigned cand_use_cost
;
265 /* Total cost of candidates. */
268 /* Number of times each invariant is used. */
269 unsigned *n_invariant_uses
;
271 /* Total cost of the assignment. */
275 /* Difference of two iv candidate assignments. */
282 /* An old assignment (for rollback purposes). */
283 struct cost_pair
*old_cp
;
285 /* A new assignment. */
286 struct cost_pair
*new_cp
;
288 /* Next change in the list. */
289 struct iv_ca_delta
*next_change
;
292 /* Bound on number of candidates below that all candidates are considered. */
294 #define CONSIDER_ALL_CANDIDATES_BOUND \
295 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
297 /* If there are more iv occurrences, we just give up (it is quite unlikely that
298 optimizing such a loop would help, and it would take ages). */
300 #define MAX_CONSIDERED_USES \
301 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
303 /* If there are at most this number of ivs in the set, try removing unnecessary
304 ivs from the set always. */
306 #define ALWAYS_PRUNE_CAND_SET_BOUND \
307 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
309 /* The list of trees for that the decl_rtl field must be reset is stored
312 static VEC(tree
,heap
) *decl_rtl_to_reset
;
314 /* Number of uses recorded in DATA. */
316 static inline unsigned
317 n_iv_uses (struct ivopts_data
*data
)
319 return VEC_length (iv_use_p
, data
->iv_uses
);
322 /* Ith use recorded in DATA. */
324 static inline struct iv_use
*
325 iv_use (struct ivopts_data
*data
, unsigned i
)
327 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
330 /* Number of candidates recorded in DATA. */
332 static inline unsigned
333 n_iv_cands (struct ivopts_data
*data
)
335 return VEC_length (iv_cand_p
, data
->iv_candidates
);
338 /* Ith candidate recorded in DATA. */
340 static inline struct iv_cand
*
341 iv_cand (struct ivopts_data
*data
, unsigned i
)
343 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
346 /* The single loop exit if it dominates the latch, NULL otherwise. */
349 single_dom_exit (struct loop
*loop
)
351 edge exit
= loop
->single_exit
;
356 if (!just_once_each_iteration_p (loop
, exit
->src
))
362 /* Dumps information about the induction variable IV to FILE. */
364 extern void dump_iv (FILE *, struct iv
*);
366 dump_iv (FILE *file
, struct iv
*iv
)
370 fprintf (file
, "ssa name ");
371 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
372 fprintf (file
, "\n");
375 fprintf (file
, " type ");
376 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
377 fprintf (file
, "\n");
381 fprintf (file
, " base ");
382 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
383 fprintf (file
, "\n");
385 fprintf (file
, " step ");
386 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
387 fprintf (file
, "\n");
391 fprintf (file
, " invariant ");
392 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
393 fprintf (file
, "\n");
398 fprintf (file
, " base object ");
399 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
400 fprintf (file
, "\n");
404 fprintf (file
, " is a biv\n");
407 /* Dumps information about the USE to FILE. */
409 extern void dump_use (FILE *, struct iv_use
*);
411 dump_use (FILE *file
, struct iv_use
*use
)
413 fprintf (file
, "use %d\n", use
->id
);
417 case USE_NONLINEAR_EXPR
:
418 fprintf (file
, " generic\n");
422 fprintf (file
, " address\n");
426 fprintf (file
, " compare\n");
433 fprintf (file
, " in statement ");
434 print_generic_expr (file
, use
->stmt
, TDF_SLIM
);
435 fprintf (file
, "\n");
437 fprintf (file
, " at position ");
439 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
440 fprintf (file
, "\n");
442 dump_iv (file
, use
->iv
);
444 if (use
->related_cands
)
446 fprintf (file
, " related candidates ");
447 dump_bitmap (file
, use
->related_cands
);
451 /* Dumps information about the uses to FILE. */
453 extern void dump_uses (FILE *, struct ivopts_data
*);
455 dump_uses (FILE *file
, struct ivopts_data
*data
)
460 for (i
= 0; i
< n_iv_uses (data
); i
++)
462 use
= iv_use (data
, i
);
464 dump_use (file
, use
);
465 fprintf (file
, "\n");
469 /* Dumps information about induction variable candidate CAND to FILE. */
471 extern void dump_cand (FILE *, struct iv_cand
*);
473 dump_cand (FILE *file
, struct iv_cand
*cand
)
475 struct iv
*iv
= cand
->iv
;
477 fprintf (file
, "candidate %d%s\n",
478 cand
->id
, cand
->important
? " (important)" : "");
480 if (cand
->depends_on
)
482 fprintf (file
, " depends on ");
483 dump_bitmap (file
, cand
->depends_on
);
488 fprintf (file
, " final value replacement\n");
495 fprintf (file
, " incremented before exit test\n");
499 fprintf (file
, " incremented at end\n");
503 fprintf (file
, " original biv\n");
510 /* Returns the info for ssa version VER. */
512 static inline struct version_info
*
513 ver_info (struct ivopts_data
*data
, unsigned ver
)
515 return data
->version_info
+ ver
;
518 /* Returns the info for ssa name NAME. */
520 static inline struct version_info
*
521 name_info (struct ivopts_data
*data
, tree name
)
523 return ver_info (data
, SSA_NAME_VERSION (name
));
526 /* Checks whether there exists number X such that X * B = A, counting modulo
530 divide (unsigned bits
, unsigned HOST_WIDE_INT a
, unsigned HOST_WIDE_INT b
,
533 unsigned HOST_WIDE_INT mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
534 unsigned HOST_WIDE_INT inv
, ex
, val
;
540 /* First divide the whole equation by 2 as long as possible. */
541 while (!(a
& 1) && !(b
& 1))
551 /* If b is still even, a is odd and there is no such x. */
555 /* Find the inverse of b. We compute it as
556 b^(2^(bits - 1) - 1) (mod 2^bits). */
559 for (i
= 0; i
< bits
- 1; i
++)
561 inv
= (inv
* ex
) & mask
;
562 ex
= (ex
* ex
) & mask
;
565 val
= (a
* inv
) & mask
;
567 gcc_assert (((val
* b
) & mask
) == a
);
569 if ((val
>> (bits
- 1)) & 1)
577 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
581 stmt_after_ip_normal_pos (struct loop
*loop
, tree stmt
)
583 basic_block bb
= ip_normal_pos (loop
), sbb
= bb_for_stmt (stmt
);
587 if (sbb
== loop
->latch
)
593 return stmt
== last_stmt (bb
);
596 /* Returns true if STMT if after the place where the original induction
597 variable CAND is incremented. */
600 stmt_after_ip_original_pos (struct iv_cand
*cand
, tree stmt
)
602 basic_block cand_bb
= bb_for_stmt (cand
->incremented_at
);
603 basic_block stmt_bb
= bb_for_stmt (stmt
);
604 block_stmt_iterator bsi
;
606 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
609 if (stmt_bb
!= cand_bb
)
612 /* Scan the block from the end, since the original ivs are usually
613 incremented at the end of the loop body. */
614 for (bsi
= bsi_last (stmt_bb
); ; bsi_prev (&bsi
))
616 if (bsi_stmt (bsi
) == cand
->incremented_at
)
618 if (bsi_stmt (bsi
) == stmt
)
623 /* Returns true if STMT if after the place where the induction variable
624 CAND is incremented in LOOP. */
627 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
635 return stmt_after_ip_normal_pos (loop
, stmt
);
638 return stmt_after_ip_original_pos (cand
, stmt
);
645 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
648 abnormal_ssa_name_p (tree exp
)
653 if (TREE_CODE (exp
) != SSA_NAME
)
656 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
659 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
660 abnormal phi node. Callback for for_each_index. */
663 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
664 void *data ATTRIBUTE_UNUSED
)
666 if (TREE_CODE (base
) == ARRAY_REF
)
668 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
670 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
674 return !abnormal_ssa_name_p (*index
);
677 /* Returns true if EXPR contains a ssa name that occurs in an
678 abnormal phi node. */
681 contains_abnormal_ssa_name_p (tree expr
)
684 enum tree_code_class
class;
689 code
= TREE_CODE (expr
);
690 class = TREE_CODE_CLASS (code
);
692 if (code
== SSA_NAME
)
693 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
695 if (code
== INTEGER_CST
696 || is_gimple_min_invariant (expr
))
699 if (code
== ADDR_EXPR
)
700 return !for_each_index (&TREE_OPERAND (expr
, 0),
701 idx_contains_abnormal_ssa_name_p
,
708 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
713 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
725 /* Element of the table in that we cache the numbers of iterations obtained
726 from exits of the loop. */
730 /* The edge for that the number of iterations is cached. */
733 /* Number of iterations corresponding to this exit, or NULL if it cannot be
738 /* Hash function for nfe_cache_elt E. */
741 nfe_hash (const void *e
)
743 const struct nfe_cache_elt
*elt
= e
;
745 return htab_hash_pointer (elt
->exit
);
748 /* Equality function for nfe_cache_elt E1 and edge E2. */
751 nfe_eq (const void *e1
, const void *e2
)
753 const struct nfe_cache_elt
*elt1
= e1
;
755 return elt1
->exit
== e2
;
758 /* Returns tree describing number of iterations determined from
759 EXIT of DATA->current_loop, or NULL if something goes wrong. */
762 niter_for_exit (struct ivopts_data
*data
, edge exit
)
764 struct nfe_cache_elt
*nfe_desc
;
765 struct tree_niter_desc desc
;
768 slot
= htab_find_slot_with_hash (data
->niters
, exit
,
769 htab_hash_pointer (exit
),
774 nfe_desc
= xmalloc (sizeof (struct nfe_cache_elt
));
775 nfe_desc
->exit
= exit
;
777 /* Try to determine number of iterations. We must know it
778 unconditionally (i.e., without possibility of # of iterations
779 being zero). Also, we cannot safely work with ssa names that
780 appear in phi nodes on abnormal edges, so that we do not create
781 overlapping life ranges for them (PR 27283). */
782 if (number_of_iterations_exit (data
->current_loop
,
784 && zero_p (desc
.may_be_zero
)
785 && !contains_abnormal_ssa_name_p (desc
.niter
))
786 nfe_desc
->niter
= desc
.niter
;
788 nfe_desc
->niter
= NULL_TREE
;
793 return nfe_desc
->niter
;
796 /* Returns tree describing number of iterations determined from
797 single dominating exit of DATA->current_loop, or NULL if something
801 niter_for_single_dom_exit (struct ivopts_data
*data
)
803 edge exit
= single_dom_exit (data
->current_loop
);
808 return niter_for_exit (data
, exit
);
811 /* Initializes data structures used by the iv optimization pass, stored
815 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
817 data
->version_info_size
= 2 * num_ssa_names
;
818 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
819 data
->relevant
= BITMAP_ALLOC (NULL
);
820 data
->important_candidates
= BITMAP_ALLOC (NULL
);
821 data
->max_inv_id
= 0;
822 data
->niters
= htab_create (10, nfe_hash
, nfe_eq
, free
);
823 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
824 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
825 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
828 /* Returns a memory object to that EXPR points. In case we are able to
829 determine that it does not point to any such object, NULL is returned. */
832 determine_base_object (tree expr
)
834 enum tree_code code
= TREE_CODE (expr
);
835 tree base
, obj
, op0
, op1
;
837 /* If this is a pointer casted to any type, we need to determine
838 the base object for the pointer; so handle conversions before
839 throwing away non-pointer expressions. */
840 if (TREE_CODE (expr
) == NOP_EXPR
841 || TREE_CODE (expr
) == CONVERT_EXPR
)
842 return determine_base_object (TREE_OPERAND (expr
, 0));
844 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
853 obj
= TREE_OPERAND (expr
, 0);
854 base
= get_base_address (obj
);
859 if (TREE_CODE (base
) == INDIRECT_REF
)
860 return determine_base_object (TREE_OPERAND (base
, 0));
862 return fold_convert (ptr_type_node
,
863 build_fold_addr_expr (base
));
867 op0
= determine_base_object (TREE_OPERAND (expr
, 0));
868 op1
= determine_base_object (TREE_OPERAND (expr
, 1));
874 return (code
== PLUS_EXPR
876 : fold_build1 (NEGATE_EXPR
, ptr_type_node
, op1
));
878 return fold_build2 (code
, ptr_type_node
, op0
, op1
);
881 return fold_convert (ptr_type_node
, expr
);
885 /* Allocates an induction variable with given initial value BASE and step STEP
889 alloc_iv (tree base
, tree step
)
891 struct iv
*iv
= XCNEW (struct iv
);
893 if (step
&& integer_zerop (step
))
897 iv
->base_object
= determine_base_object (base
);
900 iv
->have_use_for
= false;
902 iv
->ssa_name
= NULL_TREE
;
907 /* Sets STEP and BASE for induction variable IV. */
910 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
912 struct version_info
*info
= name_info (data
, iv
);
914 gcc_assert (!info
->iv
);
916 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
917 info
->iv
= alloc_iv (base
, step
);
918 info
->iv
->ssa_name
= iv
;
921 /* Finds induction variable declaration for VAR. */
924 get_iv (struct ivopts_data
*data
, tree var
)
928 if (!name_info (data
, var
)->iv
)
930 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
933 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
934 set_iv (data
, var
, var
, NULL_TREE
);
937 return name_info (data
, var
)->iv
;
940 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
941 not define a simple affine biv with nonzero step. */
944 determine_biv_step (tree phi
)
946 struct loop
*loop
= bb_for_stmt (phi
)->loop_father
;
947 tree name
= PHI_RESULT (phi
);
950 if (!is_gimple_reg (name
))
953 if (!simple_iv (loop
, phi
, name
, &iv
, true))
956 return (zero_p (iv
.step
) ? NULL_TREE
: iv
.step
);
959 /* Finds basic ivs. */
962 find_bivs (struct ivopts_data
*data
)
964 tree phi
, step
, type
, base
;
966 struct loop
*loop
= data
->current_loop
;
968 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
970 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
973 step
= determine_biv_step (phi
);
977 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
978 base
= expand_simple_operations (base
);
979 if (contains_abnormal_ssa_name_p (base
)
980 || contains_abnormal_ssa_name_p (step
))
983 type
= TREE_TYPE (PHI_RESULT (phi
));
984 base
= fold_convert (type
, base
);
986 step
= fold_convert (type
, step
);
988 set_iv (data
, PHI_RESULT (phi
), base
, step
);
995 /* Marks basic ivs. */
998 mark_bivs (struct ivopts_data
*data
)
1001 struct iv
*iv
, *incr_iv
;
1002 struct loop
*loop
= data
->current_loop
;
1003 basic_block incr_bb
;
1005 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
1007 iv
= get_iv (data
, PHI_RESULT (phi
));
1011 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1012 incr_iv
= get_iv (data
, var
);
1016 /* If the increment is in the subloop, ignore it. */
1017 incr_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (var
));
1018 if (incr_bb
->loop_father
!= data
->current_loop
1019 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1023 incr_iv
->biv_p
= true;
1027 /* Checks whether STMT defines a linear induction variable and stores its
1028 parameters to IV. */
1031 find_givs_in_stmt_scev (struct ivopts_data
*data
, tree stmt
, affine_iv
*iv
)
1034 struct loop
*loop
= data
->current_loop
;
1036 iv
->base
= NULL_TREE
;
1037 iv
->step
= NULL_TREE
;
1039 if (TREE_CODE (stmt
) != MODIFY_EXPR
)
1042 lhs
= TREE_OPERAND (stmt
, 0);
1043 if (TREE_CODE (lhs
) != SSA_NAME
)
1046 if (!simple_iv (loop
, stmt
, TREE_OPERAND (stmt
, 1), iv
, true))
1048 iv
->base
= expand_simple_operations (iv
->base
);
1050 if (contains_abnormal_ssa_name_p (iv
->base
)
1051 || contains_abnormal_ssa_name_p (iv
->step
))
1057 /* Finds general ivs in statement STMT. */
1060 find_givs_in_stmt (struct ivopts_data
*data
, tree stmt
)
1064 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1067 set_iv (data
, TREE_OPERAND (stmt
, 0), iv
.base
, iv
.step
);
1070 /* Finds general ivs in basic block BB. */
1073 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1075 block_stmt_iterator bsi
;
1077 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1078 find_givs_in_stmt (data
, bsi_stmt (bsi
));
1081 /* Finds general ivs. */
1084 find_givs (struct ivopts_data
*data
)
1086 struct loop
*loop
= data
->current_loop
;
1087 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1090 for (i
= 0; i
< loop
->num_nodes
; i
++)
1091 find_givs_in_bb (data
, body
[i
]);
1095 /* For each ssa name defined in LOOP determines whether it is an induction
1096 variable and if so, its initial value and step. */
1099 find_induction_variables (struct ivopts_data
*data
)
1104 if (!find_bivs (data
))
1110 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1112 tree niter
= niter_for_single_dom_exit (data
);
1116 fprintf (dump_file
, " number of iterations ");
1117 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1118 fprintf (dump_file
, "\n\n");
1121 fprintf (dump_file
, "Induction variables:\n\n");
1123 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1125 if (ver_info (data
, i
)->iv
)
1126 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1133 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1135 static struct iv_use
*
1136 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1137 tree stmt
, enum use_type use_type
)
1139 struct iv_use
*use
= XCNEW (struct iv_use
);
1141 use
->id
= n_iv_uses (data
);
1142 use
->type
= use_type
;
1146 use
->related_cands
= BITMAP_ALLOC (NULL
);
1148 /* To avoid showing ssa name in the dumps, if it was not reset by the
1150 iv
->ssa_name
= NULL_TREE
;
1152 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1153 dump_use (dump_file
, use
);
1155 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1160 /* Checks whether OP is a loop-level invariant and if so, records it.
1161 NONLINEAR_USE is true if the invariant is used in a way we do not
1162 handle specially. */
1165 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1168 struct version_info
*info
;
1170 if (TREE_CODE (op
) != SSA_NAME
1171 || !is_gimple_reg (op
))
1174 bb
= bb_for_stmt (SSA_NAME_DEF_STMT (op
));
1176 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1179 info
= name_info (data
, op
);
1181 info
->has_nonlin_use
|= nonlinear_use
;
1183 info
->inv_id
= ++data
->max_inv_id
;
1184 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1187 /* Checks whether the use OP is interesting and if so, records it. */
1189 static struct iv_use
*
1190 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1197 if (TREE_CODE (op
) != SSA_NAME
)
1200 iv
= get_iv (data
, op
);
1204 if (iv
->have_use_for
)
1206 use
= iv_use (data
, iv
->use_id
);
1208 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1212 if (zero_p (iv
->step
))
1214 record_invariant (data
, op
, true);
1217 iv
->have_use_for
= true;
1219 civ
= XNEW (struct iv
);
1222 stmt
= SSA_NAME_DEF_STMT (op
);
1223 gcc_assert (TREE_CODE (stmt
) == PHI_NODE
1224 || TREE_CODE (stmt
) == MODIFY_EXPR
);
1226 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1227 iv
->use_id
= use
->id
;
1232 /* Checks whether the condition *COND_P in STMT is interesting
1233 and if so, records it. */
1236 find_interesting_uses_cond (struct ivopts_data
*data
, tree stmt
, tree
*cond_p
)
1240 struct iv
*iv0
= NULL
, *iv1
= NULL
, *civ
;
1242 tree zero
= integer_zero_node
;
1244 const_iv
.step
= NULL_TREE
;
1246 if (TREE_CODE (*cond_p
) != SSA_NAME
1247 && !COMPARISON_CLASS_P (*cond_p
))
1250 if (TREE_CODE (*cond_p
) == SSA_NAME
)
1257 op0_p
= &TREE_OPERAND (*cond_p
, 0);
1258 op1_p
= &TREE_OPERAND (*cond_p
, 1);
1261 if (TREE_CODE (*op0_p
) == SSA_NAME
)
1262 iv0
= get_iv (data
, *op0_p
);
1266 if (TREE_CODE (*op1_p
) == SSA_NAME
)
1267 iv1
= get_iv (data
, *op1_p
);
1271 if (/* When comparing with non-invariant value, we may not do any senseful
1272 induction variable elimination. */
1274 /* Eliminating condition based on two ivs would be nontrivial.
1275 ??? TODO -- it is not really important to handle this case. */
1276 || (!zero_p (iv0
->step
) && !zero_p (iv1
->step
)))
1278 find_interesting_uses_op (data
, *op0_p
);
1279 find_interesting_uses_op (data
, *op1_p
);
1283 if (zero_p (iv0
->step
) && zero_p (iv1
->step
))
1285 /* If both are invariants, this is a work for unswitching. */
1289 civ
= XNEW (struct iv
);
1290 *civ
= zero_p (iv0
->step
) ? *iv1
: *iv0
;
1291 record_use (data
, cond_p
, civ
, stmt
, USE_COMPARE
);
1294 /* Returns true if expression EXPR is obviously invariant in LOOP,
1295 i.e. if all its operands are defined outside of the LOOP. */
1298 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1303 if (is_gimple_min_invariant (expr
))
1306 if (TREE_CODE (expr
) == SSA_NAME
)
1308 def_bb
= bb_for_stmt (SSA_NAME_DEF_STMT (expr
));
1310 && flow_bb_inside_loop_p (loop
, def_bb
))
1319 len
= TREE_CODE_LENGTH (TREE_CODE (expr
));
1320 for (i
= 0; i
< len
; i
++)
1321 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1327 /* Cumulates the steps of indices into DATA and replaces their values with the
1328 initial ones. Returns false when the value of the index cannot be determined.
1329 Callback for for_each_index. */
1331 struct ifs_ivopts_data
1333 struct ivopts_data
*ivopts_data
;
1339 idx_find_step (tree base
, tree
*idx
, void *data
)
1341 struct ifs_ivopts_data
*dta
= data
;
1343 tree step
, iv_base
, iv_step
, lbound
, off
;
1344 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1346 if (TREE_CODE (base
) == MISALIGNED_INDIRECT_REF
1347 || TREE_CODE (base
) == ALIGN_INDIRECT_REF
)
1350 /* If base is a component ref, require that the offset of the reference
1352 if (TREE_CODE (base
) == COMPONENT_REF
)
1354 off
= component_ref_field_offset (base
);
1355 return expr_invariant_in_loop_p (loop
, off
);
1358 /* If base is array, first check whether we will be able to move the
1359 reference out of the loop (in order to take its address in strength
1360 reduction). In order for this to work we need both lower bound
1361 and step to be loop invariants. */
1362 if (TREE_CODE (base
) == ARRAY_REF
)
1364 step
= array_ref_element_size (base
);
1365 lbound
= array_ref_low_bound (base
);
1367 if (!expr_invariant_in_loop_p (loop
, step
)
1368 || !expr_invariant_in_loop_p (loop
, lbound
))
1372 if (TREE_CODE (*idx
) != SSA_NAME
)
1375 iv
= get_iv (dta
->ivopts_data
, *idx
);
1379 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1380 *&x[0], which is not folded and does not trigger the
1381 ARRAY_REF path below. */
1387 if (TREE_CODE (base
) == ARRAY_REF
)
1389 step
= array_ref_element_size (base
);
1391 /* We only handle addresses whose step is an integer constant. */
1392 if (TREE_CODE (step
) != INTEGER_CST
)
1396 /* The step for pointer arithmetics already is 1 byte. */
1397 step
= build_int_cst (sizetype
, 1);
1401 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1402 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1405 /* The index might wrap. */
1409 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1412 *dta
->step_p
= step
;
1414 *dta
->step_p
= fold_build2 (PLUS_EXPR
, sizetype
, *dta
->step_p
, step
);
1419 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1420 object is passed to it in DATA. */
1423 idx_record_use (tree base
, tree
*idx
,
1426 find_interesting_uses_op (data
, *idx
);
1427 if (TREE_CODE (base
) == ARRAY_REF
)
1429 find_interesting_uses_op (data
, array_ref_element_size (base
));
1430 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1435 /* Returns true if memory reference REF may be unaligned. */
1438 may_be_unaligned_p (tree ref
)
1442 HOST_WIDE_INT bitsize
;
1443 HOST_WIDE_INT bitpos
;
1445 enum machine_mode mode
;
1446 int unsignedp
, volatilep
;
1447 unsigned base_align
;
1449 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1450 thus they are not misaligned. */
1451 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1454 /* The test below is basically copy of what expr.c:normal_inner_ref
1455 does to check whether the object must be loaded by parts when
1456 STRICT_ALIGNMENT is true. */
1457 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1458 &unsignedp
, &volatilep
, true);
1459 base_type
= TREE_TYPE (base
);
1460 base_align
= TYPE_ALIGN (base_type
);
1463 && (base_align
< GET_MODE_ALIGNMENT (mode
)
1464 || bitpos
% GET_MODE_ALIGNMENT (mode
) != 0
1465 || bitpos
% BITS_PER_UNIT
!= 0))
1471 /* Return true if EXPR may be non-addressable. */
1474 may_be_nonaddressable_p (tree expr
)
1476 switch (TREE_CODE (expr
))
1479 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1480 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1483 case ARRAY_RANGE_REF
:
1484 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1486 case VIEW_CONVERT_EXPR
:
1487 /* This kind of view-conversions may wrap non-addressable objects
1488 and make them look addressable. After some processing the
1489 non-addressability may be uncovered again, causing ADDR_EXPRs
1490 of inappropriate objects to be built. */
1491 return AGGREGATE_TYPE_P (TREE_TYPE (expr
))
1492 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr
, 0)));
1501 /* Finds addresses in *OP_P inside STMT. */
1504 find_interesting_uses_address (struct ivopts_data
*data
, tree stmt
, tree
*op_p
)
1506 tree base
= *op_p
, step
= NULL
;
1508 struct ifs_ivopts_data ifs_ivopts_data
;
1510 /* Do not play with volatile memory references. A bit too conservative,
1511 perhaps, but safe. */
1512 if (stmt_ann (stmt
)->has_volatile_ops
)
1515 /* Ignore bitfields for now. Not really something terribly complicated
1517 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1520 if (may_be_nonaddressable_p (base
))
1523 if (STRICT_ALIGNMENT
1524 && may_be_unaligned_p (base
))
1527 base
= unshare_expr (base
);
1529 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1531 tree type
= build_pointer_type (TREE_TYPE (base
));
1535 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1537 civ
= get_iv (data
, TMR_BASE (base
));
1541 TMR_BASE (base
) = civ
->base
;
1544 if (TMR_INDEX (base
)
1545 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1547 civ
= get_iv (data
, TMR_INDEX (base
));
1551 TMR_INDEX (base
) = civ
->base
;
1556 if (TMR_STEP (base
))
1557 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1560 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1568 base
= tree_mem_ref_addr (type
, base
);
1572 ifs_ivopts_data
.ivopts_data
= data
;
1573 ifs_ivopts_data
.stmt
= stmt
;
1574 ifs_ivopts_data
.step_p
= &step
;
1575 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1579 gcc_assert (TREE_CODE (base
) != ALIGN_INDIRECT_REF
);
1580 gcc_assert (TREE_CODE (base
) != MISALIGNED_INDIRECT_REF
);
1582 base
= build_fold_addr_expr (base
);
1584 /* Substituting bases of IVs into the base expression might
1585 have caused folding opportunities. */
1586 if (TREE_CODE (base
) == ADDR_EXPR
)
1588 tree
*ref
= &TREE_OPERAND (base
, 0);
1589 while (handled_component_p (*ref
))
1590 ref
= &TREE_OPERAND (*ref
, 0);
1591 if (TREE_CODE (*ref
) == INDIRECT_REF
)
1592 *ref
= fold_indirect_ref (*ref
);
1596 civ
= alloc_iv (base
, step
);
1597 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1601 for_each_index (op_p
, idx_record_use
, data
);
1604 /* Finds and records invariants used in STMT. */
1607 find_invariants_stmt (struct ivopts_data
*data
, tree stmt
)
1610 use_operand_p use_p
;
1613 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1615 op
= USE_FROM_PTR (use_p
);
1616 record_invariant (data
, op
, false);
1620 /* Finds interesting uses of induction variables in the statement STMT. */
1623 find_interesting_uses_stmt (struct ivopts_data
*data
, tree stmt
)
1628 use_operand_p use_p
;
1630 find_invariants_stmt (data
, stmt
);
1632 if (TREE_CODE (stmt
) == COND_EXPR
)
1634 find_interesting_uses_cond (data
, stmt
, &COND_EXPR_COND (stmt
));
1638 if (TREE_CODE (stmt
) == MODIFY_EXPR
)
1640 lhs
= TREE_OPERAND (stmt
, 0);
1641 rhs
= TREE_OPERAND (stmt
, 1);
1643 if (TREE_CODE (lhs
) == SSA_NAME
)
1645 /* If the statement defines an induction variable, the uses are not
1646 interesting by themselves. */
1648 iv
= get_iv (data
, lhs
);
1650 if (iv
&& !zero_p (iv
->step
))
1654 switch (TREE_CODE_CLASS (TREE_CODE (rhs
)))
1656 case tcc_comparison
:
1657 find_interesting_uses_cond (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1661 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 1));
1662 if (REFERENCE_CLASS_P (lhs
))
1663 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1669 if (REFERENCE_CLASS_P (lhs
)
1670 && is_gimple_val (rhs
))
1672 find_interesting_uses_address (data
, stmt
, &TREE_OPERAND (stmt
, 0));
1673 find_interesting_uses_op (data
, rhs
);
1677 /* TODO -- we should also handle address uses of type
1679 memory = call (whatever);
1686 if (TREE_CODE (stmt
) == PHI_NODE
1687 && bb_for_stmt (stmt
) == data
->current_loop
->header
)
1689 lhs
= PHI_RESULT (stmt
);
1690 iv
= get_iv (data
, lhs
);
1692 if (iv
&& !zero_p (iv
->step
))
1696 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1698 op
= USE_FROM_PTR (use_p
);
1700 if (TREE_CODE (op
) != SSA_NAME
)
1703 iv
= get_iv (data
, op
);
1707 find_interesting_uses_op (data
, op
);
1711 /* Finds interesting uses of induction variables outside of loops
1712 on loop exit edge EXIT. */
1715 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1719 for (phi
= phi_nodes (exit
->dest
); phi
; phi
= PHI_CHAIN (phi
))
1721 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1722 find_interesting_uses_op (data
, def
);
1726 /* Finds uses of the induction variables that are interesting. */
1729 find_interesting_uses (struct ivopts_data
*data
)
1732 block_stmt_iterator bsi
;
1734 basic_block
*body
= get_loop_body (data
->current_loop
);
1736 struct version_info
*info
;
1739 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1740 fprintf (dump_file
, "Uses:\n\n");
1742 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1747 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1748 if (e
->dest
!= EXIT_BLOCK_PTR
1749 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1750 find_interesting_uses_outside (data
, e
);
1752 for (phi
= phi_nodes (bb
); phi
; phi
= PHI_CHAIN (phi
))
1753 find_interesting_uses_stmt (data
, phi
);
1754 for (bsi
= bsi_start (bb
); !bsi_end_p (bsi
); bsi_next (&bsi
))
1755 find_interesting_uses_stmt (data
, bsi_stmt (bsi
));
1758 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1762 fprintf (dump_file
, "\n");
1764 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1766 info
= ver_info (data
, i
);
1769 fprintf (dump_file
, " ");
1770 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1771 fprintf (dump_file
, " is invariant (%d)%s\n",
1772 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1776 fprintf (dump_file
, "\n");
1782 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1783 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1784 we are at the top-level of the processed address. */
1787 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1788 unsigned HOST_WIDE_INT
*offset
)
1790 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1791 enum tree_code code
;
1792 tree type
, orig_type
= TREE_TYPE (expr
);
1793 unsigned HOST_WIDE_INT off0
, off1
, st
;
1794 tree orig_expr
= expr
;
1798 type
= TREE_TYPE (expr
);
1799 code
= TREE_CODE (expr
);
1805 if (!cst_and_fits_in_hwi (expr
)
1809 *offset
= int_cst_value (expr
);
1810 return build_int_cst (orig_type
, 0);
1814 op0
= TREE_OPERAND (expr
, 0);
1815 op1
= TREE_OPERAND (expr
, 1);
1817 op0
= strip_offset_1 (op0
, false, false, &off0
);
1818 op1
= strip_offset_1 (op1
, false, false, &off1
);
1820 *offset
= (code
== PLUS_EXPR
? off0
+ off1
: off0
- off1
);
1821 if (op0
== TREE_OPERAND (expr
, 0)
1822 && op1
== TREE_OPERAND (expr
, 1))
1827 else if (zero_p (op0
))
1829 if (code
== PLUS_EXPR
)
1832 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
1835 expr
= fold_build2 (code
, type
, op0
, op1
);
1837 return fold_convert (orig_type
, expr
);
1843 step
= array_ref_element_size (expr
);
1844 if (!cst_and_fits_in_hwi (step
))
1847 st
= int_cst_value (step
);
1848 op1
= TREE_OPERAND (expr
, 1);
1849 op1
= strip_offset_1 (op1
, false, false, &off1
);
1850 *offset
= off1
* st
;
1855 /* Strip the component reference completely. */
1856 op0
= TREE_OPERAND (expr
, 0);
1857 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1867 tmp
= component_ref_field_offset (expr
);
1869 && cst_and_fits_in_hwi (tmp
))
1871 /* Strip the component reference completely. */
1872 op0
= TREE_OPERAND (expr
, 0);
1873 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
1874 *offset
= off0
+ int_cst_value (tmp
);
1880 op0
= TREE_OPERAND (expr
, 0);
1881 op0
= strip_offset_1 (op0
, true, true, &off0
);
1884 if (op0
== TREE_OPERAND (expr
, 0))
1887 expr
= build_fold_addr_expr (op0
);
1888 return fold_convert (orig_type
, expr
);
1891 inside_addr
= false;
1898 /* Default handling of expressions for that we want to recurse into
1899 the first operand. */
1900 op0
= TREE_OPERAND (expr
, 0);
1901 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
1904 if (op0
== TREE_OPERAND (expr
, 0)
1905 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
1908 expr
= copy_node (expr
);
1909 TREE_OPERAND (expr
, 0) = op0
;
1911 TREE_OPERAND (expr
, 1) = op1
;
1913 /* Inside address, we might strip the top level component references,
1914 thus changing type of the expression. Handling of ADDR_EXPR
1916 expr
= fold_convert (orig_type
, expr
);
1921 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1924 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
1926 return strip_offset_1 (expr
, false, false, offset
);
1929 /* Returns variant of TYPE that can be used as base for different uses.
1930 We return unsigned type with the same precision, which avoids problems
1934 generic_type_for (tree type
)
1936 if (POINTER_TYPE_P (type
))
1937 return unsigned_type_for (type
);
1939 if (TYPE_UNSIGNED (type
))
1942 return unsigned_type_for (type
);
1945 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1946 the bitmap to that we should store it. */
1948 static struct ivopts_data
*fd_ivopts_data
;
1950 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
1952 bitmap
*depends_on
= data
;
1953 struct version_info
*info
;
1955 if (TREE_CODE (*expr_p
) != SSA_NAME
)
1957 info
= name_info (fd_ivopts_data
, *expr_p
);
1959 if (!info
->inv_id
|| info
->has_nonlin_use
)
1963 *depends_on
= BITMAP_ALLOC (NULL
);
1964 bitmap_set_bit (*depends_on
, info
->inv_id
);
1969 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1970 position to POS. If USE is not NULL, the candidate is set as related to
1971 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1972 replacement of the final value of the iv by a direct computation. */
1974 static struct iv_cand
*
1975 add_candidate_1 (struct ivopts_data
*data
,
1976 tree base
, tree step
, bool important
, enum iv_position pos
,
1977 struct iv_use
*use
, tree incremented_at
)
1980 struct iv_cand
*cand
= NULL
;
1981 tree type
, orig_type
;
1985 orig_type
= TREE_TYPE (base
);
1986 type
= generic_type_for (orig_type
);
1987 if (type
!= orig_type
)
1989 base
= fold_convert (type
, base
);
1991 step
= fold_convert (type
, step
);
1995 for (i
= 0; i
< n_iv_cands (data
); i
++)
1997 cand
= iv_cand (data
, i
);
1999 if (cand
->pos
!= pos
)
2002 if (cand
->incremented_at
!= incremented_at
)
2016 if (!operand_equal_p (base
, cand
->iv
->base
, 0))
2019 if (zero_p (cand
->iv
->step
))
2026 if (step
&& operand_equal_p (step
, cand
->iv
->step
, 0))
2031 if (i
== n_iv_cands (data
))
2033 cand
= XCNEW (struct iv_cand
);
2039 cand
->iv
= alloc_iv (base
, step
);
2042 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2044 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2045 cand
->var_after
= cand
->var_before
;
2047 cand
->important
= important
;
2048 cand
->incremented_at
= incremented_at
;
2049 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2052 && TREE_CODE (step
) != INTEGER_CST
)
2054 fd_ivopts_data
= data
;
2055 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2058 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2059 dump_cand (dump_file
, cand
);
2062 if (important
&& !cand
->important
)
2064 cand
->important
= true;
2065 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2066 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2071 bitmap_set_bit (use
->related_cands
, i
);
2072 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2073 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2080 /* Returns true if incrementing the induction variable at the end of the LOOP
2083 The purpose is to avoid splitting latch edge with a biv increment, thus
2084 creating a jump, possibly confusing other optimization passes and leaving
2085 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2086 is not available (so we do not have a better alternative), or if the latch
2087 edge is already nonempty. */
2090 allow_ip_end_pos_p (struct loop
*loop
)
2092 if (!ip_normal_pos (loop
))
2095 if (!empty_block_p (ip_end_pos (loop
)))
2101 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2102 position to POS. If USE is not NULL, the candidate is set as related to
2103 it. The candidate computation is scheduled on all available positions. */
2106 add_candidate (struct ivopts_data
*data
,
2107 tree base
, tree step
, bool important
, struct iv_use
*use
)
2109 if (ip_normal_pos (data
->current_loop
))
2110 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL_TREE
);
2111 if (ip_end_pos (data
->current_loop
)
2112 && allow_ip_end_pos_p (data
->current_loop
))
2113 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL_TREE
);
2116 /* Add a standard "0 + 1 * iteration" iv candidate for a
2117 type with SIZE bits. */
2120 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2123 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2124 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2128 /* Adds standard iv candidates. */
2131 add_standard_iv_candidates (struct ivopts_data
*data
)
2133 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2135 /* The same for a double-integer type if it is still fast enough. */
2136 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2137 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2141 /* Adds candidates bases on the old induction variable IV. */
2144 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2147 struct iv_cand
*cand
;
2149 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2151 /* The same, but with initial value zero. */
2152 add_candidate (data
,
2153 build_int_cst (TREE_TYPE (iv
->base
), 0),
2154 iv
->step
, true, NULL
);
2156 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2157 if (TREE_CODE (phi
) == PHI_NODE
)
2159 /* Additionally record the possibility of leaving the original iv
2161 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2162 cand
= add_candidate_1 (data
,
2163 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2164 SSA_NAME_DEF_STMT (def
));
2165 cand
->var_before
= iv
->ssa_name
;
2166 cand
->var_after
= def
;
2170 /* Adds candidates based on the old induction variables. */
2173 add_old_ivs_candidates (struct ivopts_data
*data
)
2179 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2181 iv
= ver_info (data
, i
)->iv
;
2182 if (iv
&& iv
->biv_p
&& !zero_p (iv
->step
))
2183 add_old_iv_candidates (data
, iv
);
2187 /* Adds candidates based on the value of the induction variable IV and USE. */
2190 add_iv_value_candidates (struct ivopts_data
*data
,
2191 struct iv
*iv
, struct iv_use
*use
)
2193 unsigned HOST_WIDE_INT offset
;
2196 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2198 /* The same, but with initial value zero. Make such variable important,
2199 since it is generic enough so that possibly many uses may be based
2201 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2202 iv
->step
, true, use
);
2204 /* Third, try removing the constant offset. */
2205 base
= strip_offset (iv
->base
, &offset
);
2207 add_candidate (data
, base
, iv
->step
, false, use
);
2210 /* Adds candidates based on the uses. */
2213 add_derived_ivs_candidates (struct ivopts_data
*data
)
2217 for (i
= 0; i
< n_iv_uses (data
); i
++)
2219 struct iv_use
*use
= iv_use (data
, i
);
2226 case USE_NONLINEAR_EXPR
:
2229 /* Just add the ivs based on the value of the iv used here. */
2230 add_iv_value_candidates (data
, use
->iv
, use
);
2239 /* Record important candidates and add them to related_cands bitmaps
2243 record_important_candidates (struct ivopts_data
*data
)
2248 for (i
= 0; i
< n_iv_cands (data
); i
++)
2250 struct iv_cand
*cand
= iv_cand (data
, i
);
2252 if (cand
->important
)
2253 bitmap_set_bit (data
->important_candidates
, i
);
2256 data
->consider_all_candidates
= (n_iv_cands (data
)
2257 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2259 if (data
->consider_all_candidates
)
2261 /* We will not need "related_cands" bitmaps in this case,
2262 so release them to decrease peak memory consumption. */
2263 for (i
= 0; i
< n_iv_uses (data
); i
++)
2265 use
= iv_use (data
, i
);
2266 BITMAP_FREE (use
->related_cands
);
2271 /* Add important candidates to the related_cands bitmaps. */
2272 for (i
= 0; i
< n_iv_uses (data
); i
++)
2273 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2274 data
->important_candidates
);
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
2296 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2297 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2298 we allocate a simple list to every use. */
2301 alloc_use_cost_map (struct ivopts_data
*data
)
2303 unsigned i
, size
, s
, j
;
2305 for (i
= 0; i
< n_iv_uses (data
); i
++)
2307 struct iv_use
*use
= iv_use (data
, i
);
2310 if (data
->consider_all_candidates
)
2311 size
= n_iv_cands (data
);
2315 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2320 /* Round up to the power of two, so that moduling by it is fast. */
2321 for (size
= 1; size
< s
; size
<<= 1)
2325 use
->n_map_members
= size
;
2326 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2330 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2331 on invariants DEPENDS_ON and that the value used in expressing it
2335 set_use_iv_cost (struct ivopts_data
*data
,
2336 struct iv_use
*use
, struct iv_cand
*cand
, unsigned cost
,
2337 bitmap depends_on
, tree value
)
2343 BITMAP_FREE (depends_on
);
2347 if (data
->consider_all_candidates
)
2349 use
->cost_map
[cand
->id
].cand
= cand
;
2350 use
->cost_map
[cand
->id
].cost
= cost
;
2351 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2352 use
->cost_map
[cand
->id
].value
= value
;
2356 /* n_map_members is a power of two, so this computes modulo. */
2357 s
= cand
->id
& (use
->n_map_members
- 1);
2358 for (i
= s
; i
< use
->n_map_members
; i
++)
2359 if (!use
->cost_map
[i
].cand
)
2361 for (i
= 0; i
< s
; i
++)
2362 if (!use
->cost_map
[i
].cand
)
2368 use
->cost_map
[i
].cand
= cand
;
2369 use
->cost_map
[i
].cost
= cost
;
2370 use
->cost_map
[i
].depends_on
= depends_on
;
2371 use
->cost_map
[i
].value
= value
;
2374 /* Gets cost of (USE, CANDIDATE) pair. */
2376 static struct cost_pair
*
2377 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2378 struct iv_cand
*cand
)
2381 struct cost_pair
*ret
;
2386 if (data
->consider_all_candidates
)
2388 ret
= use
->cost_map
+ cand
->id
;
2395 /* n_map_members is a power of two, so this computes modulo. */
2396 s
= cand
->id
& (use
->n_map_members
- 1);
2397 for (i
= s
; i
< use
->n_map_members
; i
++)
2398 if (use
->cost_map
[i
].cand
== cand
)
2399 return use
->cost_map
+ i
;
2401 for (i
= 0; i
< s
; i
++)
2402 if (use
->cost_map
[i
].cand
== cand
)
2403 return use
->cost_map
+ i
;
2408 /* Returns estimate on cost of computing SEQ. */
2416 for (; seq
; seq
= NEXT_INSN (seq
))
2418 set
= single_set (seq
);
2420 cost
+= rtx_cost (set
, SET
);
2428 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2430 produce_memory_decl_rtl (tree obj
, int *regno
)
2435 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2437 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2438 x
= gen_rtx_SYMBOL_REF (Pmode
, name
);
2441 x
= gen_raw_REG (Pmode
, (*regno
)++);
2443 return gen_rtx_MEM (DECL_MODE (obj
), x
);
2446 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2447 walk_tree. DATA contains the actual fake register number. */
2450 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2452 tree obj
= NULL_TREE
;
2456 switch (TREE_CODE (*expr_p
))
2459 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2460 handled_component_p (*expr_p
);
2461 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2464 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2465 x
= produce_memory_decl_rtl (obj
, regno
);
2470 obj
= SSA_NAME_VAR (*expr_p
);
2471 if (!DECL_RTL_SET_P (obj
))
2472 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2481 if (DECL_RTL_SET_P (obj
))
2484 if (DECL_MODE (obj
) == BLKmode
)
2485 x
= produce_memory_decl_rtl (obj
, regno
);
2487 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2497 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2498 SET_DECL_RTL (obj
, x
);
2504 /* Determines cost of the computation of EXPR. */
2507 computation_cost (tree expr
)
2510 tree type
= TREE_TYPE (expr
);
2512 /* Avoid using hard regs in ways which may be unsupported. */
2513 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2515 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2517 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2521 cost
= seq_cost (seq
);
2523 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
));
2528 /* Returns variable containing the value of candidate CAND at statement AT. */
2531 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, tree stmt
)
2533 if (stmt_after_increment (loop
, cand
, stmt
))
2534 return cand
->var_after
;
2536 return cand
->var_before
;
2539 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2540 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2543 tree_int_cst_sign_bit (tree t
)
2545 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2546 unsigned HOST_WIDE_INT w
;
2548 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2549 w
= TREE_INT_CST_LOW (t
);
2552 w
= TREE_INT_CST_HIGH (t
);
2553 bitno
-= HOST_BITS_PER_WIDE_INT
;
2556 return (w
>> bitno
) & 1;
2559 /* If we can prove that TOP = cst * BOT for some constant cst,
2560 store cst to MUL and return true. Otherwise return false.
2561 The returned value is always sign-extended, regardless of the
2562 signedness of TOP and BOT. */
2565 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
2568 enum tree_code code
;
2569 double_int res
, p0
, p1
;
2570 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2575 if (operand_equal_p (top
, bot
, 0))
2577 *mul
= double_int_one
;
2581 code
= TREE_CODE (top
);
2585 mby
= TREE_OPERAND (top
, 1);
2586 if (TREE_CODE (mby
) != INTEGER_CST
)
2589 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2592 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
2598 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2599 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2602 if (code
== MINUS_EXPR
)
2603 p1
= double_int_neg (p1
);
2604 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
2608 if (TREE_CODE (bot
) != INTEGER_CST
)
2611 p0
= double_int_sext (tree_to_double_int (top
), precision
);
2612 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
2613 if (double_int_zero_p (p1
))
2615 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
2617 return double_int_zero_p (res
);
2624 /* Sets COMB to CST. */
2627 aff_combination_const (struct affine_tree_combination
*comb
, tree type
,
2628 unsigned HOST_WIDE_INT cst
)
2630 unsigned prec
= TYPE_PRECISION (type
);
2633 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2636 comb
->rest
= NULL_TREE
;
2637 comb
->offset
= cst
& comb
->mask
;
2640 /* Sets COMB to single element ELT. */
2643 aff_combination_elt (struct affine_tree_combination
*comb
, tree type
, tree elt
)
2645 unsigned prec
= TYPE_PRECISION (type
);
2648 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2651 comb
->elts
[0] = elt
;
2653 comb
->rest
= NULL_TREE
;
2657 /* Scales COMB by SCALE. */
2660 aff_combination_scale (struct affine_tree_combination
*comb
,
2661 unsigned HOST_WIDE_INT scale
)
2670 aff_combination_const (comb
, comb
->type
, 0);
2674 comb
->offset
= (scale
* comb
->offset
) & comb
->mask
;
2675 for (i
= 0, j
= 0; i
< comb
->n
; i
++)
2677 comb
->coefs
[j
] = (scale
* comb
->coefs
[i
]) & comb
->mask
;
2678 comb
->elts
[j
] = comb
->elts
[i
];
2679 if (comb
->coefs
[j
] != 0)
2686 if (comb
->n
< MAX_AFF_ELTS
)
2688 comb
->coefs
[comb
->n
] = scale
;
2689 comb
->elts
[comb
->n
] = comb
->rest
;
2690 comb
->rest
= NULL_TREE
;
2694 comb
->rest
= fold_build2 (MULT_EXPR
, comb
->type
, comb
->rest
,
2695 build_int_cst_type (comb
->type
, scale
));
2699 /* Adds ELT * SCALE to COMB. */
2702 aff_combination_add_elt (struct affine_tree_combination
*comb
, tree elt
,
2703 unsigned HOST_WIDE_INT scale
)
2710 for (i
= 0; i
< comb
->n
; i
++)
2711 if (operand_equal_p (comb
->elts
[i
], elt
, 0))
2713 comb
->coefs
[i
] = (comb
->coefs
[i
] + scale
) & comb
->mask
;
2718 comb
->coefs
[i
] = comb
->coefs
[comb
->n
];
2719 comb
->elts
[i
] = comb
->elts
[comb
->n
];
2723 gcc_assert (comb
->n
== MAX_AFF_ELTS
- 1);
2724 comb
->coefs
[comb
->n
] = 1;
2725 comb
->elts
[comb
->n
] = comb
->rest
;
2726 comb
->rest
= NULL_TREE
;
2731 if (comb
->n
< MAX_AFF_ELTS
)
2733 comb
->coefs
[comb
->n
] = scale
;
2734 comb
->elts
[comb
->n
] = elt
;
2740 elt
= fold_convert (comb
->type
, elt
);
2742 elt
= fold_build2 (MULT_EXPR
, comb
->type
,
2743 fold_convert (comb
->type
, elt
),
2744 build_int_cst_type (comb
->type
, scale
));
2747 comb
->rest
= fold_build2 (PLUS_EXPR
, comb
->type
, comb
->rest
, elt
);
2752 /* Adds COMB2 to COMB1. */
2755 aff_combination_add (struct affine_tree_combination
*comb1
,
2756 struct affine_tree_combination
*comb2
)
2760 comb1
->offset
= (comb1
->offset
+ comb2
->offset
) & comb1
->mask
;
2761 for (i
= 0; i
< comb2
->n
; i
++)
2762 aff_combination_add_elt (comb1
, comb2
->elts
[i
], comb2
->coefs
[i
]);
2764 aff_combination_add_elt (comb1
, comb2
->rest
, 1);
2767 /* Convert COMB to TYPE. */
2770 aff_combination_convert (tree type
, struct affine_tree_combination
*comb
)
2772 unsigned prec
= TYPE_PRECISION (type
);
2775 /* If the precision of both types is the same, it suffices to change the type
2776 of the whole combination -- the elements are allowed to have another type
2777 equivalent wrto STRIP_NOPS. */
2778 if (prec
== TYPE_PRECISION (comb
->type
))
2784 comb
->mask
= (((unsigned HOST_WIDE_INT
) 2 << (prec
- 1)) - 1);
2785 comb
->offset
= comb
->offset
& comb
->mask
;
2787 /* The type of the elements can be different from comb->type only as
2788 much as what STRIP_NOPS would remove. We can just directly cast
2790 for (i
= 0; i
< comb
->n
; i
++)
2791 comb
->elts
[i
] = fold_convert (type
, comb
->elts
[i
]);
2793 comb
->rest
= fold_convert (type
, comb
->rest
);
2798 /* Splits EXPR into an affine combination of parts. */
2801 tree_to_aff_combination (tree expr
, tree type
,
2802 struct affine_tree_combination
*comb
)
2804 struct affine_tree_combination tmp
;
2805 enum tree_code code
;
2806 tree cst
, core
, toffset
;
2807 HOST_WIDE_INT bitpos
, bitsize
;
2808 enum machine_mode mode
;
2809 int unsignedp
, volatilep
;
2813 code
= TREE_CODE (expr
);
2817 aff_combination_const (comb
, type
, int_cst_value (expr
));
2822 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2823 tree_to_aff_combination (TREE_OPERAND (expr
, 1), type
, &tmp
);
2824 if (code
== MINUS_EXPR
)
2825 aff_combination_scale (&tmp
, -1);
2826 aff_combination_add (comb
, &tmp
);
2830 cst
= TREE_OPERAND (expr
, 1);
2831 if (TREE_CODE (cst
) != INTEGER_CST
)
2833 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2834 aff_combination_scale (comb
, int_cst_value (cst
));
2838 tree_to_aff_combination (TREE_OPERAND (expr
, 0), type
, comb
);
2839 aff_combination_scale (comb
, -1);
2843 core
= get_inner_reference (TREE_OPERAND (expr
, 0), &bitsize
, &bitpos
,
2844 &toffset
, &mode
, &unsignedp
, &volatilep
,
2846 if (bitpos
% BITS_PER_UNIT
!= 0)
2848 aff_combination_const (comb
, type
, bitpos
/ BITS_PER_UNIT
);
2849 core
= build_fold_addr_expr (core
);
2850 if (TREE_CODE (core
) == ADDR_EXPR
)
2851 aff_combination_add_elt (comb
, core
, 1);
2854 tree_to_aff_combination (core
, type
, &tmp
);
2855 aff_combination_add (comb
, &tmp
);
2859 tree_to_aff_combination (toffset
, type
, &tmp
);
2860 aff_combination_add (comb
, &tmp
);
2868 aff_combination_elt (comb
, type
, expr
);
2871 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2874 add_elt_to_tree (tree expr
, tree type
, tree elt
, unsigned HOST_WIDE_INT scale
,
2875 unsigned HOST_WIDE_INT mask
)
2877 enum tree_code code
;
2880 elt
= fold_convert (type
, elt
);
2887 return fold_build2 (PLUS_EXPR
, type
, expr
, elt
);
2893 return fold_build1 (NEGATE_EXPR
, type
, elt
);
2895 return fold_build2 (MINUS_EXPR
, type
, expr
, elt
);
2899 return fold_build2 (MULT_EXPR
, type
, elt
,
2900 build_int_cst_type (type
, scale
));
2902 if ((scale
| (mask
>> 1)) == mask
)
2904 /* Scale is negative. */
2906 scale
= (-scale
) & mask
;
2911 elt
= fold_build2 (MULT_EXPR
, type
, elt
,
2912 build_int_cst_type (type
, scale
));
2913 return fold_build2 (code
, type
, expr
, elt
);
2916 /* Copies the tree elements of COMB to ensure that they are not shared. */
2919 unshare_aff_combination (struct affine_tree_combination
*comb
)
2923 for (i
= 0; i
< comb
->n
; i
++)
2924 comb
->elts
[i
] = unshare_expr (comb
->elts
[i
]);
2926 comb
->rest
= unshare_expr (comb
->rest
);
2929 /* Makes tree from the affine combination COMB. */
2932 aff_combination_to_tree (struct affine_tree_combination
*comb
)
2934 tree type
= comb
->type
;
2935 tree expr
= comb
->rest
;
2937 unsigned HOST_WIDE_INT off
, sgn
;
2939 if (comb
->n
== 0 && comb
->offset
== 0)
2943 /* Handle the special case produced by get_computation_aff when
2944 the type does not fit in HOST_WIDE_INT. */
2945 return fold_convert (type
, expr
);
2948 return build_int_cst (type
, 0);
2951 gcc_assert (comb
->n
== MAX_AFF_ELTS
|| comb
->rest
== NULL_TREE
);
2953 for (i
= 0; i
< comb
->n
; i
++)
2954 expr
= add_elt_to_tree (expr
, type
, comb
->elts
[i
], comb
->coefs
[i
],
2957 if ((comb
->offset
| (comb
->mask
>> 1)) == comb
->mask
)
2959 /* Offset is negative. */
2960 off
= (-comb
->offset
) & comb
->mask
;
2968 return add_elt_to_tree (expr
, type
, build_int_cst_type (type
, off
), sgn
,
2972 /* Folds EXPR using the affine expressions framework. */
2975 fold_affine_expr (tree expr
)
2977 tree type
= TREE_TYPE (expr
);
2978 struct affine_tree_combination comb
;
2980 if (TYPE_PRECISION (type
) > HOST_BITS_PER_WIDE_INT
)
2983 tree_to_aff_combination (expr
, type
, &comb
);
2984 return aff_combination_to_tree (&comb
);
2987 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2988 same precision that is at least as wide as the precision of TYPE, stores
2989 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2993 determine_common_wider_type (tree
*a
, tree
*b
)
2995 tree wider_type
= NULL
;
2997 tree atype
= TREE_TYPE (*a
);
2999 if ((TREE_CODE (*a
) == NOP_EXPR
3000 || TREE_CODE (*a
) == CONVERT_EXPR
))
3002 suba
= TREE_OPERAND (*a
, 0);
3003 wider_type
= TREE_TYPE (suba
);
3004 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3010 if ((TREE_CODE (*b
) == NOP_EXPR
3011 || TREE_CODE (*b
) == CONVERT_EXPR
))
3013 subb
= TREE_OPERAND (*b
, 0);
3014 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3025 /* Determines the expression by that USE is expressed from induction variable
3026 CAND at statement AT in LOOP. The expression is stored in a decomposed
3027 form into AFF. Returns false if USE cannot be expressed using CAND. */
3030 get_computation_aff (struct loop
*loop
,
3031 struct iv_use
*use
, struct iv_cand
*cand
, tree at
,
3032 struct affine_tree_combination
*aff
)
3034 tree ubase
= use
->iv
->base
;
3035 tree ustep
= use
->iv
->step
;
3036 tree cbase
= cand
->iv
->base
;
3037 tree cstep
= cand
->iv
->step
;
3038 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3043 unsigned HOST_WIDE_INT ustepi
, cstepi
;
3044 HOST_WIDE_INT ratioi
;
3045 struct affine_tree_combination cbase_aff
, expr_aff
;
3046 tree cstep_orig
= cstep
, ustep_orig
= ustep
;
3049 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3051 /* We do not have a precision to express the values of use. */
3055 expr
= var_at_stmt (loop
, cand
, at
);
3057 if (TREE_TYPE (expr
) != ctype
)
3059 /* This may happen with the original ivs. */
3060 expr
= fold_convert (ctype
, expr
);
3063 if (TYPE_UNSIGNED (utype
))
3067 uutype
= unsigned_type_for (utype
);
3068 ubase
= fold_convert (uutype
, ubase
);
3069 ustep
= fold_convert (uutype
, ustep
);
3072 if (uutype
!= ctype
)
3074 expr
= fold_convert (uutype
, expr
);
3075 cbase
= fold_convert (uutype
, cbase
);
3076 cstep
= fold_convert (uutype
, cstep
);
3078 /* If the conversion is not noop, we must take it into account when
3079 considering the value of the step. */
3080 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3084 if (cst_and_fits_in_hwi (cstep_orig
)
3085 && cst_and_fits_in_hwi (ustep_orig
))
3087 ustepi
= int_cst_value (ustep_orig
);
3088 cstepi
= int_cst_value (cstep_orig
);
3090 if (!divide (TYPE_PRECISION (uutype
), ustepi
, cstepi
, &ratioi
))
3092 /* TODO maybe consider case when ustep divides cstep and the ratio is
3093 a power of 2 (so that the division is fast to execute)? We would
3094 need to be much more careful with overflows etc. then. */
3098 ratio
= build_int_cst_type (uutype
, ratioi
);
3102 if (!constant_multiple_of (ustep_orig
, cstep_orig
, &rat
))
3104 ratio
= double_int_to_tree (uutype
, rat
);
3106 /* Ratioi is only used to detect special cases when the multiplicative
3107 factor is 1 or -1, so if rat does not fit to HOST_WIDE_INT, we may
3109 if (double_int_fits_in_shwi_p (rat
))
3110 ratioi
= double_int_to_shwi (rat
);
3115 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3116 type, we achieve better folding by computing their difference in this
3117 wider type, and cast the result to UUTYPE. We do not need to worry about
3118 overflows, as all the arithmetics will in the end be performed in UUTYPE
3120 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3122 /* We may need to shift the value if we are after the increment. */
3123 if (stmt_after_increment (loop
, cand
, at
))
3125 if (uutype
!= common_type
)
3126 cstep
= fold_convert (common_type
, cstep
);
3127 cbase
= fold_build2 (PLUS_EXPR
, common_type
, cbase
, cstep
);
3130 /* use = ubase - ratio * cbase + ratio * var.
3132 In general case ubase + ratio * (var - cbase) could be better (one less
3133 multiplication), but often it is possible to eliminate redundant parts
3134 of computations from (ubase - ratio * cbase) term, and if it does not
3135 happen, fold is able to apply the distributive law to obtain this form
3138 if (TYPE_PRECISION (common_type
) > HOST_BITS_PER_WIDE_INT
)
3140 /* Let's compute in trees and just return the result in AFF. This case
3141 should not be very common, and fold itself is not that bad either,
3142 so making the aff. functions more complicated to handle this case
3143 is not that urgent. */
3146 delta
= fold_build2 (MINUS_EXPR
, common_type
, ubase
, cbase
);
3147 if (uutype
!= common_type
)
3148 delta
= fold_convert (uutype
, delta
);
3149 expr
= fold_build2 (PLUS_EXPR
, uutype
, expr
, delta
);
3151 else if (ratioi
== -1)
3153 delta
= fold_build2 (PLUS_EXPR
, common_type
, ubase
, cbase
);
3154 if (uutype
!= common_type
)
3155 delta
= fold_convert (uutype
, delta
);
3156 expr
= fold_build2 (MINUS_EXPR
, uutype
, delta
, expr
);
3160 delta
= fold_build2 (MULT_EXPR
, common_type
, cbase
, ratio
);
3161 delta
= fold_build2 (MINUS_EXPR
, common_type
, ubase
, delta
);
3162 if (uutype
!= common_type
)
3163 delta
= fold_convert (uutype
, delta
);
3164 expr
= fold_build2 (MULT_EXPR
, uutype
, ratio
, expr
);
3165 expr
= fold_build2 (PLUS_EXPR
, uutype
, delta
, expr
);
3176 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3177 possible to compute ratioi. */
3178 gcc_assert (ratioi
);
3180 tree_to_aff_combination (ubase
, common_type
, aff
);
3181 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3182 tree_to_aff_combination (expr
, uutype
, &expr_aff
);
3183 aff_combination_scale (&cbase_aff
, -ratioi
);
3184 aff_combination_scale (&expr_aff
, ratioi
);
3185 aff_combination_add (aff
, &cbase_aff
);
3186 if (common_type
!= uutype
)
3187 aff_combination_convert (uutype
, aff
);
3188 aff_combination_add (aff
, &expr_aff
);
3193 /* Determines the expression by that USE is expressed from induction variable
3194 CAND at statement AT in LOOP. The computation is unshared. */
3197 get_computation_at (struct loop
*loop
,
3198 struct iv_use
*use
, struct iv_cand
*cand
, tree at
)
3200 struct affine_tree_combination aff
;
3201 tree type
= TREE_TYPE (use
->iv
->base
);
3203 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3205 unshare_aff_combination (&aff
);
3206 return fold_convert (type
, aff_combination_to_tree (&aff
));
3209 /* Determines the expression by that USE is expressed from induction variable
3210 CAND in LOOP. The computation is unshared. */
3213 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3215 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3218 /* Returns cost of addition in MODE. */
3221 add_cost (enum machine_mode mode
)
3223 static unsigned costs
[NUM_MACHINE_MODES
];
3231 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3232 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3233 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3238 cost
= seq_cost (seq
);
3244 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3245 fprintf (dump_file
, "Addition in %s costs %d\n",
3246 GET_MODE_NAME (mode
), cost
);
3250 /* Entry in a hashtable of already known costs for multiplication. */
3253 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3254 enum machine_mode mode
; /* In mode. */
3255 unsigned cost
; /* The cost. */
3258 /* Counts hash value for the ENTRY. */
3261 mbc_entry_hash (const void *entry
)
3263 const struct mbc_entry
*e
= entry
;
3265 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3268 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3271 mbc_entry_eq (const void *entry1
, const void *entry2
)
3273 const struct mbc_entry
*e1
= entry1
;
3274 const struct mbc_entry
*e2
= entry2
;
3276 return (e1
->mode
== e2
->mode
3277 && e1
->cst
== e2
->cst
);
3280 /* Returns cost of multiplication by constant CST in MODE. */
3283 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
)
3285 static htab_t costs
;
3286 struct mbc_entry
**cached
, act
;
3291 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3295 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3297 return (*cached
)->cost
;
3299 *cached
= XNEW (struct mbc_entry
);
3300 (*cached
)->mode
= mode
;
3301 (*cached
)->cst
= cst
;
3304 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3305 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3309 cost
= seq_cost (seq
);
3311 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3312 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3313 (int) cst
, GET_MODE_NAME (mode
), cost
);
3315 (*cached
)->cost
= cost
;
3320 /* Returns true if multiplying by RATIO is allowed in address. */
3323 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
)
3325 #define MAX_RATIO 128
3326 static sbitmap valid_mult
;
3330 rtx reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3334 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3335 sbitmap_zero (valid_mult
);
3336 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, reg1
, NULL_RTX
);
3337 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3339 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3340 if (memory_address_p (Pmode
, addr
))
3341 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3344 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3346 fprintf (dump_file
, " allowed multipliers:");
3347 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3348 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3349 fprintf (dump_file
, " %d", (int) i
);
3350 fprintf (dump_file
, "\n");
3351 fprintf (dump_file
, "\n");
3355 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3358 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3361 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3362 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3363 variable is omitted. The created memory accesses MODE.
3365 TODO -- there must be some better way. This all is quite crude. */
3368 get_address_cost (bool symbol_present
, bool var_present
,
3369 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
)
3371 static bool initialized
= false;
3372 static HOST_WIDE_INT rat
, off
;
3373 static HOST_WIDE_INT min_offset
, max_offset
;
3374 static unsigned costs
[2][2][2][2];
3375 unsigned cost
, acost
;
3376 bool offset_p
, ratio_p
;
3377 HOST_WIDE_INT s_offset
;
3378 unsigned HOST_WIDE_INT mask
;
3384 int old_cse_not_expected
;
3385 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3386 rtx seq
, addr
, base
;
3391 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3393 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, reg1
, NULL_RTX
);
3394 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3396 XEXP (addr
, 1) = gen_int_mode (i
, Pmode
);
3397 if (!memory_address_p (Pmode
, addr
))
3400 max_offset
= i
>> 1;
3403 for (i
= 1; i
<= 1 << 20; i
<<= 1)
3405 XEXP (addr
, 1) = gen_int_mode (-i
, Pmode
);
3406 if (!memory_address_p (Pmode
, addr
))
3409 min_offset
= -(i
>> 1);
3411 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3413 fprintf (dump_file
, "get_address_cost:\n");
3414 fprintf (dump_file
, " min offset %d\n", (int) min_offset
);
3415 fprintf (dump_file
, " max offset %d\n", (int) max_offset
);
3419 for (i
= 2; i
<= MAX_RATIO
; i
++)
3420 if (multiplier_allowed_in_address_p (i
))
3426 /* Compute the cost of various addressing modes. */
3428 reg0
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 1);
3429 reg1
= gen_raw_REG (Pmode
, LAST_VIRTUAL_REGISTER
+ 2);
3431 for (i
= 0; i
< 16; i
++)
3434 var_p
= (i
>> 1) & 1;
3435 off_p
= (i
>> 2) & 1;
3436 rat_p
= (i
>> 3) & 1;
3440 addr
= gen_rtx_fmt_ee (MULT
, Pmode
, addr
, gen_int_mode (rat
, Pmode
));
3443 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, reg1
);
3447 base
= gen_rtx_SYMBOL_REF (Pmode
, ggc_strdup (""));
3449 base
= gen_rtx_fmt_e (CONST
, Pmode
,
3450 gen_rtx_fmt_ee (PLUS
, Pmode
,
3452 gen_int_mode (off
, Pmode
)));
3455 base
= gen_int_mode (off
, Pmode
);
3460 addr
= gen_rtx_fmt_ee (PLUS
, Pmode
, addr
, base
);
3463 /* To avoid splitting addressing modes, pretend that no cse will
3465 old_cse_not_expected
= cse_not_expected
;
3466 cse_not_expected
= true;
3467 addr
= memory_address (Pmode
, addr
);
3468 cse_not_expected
= old_cse_not_expected
;
3472 acost
= seq_cost (seq
);
3473 acost
+= address_cost (addr
, Pmode
);
3477 costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3480 /* On some targets, it is quite expensive to load symbol to a register,
3481 which makes addresses that contain symbols look much more expensive.
3482 However, the symbol will have to be loaded in any case before the
3483 loop (and quite likely we have it in register already), so it does not
3484 make much sense to penalize them too heavily. So make some final
3485 tweaks for the SYMBOL_PRESENT modes:
3487 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3488 var is cheaper, use this mode with small penalty.
3489 If VAR_PRESENT is true, try whether the mode with
3490 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3491 if this is the case, use it. */
3492 add_c
= add_cost (Pmode
);
3493 for (i
= 0; i
< 8; i
++)
3496 off_p
= (i
>> 1) & 1;
3497 rat_p
= (i
>> 2) & 1;
3499 acost
= costs
[0][1][off_p
][rat_p
] + 1;
3503 if (acost
< costs
[1][var_p
][off_p
][rat_p
])
3504 costs
[1][var_p
][off_p
][rat_p
] = acost
;
3507 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3509 fprintf (dump_file
, "Address costs:\n");
3511 for (i
= 0; i
< 16; i
++)
3514 var_p
= (i
>> 1) & 1;
3515 off_p
= (i
>> 2) & 1;
3516 rat_p
= (i
>> 3) & 1;
3518 fprintf (dump_file
, " ");
3520 fprintf (dump_file
, "sym + ");
3522 fprintf (dump_file
, "var + ");
3524 fprintf (dump_file
, "cst + ");
3526 fprintf (dump_file
, "rat * ");
3528 acost
= costs
[sym_p
][var_p
][off_p
][rat_p
];
3529 fprintf (dump_file
, "index costs %d\n", acost
);
3531 fprintf (dump_file
, "\n");
3535 bits
= GET_MODE_BITSIZE (Pmode
);
3536 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3538 if ((offset
>> (bits
- 1) & 1))
3543 offset_p
= (s_offset
!= 0
3544 && min_offset
<= s_offset
&& s_offset
<= max_offset
);
3545 ratio_p
= (ratio
!= 1
3546 && multiplier_allowed_in_address_p (ratio
));
3548 if (ratio
!= 1 && !ratio_p
)
3549 cost
+= multiply_by_cost (ratio
, Pmode
);
3551 if (s_offset
&& !offset_p
&& !symbol_present
)
3553 cost
+= add_cost (Pmode
);
3557 acost
= costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3558 return cost
+ acost
;
3561 /* Estimates cost of forcing expression EXPR into a variable. */
3564 force_expr_to_var_cost (tree expr
)
3566 static bool costs_initialized
= false;
3567 static unsigned integer_cost
;
3568 static unsigned symbol_cost
;
3569 static unsigned address_cost
;
3571 unsigned cost0
, cost1
, cost
;
3572 enum machine_mode mode
;
3574 if (!costs_initialized
)
3576 tree var
= create_tmp_var_raw (integer_type_node
, "test_var");
3577 rtx x
= gen_rtx_MEM (DECL_MODE (var
),
3578 gen_rtx_SYMBOL_REF (Pmode
, "test_var"));
3580 tree type
= build_pointer_type (integer_type_node
);
3582 integer_cost
= computation_cost (build_int_cst (integer_type_node
,
3585 SET_DECL_RTL (var
, x
);
3586 TREE_STATIC (var
) = 1;
3587 addr
= build1 (ADDR_EXPR
, type
, var
);
3588 symbol_cost
= computation_cost (addr
) + 1;
3591 = computation_cost (build2 (PLUS_EXPR
, type
,
3593 build_int_cst (type
, 2000))) + 1;
3594 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3596 fprintf (dump_file
, "force_expr_to_var_cost:\n");
3597 fprintf (dump_file
, " integer %d\n", (int) integer_cost
);
3598 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
);
3599 fprintf (dump_file
, " address %d\n", (int) address_cost
);
3600 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
);
3601 fprintf (dump_file
, "\n");
3604 costs_initialized
= true;
3609 if (SSA_VAR_P (expr
))
3612 if (TREE_INVARIANT (expr
))
3614 if (TREE_CODE (expr
) == INTEGER_CST
)
3615 return integer_cost
;
3617 if (TREE_CODE (expr
) == ADDR_EXPR
)
3619 tree obj
= TREE_OPERAND (expr
, 0);
3621 if (TREE_CODE (obj
) == VAR_DECL
3622 || TREE_CODE (obj
) == PARM_DECL
3623 || TREE_CODE (obj
) == RESULT_DECL
)
3627 return address_cost
;
3630 switch (TREE_CODE (expr
))
3635 op0
= TREE_OPERAND (expr
, 0);
3636 op1
= TREE_OPERAND (expr
, 1);
3640 if (is_gimple_val (op0
))
3643 cost0
= force_expr_to_var_cost (op0
);
3645 if (is_gimple_val (op1
))
3648 cost1
= force_expr_to_var_cost (op1
);
3653 /* Just an arbitrary value, FIXME. */
3654 return target_spill_cost
;
3657 mode
= TYPE_MODE (TREE_TYPE (expr
));
3658 switch (TREE_CODE (expr
))
3662 cost
= add_cost (mode
);
3666 if (cst_and_fits_in_hwi (op0
))
3667 cost
= multiply_by_cost (int_cst_value (op0
), mode
);
3668 else if (cst_and_fits_in_hwi (op1
))
3669 cost
= multiply_by_cost (int_cst_value (op1
), mode
);
3671 return target_spill_cost
;
3681 /* Bound the cost by target_spill_cost. The parts of complicated
3682 computations often are either loop invariant or at least can
3683 be shared between several iv uses, so letting this grow without
3684 limits would not give reasonable results. */
3685 return cost
< target_spill_cost
? cost
: target_spill_cost
;
3688 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3689 invariants the computation depends on. */
3692 force_var_cost (struct ivopts_data
*data
,
3693 tree expr
, bitmap
*depends_on
)
3697 fd_ivopts_data
= data
;
3698 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3701 return force_expr_to_var_cost (expr
);
3704 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3705 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3706 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3707 invariants the computation depends on. */
3710 split_address_cost (struct ivopts_data
*data
,
3711 tree addr
, bool *symbol_present
, bool *var_present
,
3712 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3715 HOST_WIDE_INT bitsize
;
3716 HOST_WIDE_INT bitpos
;
3718 enum machine_mode mode
;
3719 int unsignedp
, volatilep
;
3721 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3722 &unsignedp
, &volatilep
, false);
3725 || bitpos
% BITS_PER_UNIT
!= 0
3726 || TREE_CODE (core
) != VAR_DECL
)
3728 *symbol_present
= false;
3729 *var_present
= true;
3730 fd_ivopts_data
= data
;
3731 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3732 return target_spill_cost
;
3735 *offset
+= bitpos
/ BITS_PER_UNIT
;
3736 if (TREE_STATIC (core
)
3737 || DECL_EXTERNAL (core
))
3739 *symbol_present
= true;
3740 *var_present
= false;
3744 *symbol_present
= false;
3745 *var_present
= true;
3749 /* Estimates cost of expressing difference of addresses E1 - E2 as
3750 var + symbol + offset. The value of offset is added to OFFSET,
3751 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3752 part is missing. DEPENDS_ON is a set of the invariants the computation
3756 ptr_difference_cost (struct ivopts_data
*data
,
3757 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3758 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3760 HOST_WIDE_INT diff
= 0;
3763 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3765 if (ptr_difference_const (e1
, e2
, &diff
))
3768 *symbol_present
= false;
3769 *var_present
= false;
3773 if (e2
== integer_zero_node
)
3774 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3775 symbol_present
, var_present
, offset
, depends_on
);
3777 *symbol_present
= false;
3778 *var_present
= true;
3780 cost
= force_var_cost (data
, e1
, depends_on
);
3781 cost
+= force_var_cost (data
, e2
, depends_on
);
3782 cost
+= add_cost (Pmode
);
3787 /* Estimates cost of expressing difference E1 - E2 as
3788 var + symbol + offset. The value of offset is added to OFFSET,
3789 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3790 part is missing. DEPENDS_ON is a set of the invariants the computation
3794 difference_cost (struct ivopts_data
*data
,
3795 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3796 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3799 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3800 unsigned HOST_WIDE_INT off1
, off2
;
3802 e1
= strip_offset (e1
, &off1
);
3803 e2
= strip_offset (e2
, &off2
);
3804 *offset
+= off1
- off2
;
3809 if (TREE_CODE (e1
) == ADDR_EXPR
)
3810 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
, offset
,
3812 *symbol_present
= false;
3814 if (operand_equal_p (e1
, e2
, 0))
3816 *var_present
= false;
3819 *var_present
= true;
3821 return force_var_cost (data
, e1
, depends_on
);
3825 cost
= force_var_cost (data
, e2
, depends_on
);
3826 cost
+= multiply_by_cost (-1, mode
);
3831 cost
= force_var_cost (data
, e1
, depends_on
);
3832 cost
+= force_var_cost (data
, e2
, depends_on
);
3833 cost
+= add_cost (mode
);
3838 /* Determines the cost of the computation by that USE is expressed
3839 from induction variable CAND. If ADDRESS_P is true, we just need
3840 to create an address from it, otherwise we want to get it into
3841 register. A set of invariants we depend on is stored in
3842 DEPENDS_ON. AT is the statement at that the value is computed. */
3845 get_computation_cost_at (struct ivopts_data
*data
,
3846 struct iv_use
*use
, struct iv_cand
*cand
,
3847 bool address_p
, bitmap
*depends_on
, tree at
)
3849 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3851 tree utype
= TREE_TYPE (ubase
), ctype
;
3852 unsigned HOST_WIDE_INT ustepi
, cstepi
, offset
= 0;
3853 HOST_WIDE_INT ratio
, aratio
;
3854 bool var_present
, symbol_present
;
3855 unsigned cost
= 0, n_sums
;
3859 /* Only consider real candidates. */
3863 cbase
= cand
->iv
->base
;
3864 cstep
= cand
->iv
->step
;
3865 ctype
= TREE_TYPE (cbase
);
3867 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3869 /* We do not have a precision to express the values of use. */
3875 /* Do not try to express address of an object with computation based
3876 on address of a different object. This may cause problems in rtl
3877 level alias analysis (that does not expect this to be happening,
3878 as this is illegal in C), and would be unlikely to be useful
3880 if (use
->iv
->base_object
3881 && cand
->iv
->base_object
3882 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3886 if (TYPE_PRECISION (utype
) != TYPE_PRECISION (ctype
))
3888 /* TODO -- add direct handling of this case. */
3892 /* CSTEPI is removed from the offset in case statement is after the
3893 increment. If the step is not constant, we use zero instead.
3894 This is a bit imprecise (there is the extra addition), but
3895 redundancy elimination is likely to transform the code so that
3896 it uses value of the variable before increment anyway,
3897 so it is not that much unrealistic. */
3898 if (cst_and_fits_in_hwi (cstep
))
3899 cstepi
= int_cst_value (cstep
);
3903 if (cst_and_fits_in_hwi (ustep
)
3904 && cst_and_fits_in_hwi (cstep
))
3906 ustepi
= int_cst_value (ustep
);
3908 if (!divide (TYPE_PRECISION (utype
), ustepi
, cstepi
, &ratio
))
3915 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3918 if (double_int_fits_in_shwi_p (rat
))
3919 ratio
= double_int_to_shwi (rat
);
3924 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3925 or ratio == 1, it is better to handle this like
3927 ubase - ratio * cbase + ratio * var
3929 (also holds in the case ratio == -1, TODO. */
3931 if (cst_and_fits_in_hwi (cbase
))
3933 offset
= - ratio
* int_cst_value (cbase
);
3934 cost
+= difference_cost (data
,
3935 ubase
, integer_zero_node
,
3936 &symbol_present
, &var_present
, &offset
,
3939 else if (ratio
== 1)
3941 cost
+= difference_cost (data
,
3943 &symbol_present
, &var_present
, &offset
,
3948 cost
+= force_var_cost (data
, cbase
, depends_on
);
3949 cost
+= add_cost (TYPE_MODE (ctype
));
3950 cost
+= difference_cost (data
,
3951 ubase
, integer_zero_node
,
3952 &symbol_present
, &var_present
, &offset
,
3956 /* If we are after the increment, the value of the candidate is higher by
3958 if (stmt_after_increment (data
->current_loop
, cand
, at
))
3959 offset
-= ratio
* cstepi
;
3961 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3962 (symbol/var/const parts may be omitted). If we are looking for an address,
3963 find the cost of addressing this. */
3965 return cost
+ get_address_cost (symbol_present
, var_present
, offset
, ratio
);
3967 /* Otherwise estimate the costs for computing the expression. */
3968 aratio
= ratio
> 0 ? ratio
: -ratio
;
3969 if (!symbol_present
&& !var_present
&& !offset
)
3972 cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
));
3978 cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
));
3982 /* Symbol + offset should be compile-time computable. */
3983 && (symbol_present
|| offset
))
3986 return cost
+ n_sums
* add_cost (TYPE_MODE (ctype
));
3990 /* Just get the expression, expand it and measure the cost. */
3991 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
3997 comp
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (comp
)), comp
);
3999 return computation_cost (comp
);
4003 /* Determines the cost of the computation by that USE is expressed
4004 from induction variable CAND. If ADDRESS_P is true, we just need
4005 to create an address from it, otherwise we want to get it into
4006 register. A set of invariants we depend on is stored in
4010 get_computation_cost (struct ivopts_data
*data
,
4011 struct iv_use
*use
, struct iv_cand
*cand
,
4012 bool address_p
, bitmap
*depends_on
)
4014 return get_computation_cost_at (data
,
4015 use
, cand
, address_p
, depends_on
, use
->stmt
);
4018 /* Determines cost of basing replacement of USE on CAND in a generic
4022 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4023 struct iv_use
*use
, struct iv_cand
*cand
)
4028 /* The simple case first -- if we need to express value of the preserved
4029 original biv, the cost is 0. This also prevents us from counting the
4030 cost of increment twice -- once at this use and once in the cost of
4032 if (cand
->pos
== IP_ORIGINAL
4033 && cand
->incremented_at
== use
->stmt
)
4035 set_use_iv_cost (data
, use
, cand
, 0, NULL
, NULL_TREE
);
4039 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4040 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
4042 return cost
!= INFTY
;
4045 /* Determines cost of basing replacement of USE on CAND in an address. */
4048 determine_use_iv_cost_address (struct ivopts_data
*data
,
4049 struct iv_use
*use
, struct iv_cand
*cand
)
4052 unsigned cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
);
4054 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
);
4056 return cost
!= INFTY
;
4059 /* Computes value of induction variable IV in iteration NITER. */
4062 iv_value (struct iv
*iv
, tree niter
)
4065 tree type
= TREE_TYPE (iv
->base
);
4067 niter
= fold_convert (type
, niter
);
4068 val
= fold_build2 (MULT_EXPR
, type
, iv
->step
, niter
);
4070 return fold_build2 (PLUS_EXPR
, type
, iv
->base
, val
);
4073 /* Computes value of candidate CAND at position AT in iteration NITER. */
4076 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, tree at
, tree niter
)
4078 tree val
= iv_value (cand
->iv
, niter
);
4079 tree type
= TREE_TYPE (cand
->iv
->base
);
4081 if (stmt_after_increment (loop
, cand
, at
))
4082 val
= fold_build2 (PLUS_EXPR
, type
, val
, cand
->iv
->step
);
4087 /* Returns period of induction variable iv. */
4090 iv_period (struct iv
*iv
)
4092 tree step
= iv
->step
, period
, type
;
4095 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4097 /* Period of the iv is gcd (step, type range). Since type range is power
4098 of two, it suffices to determine the maximum power of two that divides
4100 pow2div
= num_ending_zeros (step
);
4101 type
= unsigned_type_for (TREE_TYPE (step
));
4103 period
= build_low_bits_mask (type
,
4104 (TYPE_PRECISION (type
)
4105 - tree_low_cst (pow2div
, 1)));
4110 /* Returns the comparison operator used when eliminating the iv USE. */
4112 static enum tree_code
4113 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4115 struct loop
*loop
= data
->current_loop
;
4119 ex_bb
= bb_for_stmt (use
->stmt
);
4120 exit
= EDGE_SUCC (ex_bb
, 0);
4121 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4122 exit
= EDGE_SUCC (ex_bb
, 1);
4124 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4127 /* Check whether it is possible to express the condition in USE by comparison
4128 of candidate CAND. If so, store the value compared with to BOUND. */
4131 may_eliminate_iv (struct ivopts_data
*data
,
4132 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
4137 tree wider_type
, period
, per_type
;
4138 struct loop
*loop
= data
->current_loop
;
4140 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4143 /* For now works only for exits that dominate the loop latch. TODO -- extend
4144 for other conditions inside loop body. */
4145 ex_bb
= bb_for_stmt (use
->stmt
);
4146 if (use
->stmt
!= last_stmt (ex_bb
)
4147 || TREE_CODE (use
->stmt
) != COND_EXPR
)
4149 if (!dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4152 exit
= EDGE_SUCC (ex_bb
, 0);
4153 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4154 exit
= EDGE_SUCC (ex_bb
, 1);
4155 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4158 nit
= niter_for_exit (data
, exit
);
4162 nit_type
= TREE_TYPE (nit
);
4164 /* Determine whether we may use the variable to test whether niter iterations
4165 elapsed. This is the case iff the period of the induction variable is
4166 greater than the number of iterations. */
4167 period
= iv_period (cand
->iv
);
4170 per_type
= TREE_TYPE (period
);
4172 wider_type
= TREE_TYPE (period
);
4173 if (TYPE_PRECISION (nit_type
) < TYPE_PRECISION (per_type
))
4174 wider_type
= per_type
;
4176 wider_type
= nit_type
;
4178 if (!integer_nonzerop (fold_build2 (GE_EXPR
, boolean_type_node
,
4179 fold_convert (wider_type
, period
),
4180 fold_convert (wider_type
, nit
))))
4183 *bound
= fold_affine_expr (cand_value_at (loop
, cand
, use
->stmt
, nit
));
4187 /* Determines cost of basing replacement of USE on CAND in a condition. */
4190 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4191 struct iv_use
*use
, struct iv_cand
*cand
)
4193 tree bound
= NULL_TREE
, op
, cond
;
4194 bitmap depends_on
= NULL
;
4197 /* Only consider real candidates. */
4200 set_use_iv_cost (data
, use
, cand
, INFTY
, NULL
, NULL_TREE
);
4204 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4206 cost
= force_var_cost (data
, bound
, &depends_on
);
4208 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
);
4209 return cost
!= INFTY
;
4212 /* The induction variable elimination failed; just express the original
4213 giv. If it is compared with an invariant, note that we cannot get
4215 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
);
4218 if (TREE_CODE (cond
) != SSA_NAME
)
4220 op
= TREE_OPERAND (cond
, 0);
4221 if (TREE_CODE (op
) == SSA_NAME
&& !zero_p (get_iv (data
, op
)->step
))
4222 op
= TREE_OPERAND (cond
, 1);
4223 if (TREE_CODE (op
) == SSA_NAME
)
4225 op
= get_iv (data
, op
)->base
;
4226 fd_ivopts_data
= data
;
4227 walk_tree (&op
, find_depends
, &depends_on
, NULL
);
4231 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL
);
4232 return cost
!= INFTY
;
4235 /* Determines cost of basing replacement of USE on CAND. Returns false
4236 if USE cannot be based on CAND. */
4239 determine_use_iv_cost (struct ivopts_data
*data
,
4240 struct iv_use
*use
, struct iv_cand
*cand
)
4244 case USE_NONLINEAR_EXPR
:
4245 return determine_use_iv_cost_generic (data
, use
, cand
);
4248 return determine_use_iv_cost_address (data
, use
, cand
);
4251 return determine_use_iv_cost_condition (data
, use
, cand
);
4258 /* Determines costs of basing the use of the iv on an iv candidate. */
4261 determine_use_iv_costs (struct ivopts_data
*data
)
4265 struct iv_cand
*cand
;
4266 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4268 alloc_use_cost_map (data
);
4270 for (i
= 0; i
< n_iv_uses (data
); i
++)
4272 use
= iv_use (data
, i
);
4274 if (data
->consider_all_candidates
)
4276 for (j
= 0; j
< n_iv_cands (data
); j
++)
4278 cand
= iv_cand (data
, j
);
4279 determine_use_iv_cost (data
, use
, cand
);
4286 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4288 cand
= iv_cand (data
, j
);
4289 if (!determine_use_iv_cost (data
, use
, cand
))
4290 bitmap_set_bit (to_clear
, j
);
4293 /* Remove the candidates for that the cost is infinite from
4294 the list of related candidates. */
4295 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4296 bitmap_clear (to_clear
);
4300 BITMAP_FREE (to_clear
);
4302 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4304 fprintf (dump_file
, "Use-candidate costs:\n");
4306 for (i
= 0; i
< n_iv_uses (data
); i
++)
4308 use
= iv_use (data
, i
);
4310 fprintf (dump_file
, "Use %d:\n", i
);
4311 fprintf (dump_file
, " cand\tcost\tdepends on\n");
4312 for (j
= 0; j
< use
->n_map_members
; j
++)
4314 if (!use
->cost_map
[j
].cand
4315 || use
->cost_map
[j
].cost
== INFTY
)
4318 fprintf (dump_file
, " %d\t%d\t",
4319 use
->cost_map
[j
].cand
->id
,
4320 use
->cost_map
[j
].cost
);
4321 if (use
->cost_map
[j
].depends_on
)
4322 bitmap_print (dump_file
,
4323 use
->cost_map
[j
].depends_on
, "","");
4324 fprintf (dump_file
, "\n");
4327 fprintf (dump_file
, "\n");
4329 fprintf (dump_file
, "\n");
4333 /* Determines cost of the candidate CAND. */
4336 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4338 unsigned cost_base
, cost_step
;
4347 /* There are two costs associated with the candidate -- its increment
4348 and its initialization. The second is almost negligible for any loop
4349 that rolls enough, so we take it just very little into account. */
4351 base
= cand
->iv
->base
;
4352 cost_base
= force_var_cost (data
, base
, NULL
);
4353 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)));
4355 cand
->cost
= cost_step
+ cost_base
/ AVG_LOOP_NITER (current_loop
);
4357 /* Prefer the original iv unless we may gain something by replacing it;
4358 this is not really relevant for artificial ivs created by other
4360 if (cand
->pos
== IP_ORIGINAL
4361 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4364 /* Prefer not to insert statements into latch unless there are some
4365 already (so that we do not create unnecessary jumps). */
4366 if (cand
->pos
== IP_END
4367 && empty_block_p (ip_end_pos (data
->current_loop
)))
4371 /* Determines costs of computation of the candidates. */
4374 determine_iv_costs (struct ivopts_data
*data
)
4378 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4380 fprintf (dump_file
, "Candidate costs:\n");
4381 fprintf (dump_file
, " cand\tcost\n");
4384 for (i
= 0; i
< n_iv_cands (data
); i
++)
4386 struct iv_cand
*cand
= iv_cand (data
, i
);
4388 determine_iv_cost (data
, cand
);
4390 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4391 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4394 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4395 fprintf (dump_file
, "\n");
4398 /* Calculates cost for having SIZE induction variables. */
4401 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4403 return global_cost_for_size (size
, data
->regs_used
, n_iv_uses (data
));
4406 /* For each size of the induction variable set determine the penalty. */
4409 determine_set_costs (struct ivopts_data
*data
)
4413 struct loop
*loop
= data
->current_loop
;
4416 /* We use the following model (definitely improvable, especially the
4417 cost function -- TODO):
4419 We estimate the number of registers available (using MD data), name it A.
4421 We estimate the number of registers used by the loop, name it U. This
4422 number is obtained as the number of loop phi nodes (not counting virtual
4423 registers and bivs) + the number of variables from outside of the loop.
4425 We set a reserve R (free regs that are used for temporary computations,
4426 etc.). For now the reserve is a constant 3.
4428 Let I be the number of induction variables.
4430 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4431 make a lot of ivs without a reason).
4432 -- if A - R < U + I <= A, the cost is I * PRES_COST
4433 -- if U + I > A, the cost is I * PRES_COST and
4434 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4436 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4438 fprintf (dump_file
, "Global costs:\n");
4439 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4440 fprintf (dump_file
, " target_small_cost %d\n", target_small_cost
);
4441 fprintf (dump_file
, " target_pres_cost %d\n", target_pres_cost
);
4442 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
);
4446 for (phi
= phi_nodes (loop
->header
); phi
; phi
= PHI_CHAIN (phi
))
4448 op
= PHI_RESULT (phi
);
4450 if (!is_gimple_reg (op
))
4453 if (get_iv (data
, op
))
4459 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4461 struct version_info
*info
= ver_info (data
, j
);
4463 if (info
->inv_id
&& info
->has_nonlin_use
)
4467 data
->regs_used
= n
;
4468 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4469 fprintf (dump_file
, " regs_used %d\n", n
);
4471 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4473 fprintf (dump_file
, " cost for size:\n");
4474 fprintf (dump_file
, " ivs\tcost\n");
4475 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4476 fprintf (dump_file
, " %d\t%d\n", j
,
4477 ivopts_global_cost_for_size (data
, j
));
4478 fprintf (dump_file
, "\n");
4482 /* Returns true if A is a cheaper cost pair than B. */
4485 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4493 if (a
->cost
< b
->cost
)
4496 if (a
->cost
> b
->cost
)
4499 /* In case the costs are the same, prefer the cheaper candidate. */
4500 if (a
->cand
->cost
< b
->cand
->cost
)
4506 /* Computes the cost field of IVS structure. */
4509 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4513 cost
+= ivs
->cand_use_cost
;
4514 cost
+= ivs
->cand_cost
;
4515 cost
+= ivopts_global_cost_for_size (data
, ivs
->n_regs
);
4520 /* Remove invariants in set INVS to set IVS. */
4523 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4531 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4533 ivs
->n_invariant_uses
[iid
]--;
4534 if (ivs
->n_invariant_uses
[iid
] == 0)
4539 /* Set USE not to be expressed by any candidate in IVS. */
4542 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4545 unsigned uid
= use
->id
, cid
;
4546 struct cost_pair
*cp
;
4548 cp
= ivs
->cand_for_use
[uid
];
4554 ivs
->cand_for_use
[uid
] = NULL
;
4555 ivs
->n_cand_uses
[cid
]--;
4557 if (ivs
->n_cand_uses
[cid
] == 0)
4559 bitmap_clear_bit (ivs
->cands
, cid
);
4560 /* Do not count the pseudocandidates. */
4564 ivs
->cand_cost
-= cp
->cand
->cost
;
4566 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4569 ivs
->cand_use_cost
-= cp
->cost
;
4571 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4572 iv_ca_recount_cost (data
, ivs
);
4575 /* Add invariants in set INVS to set IVS. */
4578 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4586 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4588 ivs
->n_invariant_uses
[iid
]++;
4589 if (ivs
->n_invariant_uses
[iid
] == 1)
4594 /* Set cost pair for USE in set IVS to CP. */
4597 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4598 struct iv_use
*use
, struct cost_pair
*cp
)
4600 unsigned uid
= use
->id
, cid
;
4602 if (ivs
->cand_for_use
[uid
] == cp
)
4605 if (ivs
->cand_for_use
[uid
])
4606 iv_ca_set_no_cp (data
, ivs
, use
);
4613 ivs
->cand_for_use
[uid
] = cp
;
4614 ivs
->n_cand_uses
[cid
]++;
4615 if (ivs
->n_cand_uses
[cid
] == 1)
4617 bitmap_set_bit (ivs
->cands
, cid
);
4618 /* Do not count the pseudocandidates. */
4622 ivs
->cand_cost
+= cp
->cand
->cost
;
4624 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4627 ivs
->cand_use_cost
+= cp
->cost
;
4628 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4629 iv_ca_recount_cost (data
, ivs
);
4633 /* Extend set IVS by expressing USE by some of the candidates in it
4637 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4640 struct cost_pair
*best_cp
= NULL
, *cp
;
4644 gcc_assert (ivs
->upto
>= use
->id
);
4646 if (ivs
->upto
== use
->id
)
4652 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
4654 cp
= get_use_iv_cost (data
, use
, iv_cand (data
, i
));
4656 if (cheaper_cost_pair (cp
, best_cp
))
4660 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4663 /* Get cost for assignment IVS. */
4666 iv_ca_cost (struct iv_ca
*ivs
)
4668 return (ivs
->bad_uses
? INFTY
: ivs
->cost
);
4671 /* Returns true if all dependences of CP are among invariants in IVS. */
4674 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
4679 if (!cp
->depends_on
)
4682 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
4684 if (ivs
->n_invariant_uses
[i
] == 0)
4691 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4692 it before NEXT_CHANGE. */
4694 static struct iv_ca_delta
*
4695 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
4696 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
4698 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
4701 change
->old_cp
= old_cp
;
4702 change
->new_cp
= new_cp
;
4703 change
->next_change
= next_change
;
4708 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4711 static struct iv_ca_delta
*
4712 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
4714 struct iv_ca_delta
*last
;
4722 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
4724 last
->next_change
= l2
;
4729 /* Returns candidate by that USE is expressed in IVS. */
4731 static struct cost_pair
*
4732 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4734 return ivs
->cand_for_use
[use
->id
];
4737 /* Reverse the list of changes DELTA, forming the inverse to it. */
4739 static struct iv_ca_delta
*
4740 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
4742 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
4743 struct cost_pair
*tmp
;
4745 for (act
= delta
; act
; act
= next
)
4747 next
= act
->next_change
;
4748 act
->next_change
= prev
;
4752 act
->old_cp
= act
->new_cp
;
4759 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4760 reverted instead. */
4763 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4764 struct iv_ca_delta
*delta
, bool forward
)
4766 struct cost_pair
*from
, *to
;
4767 struct iv_ca_delta
*act
;
4770 delta
= iv_ca_delta_reverse (delta
);
4772 for (act
= delta
; act
; act
= act
->next_change
)
4776 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
4777 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
4781 iv_ca_delta_reverse (delta
);
4784 /* Returns true if CAND is used in IVS. */
4787 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
4789 return ivs
->n_cand_uses
[cand
->id
] > 0;
4792 /* Returns number of induction variable candidates in the set IVS. */
4795 iv_ca_n_cands (struct iv_ca
*ivs
)
4797 return ivs
->n_cands
;
4800 /* Free the list of changes DELTA. */
4803 iv_ca_delta_free (struct iv_ca_delta
**delta
)
4805 struct iv_ca_delta
*act
, *next
;
4807 for (act
= *delta
; act
; act
= next
)
4809 next
= act
->next_change
;
4816 /* Allocates new iv candidates assignment. */
4818 static struct iv_ca
*
4819 iv_ca_new (struct ivopts_data
*data
)
4821 struct iv_ca
*nw
= XNEW (struct iv_ca
);
4825 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
4826 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
4827 nw
->cands
= BITMAP_ALLOC (NULL
);
4830 nw
->cand_use_cost
= 0;
4832 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
4838 /* Free memory occupied by the set IVS. */
4841 iv_ca_free (struct iv_ca
**ivs
)
4843 free ((*ivs
)->cand_for_use
);
4844 free ((*ivs
)->n_cand_uses
);
4845 BITMAP_FREE ((*ivs
)->cands
);
4846 free ((*ivs
)->n_invariant_uses
);
4851 /* Dumps IVS to FILE. */
4854 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
4856 const char *pref
= " invariants ";
4859 fprintf (file
, " cost %d\n", iv_ca_cost (ivs
));
4860 bitmap_print (file
, ivs
->cands
, " candidates ","\n");
4862 for (i
= 1; i
<= data
->max_inv_id
; i
++)
4863 if (ivs
->n_invariant_uses
[i
])
4865 fprintf (file
, "%s%d", pref
, i
);
4868 fprintf (file
, "\n");
4871 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4872 new set, and store differences in DELTA. Number of induction variables
4873 in the new set is stored to N_IVS. */
4876 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4877 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
4882 struct cost_pair
*old_cp
, *new_cp
;
4885 for (i
= 0; i
< ivs
->upto
; i
++)
4887 use
= iv_use (data
, i
);
4888 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4891 && old_cp
->cand
== cand
)
4894 new_cp
= get_use_iv_cost (data
, use
, cand
);
4898 if (!iv_ca_has_deps (ivs
, new_cp
))
4901 if (!cheaper_cost_pair (new_cp
, old_cp
))
4904 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4907 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4908 cost
= iv_ca_cost (ivs
);
4910 *n_ivs
= iv_ca_n_cands (ivs
);
4911 iv_ca_delta_commit (data
, ivs
, *delta
, false);
4916 /* Try narrowing set IVS by removing CAND. Return the cost of
4917 the new set and store the differences in DELTA. */
4920 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4921 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
4925 struct cost_pair
*old_cp
, *new_cp
, *cp
;
4927 struct iv_cand
*cnd
;
4931 for (i
= 0; i
< n_iv_uses (data
); i
++)
4933 use
= iv_use (data
, i
);
4935 old_cp
= iv_ca_cand_for_use (ivs
, use
);
4936 if (old_cp
->cand
!= cand
)
4941 if (data
->consider_all_candidates
)
4943 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
4948 cnd
= iv_cand (data
, ci
);
4950 cp
= get_use_iv_cost (data
, use
, cnd
);
4953 if (!iv_ca_has_deps (ivs
, cp
))
4956 if (!cheaper_cost_pair (cp
, new_cp
))
4964 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
4969 cnd
= iv_cand (data
, ci
);
4971 cp
= get_use_iv_cost (data
, use
, cnd
);
4974 if (!iv_ca_has_deps (ivs
, cp
))
4977 if (!cheaper_cost_pair (cp
, new_cp
))
4986 iv_ca_delta_free (delta
);
4990 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
4993 iv_ca_delta_commit (data
, ivs
, *delta
, true);
4994 cost
= iv_ca_cost (ivs
);
4995 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5000 /* Try optimizing the set of candidates IVS by removing candidates different
5001 from to EXCEPT_CAND from it. Return cost of the new set, and store
5002 differences in DELTA. */
5005 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5006 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5009 struct iv_ca_delta
*act_delta
, *best_delta
;
5010 unsigned i
, best_cost
, acost
;
5011 struct iv_cand
*cand
;
5014 best_cost
= iv_ca_cost (ivs
);
5016 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5018 cand
= iv_cand (data
, i
);
5020 if (cand
== except_cand
)
5023 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5025 if (acost
< best_cost
)
5028 iv_ca_delta_free (&best_delta
);
5029 best_delta
= act_delta
;
5032 iv_ca_delta_free (&act_delta
);
5041 /* Recurse to possibly remove other unnecessary ivs. */
5042 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5043 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5044 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5045 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5049 /* Tries to extend the sets IVS in the best possible way in order
5050 to express the USE. */
5053 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5056 unsigned best_cost
, act_cost
;
5059 struct iv_cand
*cand
;
5060 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5061 struct cost_pair
*cp
;
5063 iv_ca_add_use (data
, ivs
, use
);
5064 best_cost
= iv_ca_cost (ivs
);
5066 cp
= iv_ca_cand_for_use (ivs
, use
);
5069 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5070 iv_ca_set_no_cp (data
, ivs
, use
);
5073 /* First try important candidates. Only if it fails, try the specific ones.
5074 Rationale -- in loops with many variables the best choice often is to use
5075 just one generic biv. If we added here many ivs specific to the uses,
5076 the optimization algorithm later would be likely to get stuck in a local
5077 minimum, thus causing us to create too many ivs. The approach from
5078 few ivs to more seems more likely to be successful -- starting from few
5079 ivs, replacing an expensive use by a specific iv should always be a
5081 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5083 cand
= iv_cand (data
, i
);
5085 if (iv_ca_cand_used_p (ivs
, cand
))
5088 cp
= get_use_iv_cost (data
, use
, cand
);
5092 iv_ca_set_cp (data
, ivs
, use
, cp
);
5093 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5094 iv_ca_set_no_cp (data
, ivs
, use
);
5095 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5097 if (act_cost
< best_cost
)
5099 best_cost
= act_cost
;
5101 iv_ca_delta_free (&best_delta
);
5102 best_delta
= act_delta
;
5105 iv_ca_delta_free (&act_delta
);
5108 if (best_cost
== INFTY
)
5110 for (i
= 0; i
< use
->n_map_members
; i
++)
5112 cp
= use
->cost_map
+ i
;
5117 /* Already tried this. */
5118 if (cand
->important
)
5121 if (iv_ca_cand_used_p (ivs
, cand
))
5125 iv_ca_set_cp (data
, ivs
, use
, cp
);
5126 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
);
5127 iv_ca_set_no_cp (data
, ivs
, use
);
5128 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5131 if (act_cost
< best_cost
)
5133 best_cost
= act_cost
;
5136 iv_ca_delta_free (&best_delta
);
5137 best_delta
= act_delta
;
5140 iv_ca_delta_free (&act_delta
);
5144 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5145 iv_ca_delta_free (&best_delta
);
5147 return (best_cost
!= INFTY
);
5150 /* Finds an initial assignment of candidates to uses. */
5152 static struct iv_ca
*
5153 get_initial_solution (struct ivopts_data
*data
)
5155 struct iv_ca
*ivs
= iv_ca_new (data
);
5158 for (i
= 0; i
< n_iv_uses (data
); i
++)
5159 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
)))
5168 /* Tries to improve set of induction variables IVS. */
5171 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5173 unsigned i
, acost
, best_cost
= iv_ca_cost (ivs
), n_ivs
;
5174 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5175 struct iv_cand
*cand
;
5177 /* Try extending the set of induction variables by one. */
5178 for (i
= 0; i
< n_iv_cands (data
); i
++)
5180 cand
= iv_cand (data
, i
);
5182 if (iv_ca_cand_used_p (ivs
, cand
))
5185 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
);
5189 /* If we successfully added the candidate and the set is small enough,
5190 try optimizing it by removing other candidates. */
5191 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5193 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5194 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5195 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5196 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5199 if (acost
< best_cost
)
5202 iv_ca_delta_free (&best_delta
);
5203 best_delta
= act_delta
;
5206 iv_ca_delta_free (&act_delta
);
5211 /* Try removing the candidates from the set instead. */
5212 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5214 /* Nothing more we can do. */
5219 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5220 gcc_assert (best_cost
== iv_ca_cost (ivs
));
5221 iv_ca_delta_free (&best_delta
);
5225 /* Attempts to find the optimal set of induction variables. We do simple
5226 greedy heuristic -- we try to replace at most one candidate in the selected
5227 solution and remove the unused ivs while this improves the cost. */
5229 static struct iv_ca
*
5230 find_optimal_iv_set (struct ivopts_data
*data
)
5236 /* Get the initial solution. */
5237 set
= get_initial_solution (data
);
5240 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5241 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5245 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5247 fprintf (dump_file
, "Initial set of candidates:\n");
5248 iv_ca_dump (data
, dump_file
, set
);
5251 while (try_improve_iv_set (data
, set
))
5253 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5255 fprintf (dump_file
, "Improved to:\n");
5256 iv_ca_dump (data
, dump_file
, set
);
5260 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5261 fprintf (dump_file
, "Final cost %d\n\n", iv_ca_cost (set
));
5263 for (i
= 0; i
< n_iv_uses (data
); i
++)
5265 use
= iv_use (data
, i
);
5266 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5272 /* Creates a new induction variable corresponding to CAND. */
5275 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5277 block_stmt_iterator incr_pos
;
5287 incr_pos
= bsi_last (ip_normal_pos (data
->current_loop
));
5291 incr_pos
= bsi_last (ip_end_pos (data
->current_loop
));
5296 /* Mark that the iv is preserved. */
5297 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5298 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5300 /* Rewrite the increment so that it uses var_before directly. */
5301 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5306 gimple_add_tmp_var (cand
->var_before
);
5307 add_referenced_var (cand
->var_before
);
5309 base
= unshare_expr (cand
->iv
->base
);
5311 create_iv (base
, unshare_expr (cand
->iv
->step
),
5312 cand
->var_before
, data
->current_loop
,
5313 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5316 /* Creates new induction variables described in SET. */
5319 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5322 struct iv_cand
*cand
;
5325 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5327 cand
= iv_cand (data
, i
);
5328 create_new_iv (data
, cand
);
5332 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5333 is true, remove also the ssa name defined by the statement. */
5336 remove_statement (tree stmt
, bool including_defined_name
)
5338 if (TREE_CODE (stmt
) == PHI_NODE
)
5340 if (!including_defined_name
)
5342 /* Prevent the ssa name defined by the statement from being removed. */
5343 SET_PHI_RESULT (stmt
, NULL
);
5345 remove_phi_node (stmt
, NULL_TREE
);
5349 block_stmt_iterator bsi
= bsi_for_stmt (stmt
);
5351 bsi_remove (&bsi
, true);
5355 /* Rewrites USE (definition of iv used in a nonlinear expression)
5356 using candidate CAND. */
5359 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5360 struct iv_use
*use
, struct iv_cand
*cand
)
5363 tree op
, stmts
, tgt
, ass
;
5364 block_stmt_iterator bsi
, pbsi
;
5366 /* An important special case -- if we are asked to express value of
5367 the original iv by itself, just exit; there is no need to
5368 introduce a new computation (that might also need casting the
5369 variable to unsigned and back). */
5370 if (cand
->pos
== IP_ORIGINAL
5371 && cand
->incremented_at
== use
->stmt
)
5373 tree step
, ctype
, utype
;
5374 enum tree_code incr_code
= PLUS_EXPR
;
5376 gcc_assert (TREE_CODE (use
->stmt
) == MODIFY_EXPR
);
5377 gcc_assert (TREE_OPERAND (use
->stmt
, 0) == cand
->var_after
);
5379 step
= cand
->iv
->step
;
5380 ctype
= TREE_TYPE (step
);
5381 utype
= TREE_TYPE (cand
->var_after
);
5382 if (TREE_CODE (step
) == NEGATE_EXPR
)
5384 incr_code
= MINUS_EXPR
;
5385 step
= TREE_OPERAND (step
, 0);
5388 /* Check whether we may leave the computation unchanged.
5389 This is the case only if it does not rely on other
5390 computations in the loop -- otherwise, the computation
5391 we rely upon may be removed in remove_unused_ivs,
5392 thus leading to ICE. */
5393 op
= TREE_OPERAND (use
->stmt
, 1);
5394 if (TREE_CODE (op
) == PLUS_EXPR
5395 || TREE_CODE (op
) == MINUS_EXPR
)
5397 if (TREE_OPERAND (op
, 0) == cand
->var_before
)
5398 op
= TREE_OPERAND (op
, 1);
5399 else if (TREE_CODE (op
) == PLUS_EXPR
5400 && TREE_OPERAND (op
, 1) == cand
->var_before
)
5401 op
= TREE_OPERAND (op
, 0);
5409 && (TREE_CODE (op
) == INTEGER_CST
5410 || operand_equal_p (op
, step
, 0)))
5413 /* Otherwise, add the necessary computations to express
5415 op
= fold_convert (ctype
, cand
->var_before
);
5416 comp
= fold_convert (utype
,
5417 build2 (incr_code
, ctype
, op
,
5418 unshare_expr (step
)));
5421 comp
= get_computation (data
->current_loop
, use
, cand
);
5423 switch (TREE_CODE (use
->stmt
))
5426 tgt
= PHI_RESULT (use
->stmt
);
5428 /* If we should keep the biv, do not replace it. */
5429 if (name_info (data
, tgt
)->preserve_biv
)
5432 pbsi
= bsi
= bsi_start (bb_for_stmt (use
->stmt
));
5433 while (!bsi_end_p (pbsi
)
5434 && TREE_CODE (bsi_stmt (pbsi
)) == LABEL_EXPR
)
5442 tgt
= TREE_OPERAND (use
->stmt
, 0);
5443 bsi
= bsi_for_stmt (use
->stmt
);
5450 op
= force_gimple_operand (comp
, &stmts
, false, SSA_NAME_VAR (tgt
));
5452 if (TREE_CODE (use
->stmt
) == PHI_NODE
)
5455 bsi_insert_after (&bsi
, stmts
, BSI_CONTINUE_LINKING
);
5456 ass
= build2 (MODIFY_EXPR
, TREE_TYPE (tgt
), tgt
, op
);
5457 bsi_insert_after (&bsi
, ass
, BSI_NEW_STMT
);
5458 remove_statement (use
->stmt
, false);
5459 SSA_NAME_DEF_STMT (tgt
) = ass
;
5464 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5465 TREE_OPERAND (use
->stmt
, 1) = op
;
5469 /* Replaces ssa name in index IDX by its basic variable. Callback for
5473 idx_remove_ssa_names (tree base
, tree
*idx
,
5474 void *data ATTRIBUTE_UNUSED
)
5478 if (TREE_CODE (*idx
) == SSA_NAME
)
5479 *idx
= SSA_NAME_VAR (*idx
);
5481 if (TREE_CODE (base
) == ARRAY_REF
)
5483 op
= &TREE_OPERAND (base
, 2);
5485 && TREE_CODE (*op
) == SSA_NAME
)
5486 *op
= SSA_NAME_VAR (*op
);
5487 op
= &TREE_OPERAND (base
, 3);
5489 && TREE_CODE (*op
) == SSA_NAME
)
5490 *op
= SSA_NAME_VAR (*op
);
5496 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5499 unshare_and_remove_ssa_names (tree ref
)
5501 ref
= unshare_expr (ref
);
5502 for_each_index (&ref
, idx_remove_ssa_names
, NULL
);
5507 /* Extract the alias analysis info for the memory reference REF. There are
5508 several ways how this information may be stored and what precisely is
5509 its semantics depending on the type of the reference, but there always is
5510 somewhere hidden one _DECL node that is used to determine the set of
5511 virtual operands for the reference. The code below deciphers this jungle
5512 and extracts this single useful piece of information. */
5515 get_ref_tag (tree ref
, tree orig
)
5517 tree var
= get_base_address (ref
);
5518 tree aref
= NULL_TREE
, tag
, sv
;
5519 HOST_WIDE_INT offset
, size
, maxsize
;
5521 for (sv
= orig
; handled_component_p (sv
); sv
= TREE_OPERAND (sv
, 0))
5523 aref
= get_ref_base_and_extent (sv
, &offset
, &size
, &maxsize
);
5528 if (aref
&& SSA_VAR_P (aref
) && get_subvars_for_var (aref
))
5529 return unshare_expr (sv
);
5534 if (TREE_CODE (var
) == INDIRECT_REF
)
5536 /* If the base is a dereference of a pointer, first check its name memory
5537 tag. If it does not have one, use its symbol memory tag. */
5538 var
= TREE_OPERAND (var
, 0);
5539 if (TREE_CODE (var
) != SSA_NAME
)
5542 if (SSA_NAME_PTR_INFO (var
))
5544 tag
= SSA_NAME_PTR_INFO (var
)->name_mem_tag
;
5549 var
= SSA_NAME_VAR (var
);
5550 tag
= var_ann (var
)->symbol_mem_tag
;
5551 gcc_assert (tag
!= NULL_TREE
);
5559 tag
= var_ann (var
)->symbol_mem_tag
;
5567 /* Copies the reference information from OLD_REF to NEW_REF. */
5570 copy_ref_info (tree new_ref
, tree old_ref
)
5572 if (TREE_CODE (old_ref
) == TARGET_MEM_REF
)
5573 copy_mem_ref_info (new_ref
, old_ref
);
5576 TMR_ORIGINAL (new_ref
) = unshare_and_remove_ssa_names (old_ref
);
5577 TMR_TAG (new_ref
) = get_ref_tag (old_ref
, TMR_ORIGINAL (new_ref
));
5581 /* Rewrites USE (address that is an iv) using candidate CAND. */
5584 rewrite_use_address (struct ivopts_data
*data
,
5585 struct iv_use
*use
, struct iv_cand
*cand
)
5587 struct affine_tree_combination aff
;
5588 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5591 get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
5592 unshare_aff_combination (&aff
);
5594 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
);
5595 copy_ref_info (ref
, *use
->op_p
);
5599 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5603 rewrite_use_compare (struct ivopts_data
*data
,
5604 struct iv_use
*use
, struct iv_cand
*cand
)
5607 tree
*op_p
, cond
, op
, stmts
, bound
;
5608 block_stmt_iterator bsi
= bsi_for_stmt (use
->stmt
);
5609 enum tree_code compare
;
5610 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
5615 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
5616 tree var_type
= TREE_TYPE (var
);
5618 compare
= iv_elimination_compare (data
, use
);
5619 bound
= fold_convert (var_type
, bound
);
5620 op
= force_gimple_operand (unshare_expr (bound
), &stmts
,
5624 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5626 *use
->op_p
= build2 (compare
, boolean_type_node
, var
, op
);
5627 update_stmt (use
->stmt
);
5631 /* The induction variable elimination failed; just express the original
5633 comp
= get_computation (data
->current_loop
, use
, cand
);
5636 op_p
= &TREE_OPERAND (cond
, 0);
5637 if (TREE_CODE (*op_p
) != SSA_NAME
5638 || zero_p (get_iv (data
, *op_p
)->step
))
5639 op_p
= &TREE_OPERAND (cond
, 1);
5641 op
= force_gimple_operand (comp
, &stmts
, true, SSA_NAME_VAR (*op_p
));
5643 bsi_insert_before (&bsi
, stmts
, BSI_SAME_STMT
);
5648 /* Rewrites USE using candidate CAND. */
5651 rewrite_use (struct ivopts_data
*data
,
5652 struct iv_use
*use
, struct iv_cand
*cand
)
5656 case USE_NONLINEAR_EXPR
:
5657 rewrite_use_nonlinear_expr (data
, use
, cand
);
5661 rewrite_use_address (data
, use
, cand
);
5665 rewrite_use_compare (data
, use
, cand
);
5671 mark_new_vars_to_rename (use
->stmt
);
5674 /* Rewrite the uses using the selected induction variables. */
5677 rewrite_uses (struct ivopts_data
*data
)
5680 struct iv_cand
*cand
;
5683 for (i
= 0; i
< n_iv_uses (data
); i
++)
5685 use
= iv_use (data
, i
);
5686 cand
= use
->selected
;
5689 rewrite_use (data
, use
, cand
);
5693 /* Removes the ivs that are not used after rewriting. */
5696 remove_unused_ivs (struct ivopts_data
*data
)
5701 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5703 struct version_info
*info
;
5705 info
= ver_info (data
, j
);
5707 && !zero_p (info
->iv
->step
)
5709 && !info
->iv
->have_use_for
5710 && !info
->preserve_biv
)
5711 remove_statement (SSA_NAME_DEF_STMT (info
->iv
->ssa_name
), true);
5715 /* Frees data allocated by the optimization of a single loop. */
5718 free_loop_data (struct ivopts_data
*data
)
5724 htab_empty (data
->niters
);
5726 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5728 struct version_info
*info
;
5730 info
= ver_info (data
, i
);
5734 info
->has_nonlin_use
= false;
5735 info
->preserve_biv
= false;
5738 bitmap_clear (data
->relevant
);
5739 bitmap_clear (data
->important_candidates
);
5741 for (i
= 0; i
< n_iv_uses (data
); i
++)
5743 struct iv_use
*use
= iv_use (data
, i
);
5746 BITMAP_FREE (use
->related_cands
);
5747 for (j
= 0; j
< use
->n_map_members
; j
++)
5748 if (use
->cost_map
[j
].depends_on
)
5749 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
5750 free (use
->cost_map
);
5753 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
5755 for (i
= 0; i
< n_iv_cands (data
); i
++)
5757 struct iv_cand
*cand
= iv_cand (data
, i
);
5761 if (cand
->depends_on
)
5762 BITMAP_FREE (cand
->depends_on
);
5765 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
5767 if (data
->version_info_size
< num_ssa_names
)
5769 data
->version_info_size
= 2 * num_ssa_names
;
5770 free (data
->version_info
);
5771 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
5774 data
->max_inv_id
= 0;
5776 for (i
= 0; VEC_iterate (tree
, decl_rtl_to_reset
, i
, obj
); i
++)
5777 SET_DECL_RTL (obj
, NULL_RTX
);
5779 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
5782 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5786 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
5788 free_loop_data (data
);
5789 free (data
->version_info
);
5790 BITMAP_FREE (data
->relevant
);
5791 BITMAP_FREE (data
->important_candidates
);
5792 htab_delete (data
->niters
);
5794 VEC_free (tree
, heap
, decl_rtl_to_reset
);
5795 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
5796 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
5799 /* Optimizes the LOOP. Returns true if anything changed. */
5802 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
5804 bool changed
= false;
5805 struct iv_ca
*iv_ca
;
5808 data
->current_loop
= loop
;
5810 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5812 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
5814 exit
= single_dom_exit (loop
);
5817 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
5818 exit
->src
->index
, exit
->dest
->index
);
5819 print_generic_expr (dump_file
, last_stmt (exit
->src
), TDF_SLIM
);
5820 fprintf (dump_file
, "\n");
5823 fprintf (dump_file
, "\n");
5826 /* For each ssa name determines whether it behaves as an induction variable
5828 if (!find_induction_variables (data
))
5831 /* Finds interesting uses (item 1). */
5832 find_interesting_uses (data
);
5833 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
5836 /* Finds candidates for the induction variables (item 2). */
5837 find_iv_candidates (data
);
5839 /* Calculates the costs (item 3, part 1). */
5840 determine_use_iv_costs (data
);
5841 determine_iv_costs (data
);
5842 determine_set_costs (data
);
5844 /* Find the optimal set of induction variables (item 3, part 2). */
5845 iv_ca
= find_optimal_iv_set (data
);
5850 /* Create the new induction variables (item 4, part 1). */
5851 create_new_ivs (data
, iv_ca
);
5852 iv_ca_free (&iv_ca
);
5854 /* Rewrite the uses (item 4, part 2). */
5855 rewrite_uses (data
);
5857 /* Remove the ivs that are unused after rewriting. */
5858 remove_unused_ivs (data
);
5860 /* We have changed the structure of induction variables; it might happen
5861 that definitions in the scev database refer to some of them that were
5866 free_loop_data (data
);
5871 /* Main entry point. Optimizes induction variables in LOOPS. */
5874 tree_ssa_iv_optimize (struct loops
*loops
)
5877 struct ivopts_data data
;
5879 tree_ssa_iv_optimize_init (&data
);
5881 /* Optimize the loops starting with the innermost ones. */
5882 loop
= loops
->tree_root
;
5886 /* Scan the loops, inner ones first. */
5887 while (loop
!= loops
->tree_root
)
5889 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5890 flow_loop_dump (loop
, dump_file
, NULL
, 1);
5892 tree_ssa_iv_optimize_loop (&data
, loop
);
5904 tree_ssa_iv_optimize_finalize (&data
);