1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
71 #include "basic-block.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
79 #include "tree-pass.h"
81 #include "insn-config.h"
83 #include "pointer-set.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
92 #include "tree-inline.h"
93 #include "tree-ssa-propagate.h"
95 /* FIXME: Expressions are expanded to RTL in this pass to determine the
96 cost of different addressing modes. This should be moved to a TBD
97 interface between the GIMPLE and RTL worlds. */
100 /* The infinite cost. */
101 #define INFTY 10000000
103 #define AVG_LOOP_NITER(LOOP) 5
105 /* Returns the expected number of loop iterations for LOOP.
106 The average trip count is computed from profile data if it
109 static inline HOST_WIDE_INT
110 avg_loop_niter (struct loop
*loop
)
112 HOST_WIDE_INT niter
= estimated_loop_iterations_int (loop
, false);
114 return AVG_LOOP_NITER (loop
);
119 /* Representation of the induction variable. */
122 tree base
; /* Initial value of the iv. */
123 tree base_object
; /* A memory object to that the induction variable points. */
124 tree step
; /* Step of the iv (constant only). */
125 tree ssa_name
; /* The ssa name with the value. */
126 bool biv_p
; /* Is it a biv? */
127 bool have_use_for
; /* Do we already have a use for it? */
128 unsigned use_id
; /* The identifier in the use if it is the case. */
131 /* Per-ssa version information (induction variable descriptions, etc.). */
134 tree name
; /* The ssa name. */
135 struct iv
*iv
; /* Induction variable description. */
136 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
137 an expression that is not an induction variable. */
138 bool preserve_biv
; /* For the original biv, whether to preserve it. */
139 unsigned inv_id
; /* Id of an invariant. */
145 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
146 USE_ADDRESS
, /* Use in an address. */
147 USE_COMPARE
/* Use is a compare. */
150 /* Cost of a computation. */
153 int cost
; /* The runtime cost. */
154 unsigned complexity
; /* The estimate of the complexity of the code for
155 the computation (in no concrete units --
156 complexity field should be larger for more
157 complex expressions and addressing modes). */
160 static const comp_cost zero_cost
= {0, 0};
161 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
163 /* The candidate - cost pair. */
166 struct iv_cand
*cand
; /* The candidate. */
167 comp_cost cost
; /* The cost. */
168 bitmap depends_on
; /* The list of invariants that have to be
170 tree value
; /* For final value elimination, the expression for
171 the final value of the iv. For iv elimination,
172 the new bound to compare with. */
173 int inv_expr_id
; /* Loop invariant expression id. */
179 unsigned id
; /* The id of the use. */
180 enum use_type type
; /* Type of the use. */
181 struct iv
*iv
; /* The induction variable it is based on. */
182 gimple stmt
; /* Statement in that it occurs. */
183 tree
*op_p
; /* The place where it occurs. */
184 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
187 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
188 struct cost_pair
*cost_map
;
189 /* The costs wrto the iv candidates. */
191 struct iv_cand
*selected
;
192 /* The selected candidate. */
195 /* The position where the iv is computed. */
198 IP_NORMAL
, /* At the end, just before the exit condition. */
199 IP_END
, /* At the end of the latch block. */
200 IP_BEFORE_USE
, /* Immediately before a specific use. */
201 IP_AFTER_USE
, /* Immediately after a specific use. */
202 IP_ORIGINAL
/* The original biv. */
205 /* The induction variable candidate. */
208 unsigned id
; /* The number of the candidate. */
209 bool important
; /* Whether this is an "important" candidate, i.e. such
210 that it should be considered by all uses. */
211 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
212 gimple incremented_at
;/* For original biv, the statement where it is
214 tree var_before
; /* The variable used for it before increment. */
215 tree var_after
; /* The variable used for it after increment. */
216 struct iv
*iv
; /* The value of the candidate. NULL for
217 "pseudocandidate" used to indicate the possibility
218 to replace the final value of an iv by direct
219 computation of the value. */
220 unsigned cost
; /* Cost of the candidate. */
221 unsigned cost_step
; /* Cost of the candidate's increment operation. */
222 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
223 where it is incremented. */
224 bitmap depends_on
; /* The list of invariants that are used in step of the
228 /* Loop invariant expression hashtable entry. */
229 struct iv_inv_expr_ent
236 /* The data used by the induction variable optimizations. */
238 typedef struct iv_use
*iv_use_p
;
240 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
242 typedef struct iv_cand
*iv_cand_p
;
243 DEF_VEC_P(iv_cand_p
);
244 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
248 /* The currently optimized loop. */
249 struct loop
*current_loop
;
251 /* Numbers of iterations for all exits of the current loop. */
252 struct pointer_map_t
*niters
;
254 /* Number of registers used in it. */
257 /* The size of version_info array allocated. */
258 unsigned version_info_size
;
260 /* The array of information for the ssa names. */
261 struct version_info
*version_info
;
263 /* The hashtable of loop invariant expressions created
267 /* Loop invariant expression id. */
270 /* The bitmap of indices in version_info whose value was changed. */
273 /* The uses of induction variables. */
274 VEC(iv_use_p
,heap
) *iv_uses
;
276 /* The candidates. */
277 VEC(iv_cand_p
,heap
) *iv_candidates
;
279 /* A bitmap of important candidates. */
280 bitmap important_candidates
;
282 /* The maximum invariant id. */
285 /* Whether to consider just related and important candidates when replacing a
287 bool consider_all_candidates
;
289 /* Are we optimizing for speed? */
292 /* Whether the loop body includes any function calls. */
293 bool body_includes_call
;
296 /* An assignment of iv candidates to uses. */
300 /* The number of uses covered by the assignment. */
303 /* Number of uses that cannot be expressed by the candidates in the set. */
306 /* Candidate assigned to a use, together with the related costs. */
307 struct cost_pair
**cand_for_use
;
309 /* Number of times each candidate is used. */
310 unsigned *n_cand_uses
;
312 /* The candidates used. */
315 /* The number of candidates in the set. */
318 /* Total number of registers needed. */
321 /* Total cost of expressing uses. */
322 comp_cost cand_use_cost
;
324 /* Total cost of candidates. */
327 /* Number of times each invariant is used. */
328 unsigned *n_invariant_uses
;
330 /* The array holding the number of uses of each loop
331 invariant expressions created by ivopt. */
332 unsigned *used_inv_expr
;
334 /* The number of created loop invariants. */
335 unsigned num_used_inv_expr
;
337 /* Total cost of the assignment. */
341 /* Difference of two iv candidate assignments. */
348 /* An old assignment (for rollback purposes). */
349 struct cost_pair
*old_cp
;
351 /* A new assignment. */
352 struct cost_pair
*new_cp
;
354 /* Next change in the list. */
355 struct iv_ca_delta
*next_change
;
358 /* Bound on number of candidates below that all candidates are considered. */
360 #define CONSIDER_ALL_CANDIDATES_BOUND \
361 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
363 /* If there are more iv occurrences, we just give up (it is quite unlikely that
364 optimizing such a loop would help, and it would take ages). */
366 #define MAX_CONSIDERED_USES \
367 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
369 /* If there are at most this number of ivs in the set, try removing unnecessary
370 ivs from the set always. */
372 #define ALWAYS_PRUNE_CAND_SET_BOUND \
373 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
375 /* The list of trees for that the decl_rtl field must be reset is stored
378 static VEC(tree
,heap
) *decl_rtl_to_reset
;
380 /* Number of uses recorded in DATA. */
382 static inline unsigned
383 n_iv_uses (struct ivopts_data
*data
)
385 return VEC_length (iv_use_p
, data
->iv_uses
);
388 /* Ith use recorded in DATA. */
390 static inline struct iv_use
*
391 iv_use (struct ivopts_data
*data
, unsigned i
)
393 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
396 /* Number of candidates recorded in DATA. */
398 static inline unsigned
399 n_iv_cands (struct ivopts_data
*data
)
401 return VEC_length (iv_cand_p
, data
->iv_candidates
);
404 /* Ith candidate recorded in DATA. */
406 static inline struct iv_cand
*
407 iv_cand (struct ivopts_data
*data
, unsigned i
)
409 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
412 /* The single loop exit if it dominates the latch, NULL otherwise. */
415 single_dom_exit (struct loop
*loop
)
417 edge exit
= single_exit (loop
);
422 if (!just_once_each_iteration_p (loop
, exit
->src
))
428 /* Dumps information about the induction variable IV to FILE. */
430 extern void dump_iv (FILE *, struct iv
*);
432 dump_iv (FILE *file
, struct iv
*iv
)
436 fprintf (file
, "ssa name ");
437 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
438 fprintf (file
, "\n");
441 fprintf (file
, " type ");
442 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
443 fprintf (file
, "\n");
447 fprintf (file
, " base ");
448 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
449 fprintf (file
, "\n");
451 fprintf (file
, " step ");
452 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
453 fprintf (file
, "\n");
457 fprintf (file
, " invariant ");
458 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
459 fprintf (file
, "\n");
464 fprintf (file
, " base object ");
465 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
466 fprintf (file
, "\n");
470 fprintf (file
, " is a biv\n");
473 /* Dumps information about the USE to FILE. */
475 extern void dump_use (FILE *, struct iv_use
*);
477 dump_use (FILE *file
, struct iv_use
*use
)
479 fprintf (file
, "use %d\n", use
->id
);
483 case USE_NONLINEAR_EXPR
:
484 fprintf (file
, " generic\n");
488 fprintf (file
, " address\n");
492 fprintf (file
, " compare\n");
499 fprintf (file
, " in statement ");
500 print_gimple_stmt (file
, use
->stmt
, 0, 0);
501 fprintf (file
, "\n");
503 fprintf (file
, " at position ");
505 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
506 fprintf (file
, "\n");
508 dump_iv (file
, use
->iv
);
510 if (use
->related_cands
)
512 fprintf (file
, " related candidates ");
513 dump_bitmap (file
, use
->related_cands
);
517 /* Dumps information about the uses to FILE. */
519 extern void dump_uses (FILE *, struct ivopts_data
*);
521 dump_uses (FILE *file
, struct ivopts_data
*data
)
526 for (i
= 0; i
< n_iv_uses (data
); i
++)
528 use
= iv_use (data
, i
);
530 dump_use (file
, use
);
531 fprintf (file
, "\n");
535 /* Dumps information about induction variable candidate CAND to FILE. */
537 extern void dump_cand (FILE *, struct iv_cand
*);
539 dump_cand (FILE *file
, struct iv_cand
*cand
)
541 struct iv
*iv
= cand
->iv
;
543 fprintf (file
, "candidate %d%s\n",
544 cand
->id
, cand
->important
? " (important)" : "");
546 if (cand
->depends_on
)
548 fprintf (file
, " depends on ");
549 dump_bitmap (file
, cand
->depends_on
);
554 fprintf (file
, " final value replacement\n");
558 if (cand
->var_before
)
560 fprintf (file
, " var_before ");
561 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
562 fprintf (file
, "\n");
566 fprintf (file
, " var_after ");
567 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
568 fprintf (file
, "\n");
574 fprintf (file
, " incremented before exit test\n");
578 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
582 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
586 fprintf (file
, " incremented at end\n");
590 fprintf (file
, " original biv\n");
597 /* Returns the info for ssa version VER. */
599 static inline struct version_info
*
600 ver_info (struct ivopts_data
*data
, unsigned ver
)
602 return data
->version_info
+ ver
;
605 /* Returns the info for ssa name NAME. */
607 static inline struct version_info
*
608 name_info (struct ivopts_data
*data
, tree name
)
610 return ver_info (data
, SSA_NAME_VERSION (name
));
613 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
617 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
619 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
623 if (sbb
== loop
->latch
)
629 return stmt
== last_stmt (bb
);
632 /* Returns true if STMT if after the place where the original induction
633 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
634 if the positions are identical. */
637 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
639 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
640 basic_block stmt_bb
= gimple_bb (stmt
);
642 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
645 if (stmt_bb
!= cand_bb
)
649 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
651 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
654 /* Returns true if STMT if after the place where the induction variable
655 CAND is incremented in LOOP. */
658 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
666 return stmt_after_ip_normal_pos (loop
, stmt
);
670 return stmt_after_inc_pos (cand
, stmt
, false);
673 return stmt_after_inc_pos (cand
, stmt
, true);
680 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
683 abnormal_ssa_name_p (tree exp
)
688 if (TREE_CODE (exp
) != SSA_NAME
)
691 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
694 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
695 abnormal phi node. Callback for for_each_index. */
698 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
699 void *data ATTRIBUTE_UNUSED
)
701 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
703 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
705 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
709 return !abnormal_ssa_name_p (*index
);
712 /* Returns true if EXPR contains a ssa name that occurs in an
713 abnormal phi node. */
716 contains_abnormal_ssa_name_p (tree expr
)
719 enum tree_code_class codeclass
;
724 code
= TREE_CODE (expr
);
725 codeclass
= TREE_CODE_CLASS (code
);
727 if (code
== SSA_NAME
)
728 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
730 if (code
== INTEGER_CST
731 || is_gimple_min_invariant (expr
))
734 if (code
== ADDR_EXPR
)
735 return !for_each_index (&TREE_OPERAND (expr
, 0),
736 idx_contains_abnormal_ssa_name_p
,
739 if (code
== COND_EXPR
)
740 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
741 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
742 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
748 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
753 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
765 /* Returns tree describing number of iterations determined from
766 EXIT of DATA->current_loop, or NULL if something goes wrong. */
769 niter_for_exit (struct ivopts_data
*data
, edge exit
,
770 struct tree_niter_desc
**desc_p
)
772 struct tree_niter_desc
* desc
= NULL
;
778 data
->niters
= pointer_map_create ();
782 slot
= pointer_map_contains (data
->niters
, exit
);
786 /* Try to determine number of iterations. We must know it
787 unconditionally (i.e., without possibility of # of iterations
788 being zero). Also, we cannot safely work with ssa names that
789 appear in phi nodes on abnormal edges, so that we do not create
790 overlapping life ranges for them (PR 27283). */
791 desc
= XNEW (struct tree_niter_desc
);
792 if (number_of_iterations_exit (data
->current_loop
,
794 && integer_zerop (desc
->may_be_zero
)
795 && !contains_abnormal_ssa_name_p (desc
->niter
))
801 slot
= pointer_map_insert (data
->niters
, exit
);
805 niter
= ((struct tree_niter_desc
*) *slot
)->niter
;
808 *desc_p
= (struct tree_niter_desc
*) *slot
;
812 /* Returns tree describing number of iterations determined from
813 single dominating exit of DATA->current_loop, or NULL if something
817 niter_for_single_dom_exit (struct ivopts_data
*data
)
819 edge exit
= single_dom_exit (data
->current_loop
);
824 return niter_for_exit (data
, exit
, NULL
);
827 /* Hash table equality function for expressions. */
830 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
832 const struct iv_inv_expr_ent
*expr1
=
833 (const struct iv_inv_expr_ent
*)ent1
;
834 const struct iv_inv_expr_ent
*expr2
=
835 (const struct iv_inv_expr_ent
*)ent2
;
837 return operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
840 /* Hash function for loop invariant expressions. */
843 htab_inv_expr_hash (const void *ent
)
845 const struct iv_inv_expr_ent
*expr
=
846 (const struct iv_inv_expr_ent
*)ent
;
850 /* Initializes data structures used by the iv optimization pass, stored
854 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
856 data
->version_info_size
= 2 * num_ssa_names
;
857 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
858 data
->relevant
= BITMAP_ALLOC (NULL
);
859 data
->important_candidates
= BITMAP_ALLOC (NULL
);
860 data
->max_inv_id
= 0;
862 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
863 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
864 data
->inv_expr_tab
= htab_create (10, htab_inv_expr_hash
,
865 htab_inv_expr_eq
, free
);
866 data
->inv_expr_id
= 0;
867 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
870 /* Returns a memory object to that EXPR points. In case we are able to
871 determine that it does not point to any such object, NULL is returned. */
874 determine_base_object (tree expr
)
876 enum tree_code code
= TREE_CODE (expr
);
879 /* If this is a pointer casted to any type, we need to determine
880 the base object for the pointer; so handle conversions before
881 throwing away non-pointer expressions. */
882 if (CONVERT_EXPR_P (expr
))
883 return determine_base_object (TREE_OPERAND (expr
, 0));
885 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
894 obj
= TREE_OPERAND (expr
, 0);
895 base
= get_base_address (obj
);
900 if (TREE_CODE (base
) == MEM_REF
)
901 return determine_base_object (TREE_OPERAND (base
, 0));
903 return fold_convert (ptr_type_node
,
904 build_fold_addr_expr (base
));
906 case POINTER_PLUS_EXPR
:
907 return determine_base_object (TREE_OPERAND (expr
, 0));
911 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
915 return fold_convert (ptr_type_node
, expr
);
919 /* Allocates an induction variable with given initial value BASE and step STEP
923 alloc_iv (tree base
, tree step
)
925 struct iv
*iv
= XCNEW (struct iv
);
926 gcc_assert (step
!= NULL_TREE
);
929 iv
->base_object
= determine_base_object (base
);
932 iv
->have_use_for
= false;
934 iv
->ssa_name
= NULL_TREE
;
939 /* Sets STEP and BASE for induction variable IV. */
942 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
944 struct version_info
*info
= name_info (data
, iv
);
946 gcc_assert (!info
->iv
);
948 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
949 info
->iv
= alloc_iv (base
, step
);
950 info
->iv
->ssa_name
= iv
;
953 /* Finds induction variable declaration for VAR. */
956 get_iv (struct ivopts_data
*data
, tree var
)
959 tree type
= TREE_TYPE (var
);
961 if (!POINTER_TYPE_P (type
)
962 && !INTEGRAL_TYPE_P (type
))
965 if (!name_info (data
, var
)->iv
)
967 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
970 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
971 set_iv (data
, var
, var
, build_int_cst (type
, 0));
974 return name_info (data
, var
)->iv
;
977 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
978 not define a simple affine biv with nonzero step. */
981 determine_biv_step (gimple phi
)
983 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
984 tree name
= PHI_RESULT (phi
);
987 if (!is_gimple_reg (name
))
990 if (!simple_iv (loop
, loop
, name
, &iv
, true))
993 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
996 /* Finds basic ivs. */
999 find_bivs (struct ivopts_data
*data
)
1002 tree step
, type
, base
;
1004 struct loop
*loop
= data
->current_loop
;
1005 gimple_stmt_iterator psi
;
1007 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1009 phi
= gsi_stmt (psi
);
1011 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1014 step
= determine_biv_step (phi
);
1018 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1019 base
= expand_simple_operations (base
);
1020 if (contains_abnormal_ssa_name_p (base
)
1021 || contains_abnormal_ssa_name_p (step
))
1024 type
= TREE_TYPE (PHI_RESULT (phi
));
1025 base
= fold_convert (type
, base
);
1028 if (POINTER_TYPE_P (type
))
1029 step
= fold_convert (sizetype
, step
);
1031 step
= fold_convert (type
, step
);
1034 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1041 /* Marks basic ivs. */
1044 mark_bivs (struct ivopts_data
*data
)
1048 struct iv
*iv
, *incr_iv
;
1049 struct loop
*loop
= data
->current_loop
;
1050 basic_block incr_bb
;
1051 gimple_stmt_iterator psi
;
1053 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1055 phi
= gsi_stmt (psi
);
1057 iv
= get_iv (data
, PHI_RESULT (phi
));
1061 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1062 incr_iv
= get_iv (data
, var
);
1066 /* If the increment is in the subloop, ignore it. */
1067 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1068 if (incr_bb
->loop_father
!= data
->current_loop
1069 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1073 incr_iv
->biv_p
= true;
1077 /* Checks whether STMT defines a linear induction variable and stores its
1078 parameters to IV. */
1081 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1084 struct loop
*loop
= data
->current_loop
;
1086 iv
->base
= NULL_TREE
;
1087 iv
->step
= NULL_TREE
;
1089 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1092 lhs
= gimple_assign_lhs (stmt
);
1093 if (TREE_CODE (lhs
) != SSA_NAME
)
1096 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1098 iv
->base
= expand_simple_operations (iv
->base
);
1100 if (contains_abnormal_ssa_name_p (iv
->base
)
1101 || contains_abnormal_ssa_name_p (iv
->step
))
1107 /* Finds general ivs in statement STMT. */
1110 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1114 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1117 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1120 /* Finds general ivs in basic block BB. */
1123 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1125 gimple_stmt_iterator bsi
;
1127 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1128 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1131 /* Finds general ivs. */
1134 find_givs (struct ivopts_data
*data
)
1136 struct loop
*loop
= data
->current_loop
;
1137 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1140 for (i
= 0; i
< loop
->num_nodes
; i
++)
1141 find_givs_in_bb (data
, body
[i
]);
1145 /* For each ssa name defined in LOOP determines whether it is an induction
1146 variable and if so, its initial value and step. */
1149 find_induction_variables (struct ivopts_data
*data
)
1154 if (!find_bivs (data
))
1160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1162 tree niter
= niter_for_single_dom_exit (data
);
1166 fprintf (dump_file
, " number of iterations ");
1167 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1168 fprintf (dump_file
, "\n\n");
1171 fprintf (dump_file
, "Induction variables:\n\n");
1173 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1175 if (ver_info (data
, i
)->iv
)
1176 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1183 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1185 static struct iv_use
*
1186 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1187 gimple stmt
, enum use_type use_type
)
1189 struct iv_use
*use
= XCNEW (struct iv_use
);
1191 use
->id
= n_iv_uses (data
);
1192 use
->type
= use_type
;
1196 use
->related_cands
= BITMAP_ALLOC (NULL
);
1198 /* To avoid showing ssa name in the dumps, if it was not reset by the
1200 iv
->ssa_name
= NULL_TREE
;
1202 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1203 dump_use (dump_file
, use
);
1205 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1210 /* Checks whether OP is a loop-level invariant and if so, records it.
1211 NONLINEAR_USE is true if the invariant is used in a way we do not
1212 handle specially. */
1215 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1218 struct version_info
*info
;
1220 if (TREE_CODE (op
) != SSA_NAME
1221 || !is_gimple_reg (op
))
1224 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1226 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1229 info
= name_info (data
, op
);
1231 info
->has_nonlin_use
|= nonlinear_use
;
1233 info
->inv_id
= ++data
->max_inv_id
;
1234 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1237 /* Checks whether the use OP is interesting and if so, records it. */
1239 static struct iv_use
*
1240 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1247 if (TREE_CODE (op
) != SSA_NAME
)
1250 iv
= get_iv (data
, op
);
1254 if (iv
->have_use_for
)
1256 use
= iv_use (data
, iv
->use_id
);
1258 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1262 if (integer_zerop (iv
->step
))
1264 record_invariant (data
, op
, true);
1267 iv
->have_use_for
= true;
1269 civ
= XNEW (struct iv
);
1272 stmt
= SSA_NAME_DEF_STMT (op
);
1273 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1274 || is_gimple_assign (stmt
));
1276 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1277 iv
->use_id
= use
->id
;
1282 /* Given a condition in statement STMT, checks whether it is a compare
1283 of an induction variable and an invariant. If this is the case,
1284 CONTROL_VAR is set to location of the iv, BOUND to the location of
1285 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1286 induction variable descriptions, and true is returned. If this is not
1287 the case, CONTROL_VAR and BOUND are set to the arguments of the
1288 condition and false is returned. */
1291 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1292 tree
**control_var
, tree
**bound
,
1293 struct iv
**iv_var
, struct iv
**iv_bound
)
1295 /* The objects returned when COND has constant operands. */
1296 static struct iv const_iv
;
1298 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1299 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1302 if (gimple_code (stmt
) == GIMPLE_COND
)
1304 op0
= gimple_cond_lhs_ptr (stmt
);
1305 op1
= gimple_cond_rhs_ptr (stmt
);
1309 op0
= gimple_assign_rhs1_ptr (stmt
);
1310 op1
= gimple_assign_rhs2_ptr (stmt
);
1313 zero
= integer_zero_node
;
1314 const_iv
.step
= integer_zero_node
;
1316 if (TREE_CODE (*op0
) == SSA_NAME
)
1317 iv0
= get_iv (data
, *op0
);
1318 if (TREE_CODE (*op1
) == SSA_NAME
)
1319 iv1
= get_iv (data
, *op1
);
1321 /* Exactly one of the compared values must be an iv, and the other one must
1326 if (integer_zerop (iv0
->step
))
1328 /* Control variable may be on the other side. */
1329 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1330 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1332 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1336 *control_var
= op0
;;
1347 /* Checks whether the condition in STMT is interesting and if so,
1351 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1353 tree
*var_p
, *bound_p
;
1354 struct iv
*var_iv
, *civ
;
1356 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1358 find_interesting_uses_op (data
, *var_p
);
1359 find_interesting_uses_op (data
, *bound_p
);
1363 civ
= XNEW (struct iv
);
1365 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1368 /* Returns true if expression EXPR is obviously invariant in LOOP,
1369 i.e. if all its operands are defined outside of the LOOP. LOOP
1370 should not be the function body. */
1373 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1378 gcc_assert (loop_depth (loop
) > 0);
1380 if (is_gimple_min_invariant (expr
))
1383 if (TREE_CODE (expr
) == SSA_NAME
)
1385 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1387 && flow_bb_inside_loop_p (loop
, def_bb
))
1396 len
= TREE_OPERAND_LENGTH (expr
);
1397 for (i
= 0; i
< len
; i
++)
1398 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1404 /* Returns true if statement STMT is obviously invariant in LOOP,
1405 i.e. if all its operands on the RHS are defined outside of the LOOP.
1406 LOOP should not be the function body. */
1409 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1414 gcc_assert (loop_depth (loop
) > 0);
1416 lhs
= gimple_get_lhs (stmt
);
1417 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1419 tree op
= gimple_op (stmt
, i
);
1420 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1427 /* Cumulates the steps of indices into DATA and replaces their values with the
1428 initial ones. Returns false when the value of the index cannot be determined.
1429 Callback for for_each_index. */
1431 struct ifs_ivopts_data
1433 struct ivopts_data
*ivopts_data
;
1439 idx_find_step (tree base
, tree
*idx
, void *data
)
1441 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1443 tree step
, iv_base
, iv_step
, lbound
, off
;
1444 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1446 /* If base is a component ref, require that the offset of the reference
1448 if (TREE_CODE (base
) == COMPONENT_REF
)
1450 off
= component_ref_field_offset (base
);
1451 return expr_invariant_in_loop_p (loop
, off
);
1454 /* If base is array, first check whether we will be able to move the
1455 reference out of the loop (in order to take its address in strength
1456 reduction). In order for this to work we need both lower bound
1457 and step to be loop invariants. */
1458 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1460 /* Moreover, for a range, the size needs to be invariant as well. */
1461 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1462 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1465 step
= array_ref_element_size (base
);
1466 lbound
= array_ref_low_bound (base
);
1468 if (!expr_invariant_in_loop_p (loop
, step
)
1469 || !expr_invariant_in_loop_p (loop
, lbound
))
1473 if (TREE_CODE (*idx
) != SSA_NAME
)
1476 iv
= get_iv (dta
->ivopts_data
, *idx
);
1480 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1481 *&x[0], which is not folded and does not trigger the
1482 ARRAY_REF path below. */
1485 if (integer_zerop (iv
->step
))
1488 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1490 step
= array_ref_element_size (base
);
1492 /* We only handle addresses whose step is an integer constant. */
1493 if (TREE_CODE (step
) != INTEGER_CST
)
1497 /* The step for pointer arithmetics already is 1 byte. */
1498 step
= size_one_node
;
1502 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1503 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1506 /* The index might wrap. */
1510 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1511 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1516 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1517 object is passed to it in DATA. */
1520 idx_record_use (tree base
, tree
*idx
,
1523 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1524 find_interesting_uses_op (data
, *idx
);
1525 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1527 find_interesting_uses_op (data
, array_ref_element_size (base
));
1528 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1533 /* If we can prove that TOP = cst * BOT for some constant cst,
1534 store cst to MUL and return true. Otherwise return false.
1535 The returned value is always sign-extended, regardless of the
1536 signedness of TOP and BOT. */
1539 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1542 enum tree_code code
;
1543 double_int res
, p0
, p1
;
1544 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1549 if (operand_equal_p (top
, bot
, 0))
1551 *mul
= double_int_one
;
1555 code
= TREE_CODE (top
);
1559 mby
= TREE_OPERAND (top
, 1);
1560 if (TREE_CODE (mby
) != INTEGER_CST
)
1563 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1566 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1572 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1573 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1576 if (code
== MINUS_EXPR
)
1577 p1
= double_int_neg (p1
);
1578 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1582 if (TREE_CODE (bot
) != INTEGER_CST
)
1585 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1586 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1587 if (double_int_zero_p (p1
))
1589 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1591 return double_int_zero_p (res
);
1598 /* Returns true if memory reference REF with step STEP may be unaligned. */
1601 may_be_unaligned_p (tree ref
, tree step
)
1605 HOST_WIDE_INT bitsize
;
1606 HOST_WIDE_INT bitpos
;
1608 enum machine_mode mode
;
1609 int unsignedp
, volatilep
;
1610 unsigned base_align
;
1612 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1613 thus they are not misaligned. */
1614 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1617 /* The test below is basically copy of what expr.c:normal_inner_ref
1618 does to check whether the object must be loaded by parts when
1619 STRICT_ALIGNMENT is true. */
1620 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1621 &unsignedp
, &volatilep
, true);
1622 base_type
= TREE_TYPE (base
);
1623 base_align
= TYPE_ALIGN (base_type
);
1625 if (mode
!= BLKmode
)
1627 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1629 if (base_align
< mode_align
1630 || (bitpos
% mode_align
) != 0
1631 || (bitpos
% BITS_PER_UNIT
) != 0)
1635 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1638 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1645 /* Return true if EXPR may be non-addressable. */
1648 may_be_nonaddressable_p (tree expr
)
1650 switch (TREE_CODE (expr
))
1652 case TARGET_MEM_REF
:
1653 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1654 target, thus they are always addressable. */
1658 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1659 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1661 case VIEW_CONVERT_EXPR
:
1662 /* This kind of view-conversions may wrap non-addressable objects
1663 and make them look addressable. After some processing the
1664 non-addressability may be uncovered again, causing ADDR_EXPRs
1665 of inappropriate objects to be built. */
1666 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1667 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1670 /* ... fall through ... */
1673 case ARRAY_RANGE_REF
:
1674 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1686 /* Finds addresses in *OP_P inside STMT. */
1689 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1691 tree base
= *op_p
, step
= size_zero_node
;
1693 struct ifs_ivopts_data ifs_ivopts_data
;
1695 /* Do not play with volatile memory references. A bit too conservative,
1696 perhaps, but safe. */
1697 if (gimple_has_volatile_ops (stmt
))
1700 /* Ignore bitfields for now. Not really something terribly complicated
1702 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1705 base
= unshare_expr (base
);
1707 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1709 tree type
= build_pointer_type (TREE_TYPE (base
));
1713 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1715 civ
= get_iv (data
, TMR_BASE (base
));
1719 TMR_BASE (base
) = civ
->base
;
1722 if (TMR_INDEX2 (base
)
1723 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1725 civ
= get_iv (data
, TMR_INDEX2 (base
));
1729 TMR_INDEX2 (base
) = civ
->base
;
1732 if (TMR_INDEX (base
)
1733 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1735 civ
= get_iv (data
, TMR_INDEX (base
));
1739 TMR_INDEX (base
) = civ
->base
;
1744 if (TMR_STEP (base
))
1745 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1747 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1751 if (integer_zerop (step
))
1753 base
= tree_mem_ref_addr (type
, base
);
1757 ifs_ivopts_data
.ivopts_data
= data
;
1758 ifs_ivopts_data
.stmt
= stmt
;
1759 ifs_ivopts_data
.step
= size_zero_node
;
1760 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1761 || integer_zerop (ifs_ivopts_data
.step
))
1763 step
= ifs_ivopts_data
.step
;
1765 /* Check that the base expression is addressable. This needs
1766 to be done after substituting bases of IVs into it. */
1767 if (may_be_nonaddressable_p (base
))
1770 /* Moreover, on strict alignment platforms, check that it is
1771 sufficiently aligned. */
1772 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1775 base
= build_fold_addr_expr (base
);
1777 /* Substituting bases of IVs into the base expression might
1778 have caused folding opportunities. */
1779 if (TREE_CODE (base
) == ADDR_EXPR
)
1781 tree
*ref
= &TREE_OPERAND (base
, 0);
1782 while (handled_component_p (*ref
))
1783 ref
= &TREE_OPERAND (*ref
, 0);
1784 if (TREE_CODE (*ref
) == MEM_REF
)
1786 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1787 TREE_OPERAND (*ref
, 0),
1788 TREE_OPERAND (*ref
, 1));
1795 civ
= alloc_iv (base
, step
);
1796 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1800 for_each_index (op_p
, idx_record_use
, data
);
1803 /* Finds and records invariants used in STMT. */
1806 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1809 use_operand_p use_p
;
1812 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1814 op
= USE_FROM_PTR (use_p
);
1815 record_invariant (data
, op
, false);
1819 /* Finds interesting uses of induction variables in the statement STMT. */
1822 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1825 tree op
, *lhs
, *rhs
;
1827 use_operand_p use_p
;
1828 enum tree_code code
;
1830 find_invariants_stmt (data
, stmt
);
1832 if (gimple_code (stmt
) == GIMPLE_COND
)
1834 find_interesting_uses_cond (data
, stmt
);
1838 if (is_gimple_assign (stmt
))
1840 lhs
= gimple_assign_lhs_ptr (stmt
);
1841 rhs
= gimple_assign_rhs1_ptr (stmt
);
1843 if (TREE_CODE (*lhs
) == SSA_NAME
)
1845 /* If the statement defines an induction variable, the uses are not
1846 interesting by themselves. */
1848 iv
= get_iv (data
, *lhs
);
1850 if (iv
&& !integer_zerop (iv
->step
))
1854 code
= gimple_assign_rhs_code (stmt
);
1855 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1856 && (REFERENCE_CLASS_P (*rhs
)
1857 || is_gimple_val (*rhs
)))
1859 if (REFERENCE_CLASS_P (*rhs
))
1860 find_interesting_uses_address (data
, stmt
, rhs
);
1862 find_interesting_uses_op (data
, *rhs
);
1864 if (REFERENCE_CLASS_P (*lhs
))
1865 find_interesting_uses_address (data
, stmt
, lhs
);
1868 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1870 find_interesting_uses_cond (data
, stmt
);
1874 /* TODO -- we should also handle address uses of type
1876 memory = call (whatever);
1883 if (gimple_code (stmt
) == GIMPLE_PHI
1884 && gimple_bb (stmt
) == data
->current_loop
->header
)
1886 iv
= get_iv (data
, PHI_RESULT (stmt
));
1888 if (iv
&& !integer_zerop (iv
->step
))
1892 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1894 op
= USE_FROM_PTR (use_p
);
1896 if (TREE_CODE (op
) != SSA_NAME
)
1899 iv
= get_iv (data
, op
);
1903 find_interesting_uses_op (data
, op
);
1907 /* Finds interesting uses of induction variables outside of loops
1908 on loop exit edge EXIT. */
1911 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1914 gimple_stmt_iterator psi
;
1917 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1919 phi
= gsi_stmt (psi
);
1920 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1921 if (is_gimple_reg (def
))
1922 find_interesting_uses_op (data
, def
);
1926 /* Finds uses of the induction variables that are interesting. */
1929 find_interesting_uses (struct ivopts_data
*data
)
1932 gimple_stmt_iterator bsi
;
1933 basic_block
*body
= get_loop_body (data
->current_loop
);
1935 struct version_info
*info
;
1938 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1939 fprintf (dump_file
, "Uses:\n\n");
1941 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1946 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1947 if (e
->dest
!= EXIT_BLOCK_PTR
1948 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1949 find_interesting_uses_outside (data
, e
);
1951 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1952 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1953 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1954 if (!is_gimple_debug (gsi_stmt (bsi
)))
1955 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1958 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1962 fprintf (dump_file
, "\n");
1964 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1966 info
= ver_info (data
, i
);
1969 fprintf (dump_file
, " ");
1970 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1971 fprintf (dump_file
, " is invariant (%d)%s\n",
1972 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1976 fprintf (dump_file
, "\n");
1982 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1983 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1984 we are at the top-level of the processed address. */
1987 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1988 unsigned HOST_WIDE_INT
*offset
)
1990 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1991 enum tree_code code
;
1992 tree type
, orig_type
= TREE_TYPE (expr
);
1993 unsigned HOST_WIDE_INT off0
, off1
, st
;
1994 tree orig_expr
= expr
;
1998 type
= TREE_TYPE (expr
);
1999 code
= TREE_CODE (expr
);
2005 if (!cst_and_fits_in_hwi (expr
)
2006 || integer_zerop (expr
))
2009 *offset
= int_cst_value (expr
);
2010 return build_int_cst (orig_type
, 0);
2012 case POINTER_PLUS_EXPR
:
2015 op0
= TREE_OPERAND (expr
, 0);
2016 op1
= TREE_OPERAND (expr
, 1);
2018 op0
= strip_offset_1 (op0
, false, false, &off0
);
2019 op1
= strip_offset_1 (op1
, false, false, &off1
);
2021 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2022 if (op0
== TREE_OPERAND (expr
, 0)
2023 && op1
== TREE_OPERAND (expr
, 1))
2026 if (integer_zerop (op1
))
2028 else if (integer_zerop (op0
))
2030 if (code
== MINUS_EXPR
)
2031 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2036 expr
= fold_build2 (code
, type
, op0
, op1
);
2038 return fold_convert (orig_type
, expr
);
2041 op1
= TREE_OPERAND (expr
, 1);
2042 if (!cst_and_fits_in_hwi (op1
))
2045 op0
= TREE_OPERAND (expr
, 0);
2046 op0
= strip_offset_1 (op0
, false, false, &off0
);
2047 if (op0
== TREE_OPERAND (expr
, 0))
2050 *offset
= off0
* int_cst_value (op1
);
2051 if (integer_zerop (op0
))
2054 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2056 return fold_convert (orig_type
, expr
);
2059 case ARRAY_RANGE_REF
:
2063 step
= array_ref_element_size (expr
);
2064 if (!cst_and_fits_in_hwi (step
))
2067 st
= int_cst_value (step
);
2068 op1
= TREE_OPERAND (expr
, 1);
2069 op1
= strip_offset_1 (op1
, false, false, &off1
);
2070 *offset
= off1
* st
;
2073 && integer_zerop (op1
))
2075 /* Strip the component reference completely. */
2076 op0
= TREE_OPERAND (expr
, 0);
2077 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2087 tmp
= component_ref_field_offset (expr
);
2089 && cst_and_fits_in_hwi (tmp
))
2091 /* Strip the component reference completely. */
2092 op0
= TREE_OPERAND (expr
, 0);
2093 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2094 *offset
= off0
+ int_cst_value (tmp
);
2100 op0
= TREE_OPERAND (expr
, 0);
2101 op0
= strip_offset_1 (op0
, true, true, &off0
);
2104 if (op0
== TREE_OPERAND (expr
, 0))
2107 expr
= build_fold_addr_expr (op0
);
2108 return fold_convert (orig_type
, expr
);
2111 /* ??? Offset operand? */
2112 inside_addr
= false;
2119 /* Default handling of expressions for that we want to recurse into
2120 the first operand. */
2121 op0
= TREE_OPERAND (expr
, 0);
2122 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2125 if (op0
== TREE_OPERAND (expr
, 0)
2126 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2129 expr
= copy_node (expr
);
2130 TREE_OPERAND (expr
, 0) = op0
;
2132 TREE_OPERAND (expr
, 1) = op1
;
2134 /* Inside address, we might strip the top level component references,
2135 thus changing type of the expression. Handling of ADDR_EXPR
2137 expr
= fold_convert (orig_type
, expr
);
2142 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2145 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2147 return strip_offset_1 (expr
, false, false, offset
);
2150 /* Returns variant of TYPE that can be used as base for different uses.
2151 We return unsigned type with the same precision, which avoids problems
2155 generic_type_for (tree type
)
2157 if (POINTER_TYPE_P (type
))
2158 return unsigned_type_for (type
);
2160 if (TYPE_UNSIGNED (type
))
2163 return unsigned_type_for (type
);
2166 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2167 the bitmap to that we should store it. */
2169 static struct ivopts_data
*fd_ivopts_data
;
2171 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2173 bitmap
*depends_on
= (bitmap
*) data
;
2174 struct version_info
*info
;
2176 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2178 info
= name_info (fd_ivopts_data
, *expr_p
);
2180 if (!info
->inv_id
|| info
->has_nonlin_use
)
2184 *depends_on
= BITMAP_ALLOC (NULL
);
2185 bitmap_set_bit (*depends_on
, info
->inv_id
);
2190 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2191 position to POS. If USE is not NULL, the candidate is set as related to
2192 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2193 replacement of the final value of the iv by a direct computation. */
2195 static struct iv_cand
*
2196 add_candidate_1 (struct ivopts_data
*data
,
2197 tree base
, tree step
, bool important
, enum iv_position pos
,
2198 struct iv_use
*use
, gimple incremented_at
)
2201 struct iv_cand
*cand
= NULL
;
2202 tree type
, orig_type
;
2206 orig_type
= TREE_TYPE (base
);
2207 type
= generic_type_for (orig_type
);
2208 if (type
!= orig_type
)
2210 base
= fold_convert (type
, base
);
2211 step
= fold_convert (type
, step
);
2215 for (i
= 0; i
< n_iv_cands (data
); i
++)
2217 cand
= iv_cand (data
, i
);
2219 if (cand
->pos
!= pos
)
2222 if (cand
->incremented_at
!= incremented_at
2223 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2224 && cand
->ainc_use
!= use
))
2238 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2239 && operand_equal_p (step
, cand
->iv
->step
, 0)
2240 && (TYPE_PRECISION (TREE_TYPE (base
))
2241 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2245 if (i
== n_iv_cands (data
))
2247 cand
= XCNEW (struct iv_cand
);
2253 cand
->iv
= alloc_iv (base
, step
);
2256 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2258 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2259 cand
->var_after
= cand
->var_before
;
2261 cand
->important
= important
;
2262 cand
->incremented_at
= incremented_at
;
2263 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2266 && TREE_CODE (step
) != INTEGER_CST
)
2268 fd_ivopts_data
= data
;
2269 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2272 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2273 cand
->ainc_use
= use
;
2275 cand
->ainc_use
= NULL
;
2277 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2278 dump_cand (dump_file
, cand
);
2281 if (important
&& !cand
->important
)
2283 cand
->important
= true;
2284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2285 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2290 bitmap_set_bit (use
->related_cands
, i
);
2291 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2292 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2299 /* Returns true if incrementing the induction variable at the end of the LOOP
2302 The purpose is to avoid splitting latch edge with a biv increment, thus
2303 creating a jump, possibly confusing other optimization passes and leaving
2304 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2305 is not available (so we do not have a better alternative), or if the latch
2306 edge is already nonempty. */
2309 allow_ip_end_pos_p (struct loop
*loop
)
2311 if (!ip_normal_pos (loop
))
2314 if (!empty_block_p (ip_end_pos (loop
)))
2320 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2321 Important field is set to IMPORTANT. */
2324 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2325 bool important
, struct iv_use
*use
)
2327 basic_block use_bb
= gimple_bb (use
->stmt
);
2328 enum machine_mode mem_mode
;
2329 unsigned HOST_WIDE_INT cstepi
;
2331 /* If we insert the increment in any position other than the standard
2332 ones, we must ensure that it is incremented once per iteration.
2333 It must not be in an inner nested loop, or one side of an if
2335 if (use_bb
->loop_father
!= data
->current_loop
2336 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2337 || stmt_could_throw_p (use
->stmt
)
2338 || !cst_and_fits_in_hwi (step
))
2341 cstepi
= int_cst_value (step
);
2343 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2344 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2345 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2347 enum tree_code code
= MINUS_EXPR
;
2349 tree new_step
= step
;
2351 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2353 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2354 code
= POINTER_PLUS_EXPR
;
2357 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2358 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2359 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2362 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2363 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2365 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2370 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2371 position to POS. If USE is not NULL, the candidate is set as related to
2372 it. The candidate computation is scheduled on all available positions. */
2375 add_candidate (struct ivopts_data
*data
,
2376 tree base
, tree step
, bool important
, struct iv_use
*use
)
2378 if (ip_normal_pos (data
->current_loop
))
2379 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2380 if (ip_end_pos (data
->current_loop
)
2381 && allow_ip_end_pos_p (data
->current_loop
))
2382 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2384 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2385 add_autoinc_candidates (data
, base
, step
, important
, use
);
2388 /* Add a standard "0 + 1 * iteration" iv candidate for a
2389 type with SIZE bits. */
2392 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2395 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2396 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2400 /* Adds standard iv candidates. */
2403 add_standard_iv_candidates (struct ivopts_data
*data
)
2405 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2407 /* The same for a double-integer type if it is still fast enough. */
2408 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2409 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2413 /* Adds candidates bases on the old induction variable IV. */
2416 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2420 struct iv_cand
*cand
;
2422 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2424 /* The same, but with initial value zero. */
2425 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2426 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2428 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2429 iv
->step
, true, NULL
);
2431 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2432 if (gimple_code (phi
) == GIMPLE_PHI
)
2434 /* Additionally record the possibility of leaving the original iv
2436 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2437 cand
= add_candidate_1 (data
,
2438 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2439 SSA_NAME_DEF_STMT (def
));
2440 cand
->var_before
= iv
->ssa_name
;
2441 cand
->var_after
= def
;
2445 /* Adds candidates based on the old induction variables. */
2448 add_old_ivs_candidates (struct ivopts_data
*data
)
2454 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2456 iv
= ver_info (data
, i
)->iv
;
2457 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2458 add_old_iv_candidates (data
, iv
);
2462 /* Adds candidates based on the value of the induction variable IV and USE. */
2465 add_iv_value_candidates (struct ivopts_data
*data
,
2466 struct iv
*iv
, struct iv_use
*use
)
2468 unsigned HOST_WIDE_INT offset
;
2472 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2474 /* The same, but with initial value zero. Make such variable important,
2475 since it is generic enough so that possibly many uses may be based
2477 basetype
= TREE_TYPE (iv
->base
);
2478 if (POINTER_TYPE_P (basetype
))
2479 basetype
= sizetype
;
2480 add_candidate (data
, build_int_cst (basetype
, 0),
2481 iv
->step
, true, use
);
2483 /* Third, try removing the constant offset. Make sure to even
2484 add a candidate for &a[0] vs. (T *)&a. */
2485 base
= strip_offset (iv
->base
, &offset
);
2487 || base
!= iv
->base
)
2488 add_candidate (data
, base
, iv
->step
, false, use
);
2491 /* Adds candidates based on the uses. */
2494 add_derived_ivs_candidates (struct ivopts_data
*data
)
2498 for (i
= 0; i
< n_iv_uses (data
); i
++)
2500 struct iv_use
*use
= iv_use (data
, i
);
2507 case USE_NONLINEAR_EXPR
:
2510 /* Just add the ivs based on the value of the iv used here. */
2511 add_iv_value_candidates (data
, use
->iv
, use
);
2520 /* Record important candidates and add them to related_cands bitmaps
2524 record_important_candidates (struct ivopts_data
*data
)
2529 for (i
= 0; i
< n_iv_cands (data
); i
++)
2531 struct iv_cand
*cand
= iv_cand (data
, i
);
2533 if (cand
->important
)
2534 bitmap_set_bit (data
->important_candidates
, i
);
2537 data
->consider_all_candidates
= (n_iv_cands (data
)
2538 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2540 if (data
->consider_all_candidates
)
2542 /* We will not need "related_cands" bitmaps in this case,
2543 so release them to decrease peak memory consumption. */
2544 for (i
= 0; i
< n_iv_uses (data
); i
++)
2546 use
= iv_use (data
, i
);
2547 BITMAP_FREE (use
->related_cands
);
2552 /* Add important candidates to the related_cands bitmaps. */
2553 for (i
= 0; i
< n_iv_uses (data
); i
++)
2554 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2555 data
->important_candidates
);
2559 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2560 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2561 we allocate a simple list to every use. */
2564 alloc_use_cost_map (struct ivopts_data
*data
)
2566 unsigned i
, size
, s
, j
;
2568 for (i
= 0; i
< n_iv_uses (data
); i
++)
2570 struct iv_use
*use
= iv_use (data
, i
);
2573 if (data
->consider_all_candidates
)
2574 size
= n_iv_cands (data
);
2578 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2583 /* Round up to the power of two, so that moduling by it is fast. */
2584 for (size
= 1; size
< s
; size
<<= 1)
2588 use
->n_map_members
= size
;
2589 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2593 /* Returns description of computation cost of expression whose runtime
2594 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2597 new_cost (unsigned runtime
, unsigned complexity
)
2601 cost
.cost
= runtime
;
2602 cost
.complexity
= complexity
;
2607 /* Adds costs COST1 and COST2. */
2610 add_costs (comp_cost cost1
, comp_cost cost2
)
2612 cost1
.cost
+= cost2
.cost
;
2613 cost1
.complexity
+= cost2
.complexity
;
2617 /* Subtracts costs COST1 and COST2. */
2620 sub_costs (comp_cost cost1
, comp_cost cost2
)
2622 cost1
.cost
-= cost2
.cost
;
2623 cost1
.complexity
-= cost2
.complexity
;
2628 /* Returns a negative number if COST1 < COST2, a positive number if
2629 COST1 > COST2, and 0 if COST1 = COST2. */
2632 compare_costs (comp_cost cost1
, comp_cost cost2
)
2634 if (cost1
.cost
== cost2
.cost
)
2635 return cost1
.complexity
- cost2
.complexity
;
2637 return cost1
.cost
- cost2
.cost
;
2640 /* Returns true if COST is infinite. */
2643 infinite_cost_p (comp_cost cost
)
2645 return cost
.cost
== INFTY
;
2648 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2649 on invariants DEPENDS_ON and that the value used in expressing it
2653 set_use_iv_cost (struct ivopts_data
*data
,
2654 struct iv_use
*use
, struct iv_cand
*cand
,
2655 comp_cost cost
, bitmap depends_on
, tree value
,
2660 if (infinite_cost_p (cost
))
2662 BITMAP_FREE (depends_on
);
2666 if (data
->consider_all_candidates
)
2668 use
->cost_map
[cand
->id
].cand
= cand
;
2669 use
->cost_map
[cand
->id
].cost
= cost
;
2670 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2671 use
->cost_map
[cand
->id
].value
= value
;
2672 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2676 /* n_map_members is a power of two, so this computes modulo. */
2677 s
= cand
->id
& (use
->n_map_members
- 1);
2678 for (i
= s
; i
< use
->n_map_members
; i
++)
2679 if (!use
->cost_map
[i
].cand
)
2681 for (i
= 0; i
< s
; i
++)
2682 if (!use
->cost_map
[i
].cand
)
2688 use
->cost_map
[i
].cand
= cand
;
2689 use
->cost_map
[i
].cost
= cost
;
2690 use
->cost_map
[i
].depends_on
= depends_on
;
2691 use
->cost_map
[i
].value
= value
;
2692 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2695 /* Gets cost of (USE, CANDIDATE) pair. */
2697 static struct cost_pair
*
2698 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2699 struct iv_cand
*cand
)
2702 struct cost_pair
*ret
;
2707 if (data
->consider_all_candidates
)
2709 ret
= use
->cost_map
+ cand
->id
;
2716 /* n_map_members is a power of two, so this computes modulo. */
2717 s
= cand
->id
& (use
->n_map_members
- 1);
2718 for (i
= s
; i
< use
->n_map_members
; i
++)
2719 if (use
->cost_map
[i
].cand
== cand
)
2720 return use
->cost_map
+ i
;
2722 for (i
= 0; i
< s
; i
++)
2723 if (use
->cost_map
[i
].cand
== cand
)
2724 return use
->cost_map
+ i
;
2729 /* Returns estimate on cost of computing SEQ. */
2732 seq_cost (rtx seq
, bool speed
)
2737 for (; seq
; seq
= NEXT_INSN (seq
))
2739 set
= single_set (seq
);
2741 cost
+= rtx_cost (set
, SET
,speed
);
2749 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2751 produce_memory_decl_rtl (tree obj
, int *regno
)
2753 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2754 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2758 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2760 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2761 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2762 SET_SYMBOL_REF_DECL (x
, obj
);
2763 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2764 set_mem_addr_space (x
, as
);
2765 targetm
.encode_section_info (obj
, x
, true);
2769 x
= gen_raw_REG (address_mode
, (*regno
)++);
2770 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2771 set_mem_addr_space (x
, as
);
2777 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2778 walk_tree. DATA contains the actual fake register number. */
2781 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2783 tree obj
= NULL_TREE
;
2785 int *regno
= (int *) data
;
2787 switch (TREE_CODE (*expr_p
))
2790 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2791 handled_component_p (*expr_p
);
2792 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2795 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2796 x
= produce_memory_decl_rtl (obj
, regno
);
2801 obj
= SSA_NAME_VAR (*expr_p
);
2802 if (!DECL_RTL_SET_P (obj
))
2803 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2812 if (DECL_RTL_SET_P (obj
))
2815 if (DECL_MODE (obj
) == BLKmode
)
2816 x
= produce_memory_decl_rtl (obj
, regno
);
2818 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2828 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2829 SET_DECL_RTL (obj
, x
);
2835 /* Determines cost of the computation of EXPR. */
2838 computation_cost (tree expr
, bool speed
)
2841 tree type
= TREE_TYPE (expr
);
2843 /* Avoid using hard regs in ways which may be unsupported. */
2844 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2845 struct cgraph_node
*node
= cgraph_node (current_function_decl
);
2846 enum node_frequency real_frequency
= node
->frequency
;
2848 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2849 crtl
->maybe_hot_insn_p
= speed
;
2850 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2852 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2855 default_rtl_profile ();
2856 node
->frequency
= real_frequency
;
2858 cost
= seq_cost (seq
, speed
);
2860 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2861 TYPE_ADDR_SPACE (type
), speed
);
2866 /* Returns variable containing the value of candidate CAND at statement AT. */
2869 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2871 if (stmt_after_increment (loop
, cand
, stmt
))
2872 return cand
->var_after
;
2874 return cand
->var_before
;
2877 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2878 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2881 tree_int_cst_sign_bit (const_tree t
)
2883 unsigned bitno
= TYPE_PRECISION (TREE_TYPE (t
)) - 1;
2884 unsigned HOST_WIDE_INT w
;
2886 if (bitno
< HOST_BITS_PER_WIDE_INT
)
2887 w
= TREE_INT_CST_LOW (t
);
2890 w
= TREE_INT_CST_HIGH (t
);
2891 bitno
-= HOST_BITS_PER_WIDE_INT
;
2894 return (w
>> bitno
) & 1;
2897 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2898 same precision that is at least as wide as the precision of TYPE, stores
2899 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2903 determine_common_wider_type (tree
*a
, tree
*b
)
2905 tree wider_type
= NULL
;
2907 tree atype
= TREE_TYPE (*a
);
2909 if (CONVERT_EXPR_P (*a
))
2911 suba
= TREE_OPERAND (*a
, 0);
2912 wider_type
= TREE_TYPE (suba
);
2913 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2919 if (CONVERT_EXPR_P (*b
))
2921 subb
= TREE_OPERAND (*b
, 0);
2922 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2933 /* Determines the expression by that USE is expressed from induction variable
2934 CAND at statement AT in LOOP. The expression is stored in a decomposed
2935 form into AFF. Returns false if USE cannot be expressed using CAND. */
2938 get_computation_aff (struct loop
*loop
,
2939 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2940 struct affine_tree_combination
*aff
)
2942 tree ubase
= use
->iv
->base
;
2943 tree ustep
= use
->iv
->step
;
2944 tree cbase
= cand
->iv
->base
;
2945 tree cstep
= cand
->iv
->step
, cstep_common
;
2946 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2947 tree common_type
, var
;
2949 aff_tree cbase_aff
, var_aff
;
2952 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2954 /* We do not have a precision to express the values of use. */
2958 var
= var_at_stmt (loop
, cand
, at
);
2959 uutype
= unsigned_type_for (utype
);
2961 /* If the conversion is not noop, perform it. */
2962 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2964 cstep
= fold_convert (uutype
, cstep
);
2965 cbase
= fold_convert (uutype
, cbase
);
2966 var
= fold_convert (uutype
, var
);
2969 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2972 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2973 type, we achieve better folding by computing their difference in this
2974 wider type, and cast the result to UUTYPE. We do not need to worry about
2975 overflows, as all the arithmetics will in the end be performed in UUTYPE
2977 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2979 /* use = ubase - ratio * cbase + ratio * var. */
2980 tree_to_aff_combination (ubase
, common_type
, aff
);
2981 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2982 tree_to_aff_combination (var
, uutype
, &var_aff
);
2984 /* We need to shift the value if we are after the increment. */
2985 if (stmt_after_increment (loop
, cand
, at
))
2989 if (common_type
!= uutype
)
2990 cstep_common
= fold_convert (common_type
, cstep
);
2992 cstep_common
= cstep
;
2994 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2995 aff_combination_add (&cbase_aff
, &cstep_aff
);
2998 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2999 aff_combination_add (aff
, &cbase_aff
);
3000 if (common_type
!= uutype
)
3001 aff_combination_convert (aff
, uutype
);
3003 aff_combination_scale (&var_aff
, rat
);
3004 aff_combination_add (aff
, &var_aff
);
3009 /* Determines the expression by that USE is expressed from induction variable
3010 CAND at statement AT in LOOP. The computation is unshared. */
3013 get_computation_at (struct loop
*loop
,
3014 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3017 tree type
= TREE_TYPE (use
->iv
->base
);
3019 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3021 unshare_aff_combination (&aff
);
3022 return fold_convert (type
, aff_combination_to_tree (&aff
));
3025 /* Determines the expression by that USE is expressed from induction variable
3026 CAND in LOOP. The computation is unshared. */
3029 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3031 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3034 /* Adjust the cost COST for being in loop setup rather than loop body.
3035 If we're optimizing for space, the loop setup overhead is constant;
3036 if we're optimizing for speed, amortize it over the per-iteration cost. */
3038 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3042 else if (optimize_loop_for_speed_p (data
->current_loop
))
3043 return cost
/ avg_loop_niter (data
->current_loop
);
3048 /* Returns cost of addition in MODE. */
3051 add_cost (enum machine_mode mode
, bool speed
)
3053 static unsigned costs
[NUM_MACHINE_MODES
];
3061 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3062 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3063 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3068 cost
= seq_cost (seq
, speed
);
3074 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3075 fprintf (dump_file
, "Addition in %s costs %d\n",
3076 GET_MODE_NAME (mode
), cost
);
3080 /* Entry in a hashtable of already known costs for multiplication. */
3083 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3084 enum machine_mode mode
; /* In mode. */
3085 unsigned cost
; /* The cost. */
3088 /* Counts hash value for the ENTRY. */
3091 mbc_entry_hash (const void *entry
)
3093 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
3095 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3098 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3101 mbc_entry_eq (const void *entry1
, const void *entry2
)
3103 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
3104 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
3106 return (e1
->mode
== e2
->mode
3107 && e1
->cst
== e2
->cst
);
3110 /* Returns cost of multiplication by constant CST in MODE. */
3113 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
, bool speed
)
3115 static htab_t costs
;
3116 struct mbc_entry
**cached
, act
;
3121 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3125 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3127 return (*cached
)->cost
;
3129 *cached
= XNEW (struct mbc_entry
);
3130 (*cached
)->mode
= mode
;
3131 (*cached
)->cst
= cst
;
3134 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3135 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3139 cost
= seq_cost (seq
, speed
);
3141 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3142 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3143 (int) cst
, GET_MODE_NAME (mode
), cost
);
3145 (*cached
)->cost
= cost
;
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);
3158 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3161 #define MAX_RATIO 128
3162 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3163 static VEC (sbitmap
, heap
) *valid_mult_list
;
3166 if (data_index
>= VEC_length (sbitmap
, valid_mult_list
))
3167 VEC_safe_grow_cleared (sbitmap
, heap
, valid_mult_list
, data_index
+ 1);
3169 valid_mult
= VEC_index (sbitmap
, valid_mult_list
, data_index
);
3172 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3173 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3177 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3178 sbitmap_zero (valid_mult
);
3179 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3180 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3182 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3183 if (memory_address_addr_space_p (mode
, addr
, as
))
3184 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3187 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3189 fprintf (dump_file
, " allowed multipliers:");
3190 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3191 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3192 fprintf (dump_file
, " %d", (int) i
);
3193 fprintf (dump_file
, "\n");
3194 fprintf (dump_file
, "\n");
3197 VEC_replace (sbitmap
, valid_mult_list
, data_index
, valid_mult
);
3200 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3203 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3206 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3207 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3208 variable is omitted. Compute the cost for a memory reference that accesses
3209 a memory location of mode MEM_MODE in address space AS.
3211 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3212 size of MEM_MODE / RATIO) is available. To make this determination, we
3213 look at the size of the increment to be made, which is given in CSTEP.
3214 CSTEP may be zero if the step is unknown.
3215 STMT_AFTER_INC is true iff the statement we're looking at is after the
3216 increment of the original biv.
3218 TODO -- there must be some better way. This all is quite crude. */
3222 HOST_WIDE_INT min_offset
, max_offset
;
3223 unsigned costs
[2][2][2][2];
3224 } *address_cost_data
;
3226 DEF_VEC_P (address_cost_data
);
3227 DEF_VEC_ALLOC_P (address_cost_data
, heap
);
3230 get_address_cost (bool symbol_present
, bool var_present
,
3231 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3232 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3233 addr_space_t as
, bool speed
,
3234 bool stmt_after_inc
, bool *may_autoinc
)
3236 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3237 static VEC(address_cost_data
, heap
) *address_cost_data_list
;
3238 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3239 address_cost_data data
;
3240 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3241 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3242 unsigned cost
, acost
, complexity
;
3243 bool offset_p
, ratio_p
, autoinc
;
3244 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3245 unsigned HOST_WIDE_INT mask
;
3248 if (data_index
>= VEC_length (address_cost_data
, address_cost_data_list
))
3249 VEC_safe_grow_cleared (address_cost_data
, heap
, address_cost_data_list
,
3252 data
= VEC_index (address_cost_data
, address_cost_data_list
, data_index
);
3256 HOST_WIDE_INT rat
, off
= 0;
3257 int old_cse_not_expected
, width
;
3258 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3259 rtx seq
, addr
, base
;
3262 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3264 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3266 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3267 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3268 width
= HOST_BITS_PER_WIDE_INT
- 1;
3269 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3271 for (i
= width
; i
>= 0; i
--)
3273 off
= -((HOST_WIDE_INT
) 1 << i
);
3274 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3275 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3278 data
->min_offset
= (i
== -1? 0 : off
);
3280 for (i
= width
; i
>= 0; i
--)
3282 off
= ((HOST_WIDE_INT
) 1 << i
) - 1;
3283 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3284 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3289 data
->max_offset
= off
;
3291 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3293 fprintf (dump_file
, "get_address_cost:\n");
3294 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3295 GET_MODE_NAME (mem_mode
),
3297 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3298 GET_MODE_NAME (mem_mode
),
3303 for (i
= 2; i
<= MAX_RATIO
; i
++)
3304 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3310 /* Compute the cost of various addressing modes. */
3312 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3313 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3315 if (HAVE_PRE_DECREMENT
)
3317 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3318 has_predec
[mem_mode
]
3319 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3321 if (HAVE_POST_DECREMENT
)
3323 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3324 has_postdec
[mem_mode
]
3325 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3327 if (HAVE_PRE_INCREMENT
)
3329 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3330 has_preinc
[mem_mode
]
3331 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3333 if (HAVE_POST_INCREMENT
)
3335 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3336 has_postinc
[mem_mode
]
3337 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3339 for (i
= 0; i
< 16; i
++)
3342 var_p
= (i
>> 1) & 1;
3343 off_p
= (i
>> 2) & 1;
3344 rat_p
= (i
>> 3) & 1;
3348 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3349 gen_int_mode (rat
, address_mode
));
3352 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3356 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3357 /* ??? We can run into trouble with some backends by presenting
3358 it with symbols which haven't been properly passed through
3359 targetm.encode_section_info. By setting the local bit, we
3360 enhance the probability of things working. */
3361 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3364 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3366 (PLUS
, address_mode
, base
,
3367 gen_int_mode (off
, address_mode
)));
3370 base
= gen_int_mode (off
, address_mode
);
3375 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3378 /* To avoid splitting addressing modes, pretend that no cse will
3380 old_cse_not_expected
= cse_not_expected
;
3381 cse_not_expected
= true;
3382 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3383 cse_not_expected
= old_cse_not_expected
;
3387 acost
= seq_cost (seq
, speed
);
3388 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3392 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3395 /* On some targets, it is quite expensive to load symbol to a register,
3396 which makes addresses that contain symbols look much more expensive.
3397 However, the symbol will have to be loaded in any case before the
3398 loop (and quite likely we have it in register already), so it does not
3399 make much sense to penalize them too heavily. So make some final
3400 tweaks for the SYMBOL_PRESENT modes:
3402 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3403 var is cheaper, use this mode with small penalty.
3404 If VAR_PRESENT is true, try whether the mode with
3405 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3406 if this is the case, use it. */
3407 add_c
= add_cost (address_mode
, speed
);
3408 for (i
= 0; i
< 8; i
++)
3411 off_p
= (i
>> 1) & 1;
3412 rat_p
= (i
>> 2) & 1;
3414 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3418 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3419 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3422 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3424 fprintf (dump_file
, "Address costs:\n");
3426 for (i
= 0; i
< 16; i
++)
3429 var_p
= (i
>> 1) & 1;
3430 off_p
= (i
>> 2) & 1;
3431 rat_p
= (i
>> 3) & 1;
3433 fprintf (dump_file
, " ");
3435 fprintf (dump_file
, "sym + ");
3437 fprintf (dump_file
, "var + ");
3439 fprintf (dump_file
, "cst + ");
3441 fprintf (dump_file
, "rat * ");
3443 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3444 fprintf (dump_file
, "index costs %d\n", acost
);
3446 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3447 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3448 fprintf (dump_file
, " May include autoinc/dec\n");
3449 fprintf (dump_file
, "\n");
3452 VEC_replace (address_cost_data
, address_cost_data_list
,
3456 bits
= GET_MODE_BITSIZE (address_mode
);
3457 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3459 if ((offset
>> (bits
- 1) & 1))
3464 msize
= GET_MODE_SIZE (mem_mode
);
3465 autoinc_offset
= offset
;
3467 autoinc_offset
+= ratio
* cstep
;
3468 if (symbol_present
|| var_present
|| ratio
!= 1)
3470 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3472 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3474 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3476 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3477 && msize
== -cstep
))
3481 offset_p
= (s_offset
!= 0
3482 && data
->min_offset
<= s_offset
3483 && s_offset
<= data
->max_offset
);
3484 ratio_p
= (ratio
!= 1
3485 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3487 if (ratio
!= 1 && !ratio_p
)
3488 cost
+= multiply_by_cost (ratio
, address_mode
, speed
);
3490 if (s_offset
&& !offset_p
&& !symbol_present
)
3491 cost
+= add_cost (address_mode
, speed
);
3494 *may_autoinc
= autoinc
;
3495 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3496 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3497 return new_cost (cost
+ acost
, complexity
);
3500 /* Estimates cost of forcing expression EXPR into a variable. */
3503 force_expr_to_var_cost (tree expr
, bool speed
)
3505 static bool costs_initialized
= false;
3506 static unsigned integer_cost
[2];
3507 static unsigned symbol_cost
[2];
3508 static unsigned address_cost
[2];
3510 comp_cost cost0
, cost1
, cost
;
3511 enum machine_mode mode
;
3513 if (!costs_initialized
)
3515 tree type
= build_pointer_type (integer_type_node
);
3520 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3521 TREE_STATIC (var
) = 1;
3522 x
= produce_memory_decl_rtl (var
, NULL
);
3523 SET_DECL_RTL (var
, x
);
3525 addr
= build1 (ADDR_EXPR
, type
, var
);
3528 for (i
= 0; i
< 2; i
++)
3530 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3533 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3536 = computation_cost (build2 (POINTER_PLUS_EXPR
, type
,
3538 build_int_cst (sizetype
, 2000)), i
) + 1;
3539 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3541 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3542 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3543 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3544 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3545 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3546 fprintf (dump_file
, "\n");
3550 costs_initialized
= true;
3555 if (SSA_VAR_P (expr
))
3558 if (is_gimple_min_invariant (expr
))
3560 if (TREE_CODE (expr
) == INTEGER_CST
)
3561 return new_cost (integer_cost
[speed
], 0);
3563 if (TREE_CODE (expr
) == ADDR_EXPR
)
3565 tree obj
= TREE_OPERAND (expr
, 0);
3567 if (TREE_CODE (obj
) == VAR_DECL
3568 || TREE_CODE (obj
) == PARM_DECL
3569 || TREE_CODE (obj
) == RESULT_DECL
)
3570 return new_cost (symbol_cost
[speed
], 0);
3573 return new_cost (address_cost
[speed
], 0);
3576 switch (TREE_CODE (expr
))
3578 case POINTER_PLUS_EXPR
:
3582 op0
= TREE_OPERAND (expr
, 0);
3583 op1
= TREE_OPERAND (expr
, 1);
3587 if (is_gimple_val (op0
))
3590 cost0
= force_expr_to_var_cost (op0
, speed
);
3592 if (is_gimple_val (op1
))
3595 cost1
= force_expr_to_var_cost (op1
, speed
);
3600 op0
= TREE_OPERAND (expr
, 0);
3604 if (is_gimple_val (op0
))
3607 cost0
= force_expr_to_var_cost (op0
, speed
);
3613 /* Just an arbitrary value, FIXME. */
3614 return new_cost (target_spill_cost
[speed
], 0);
3617 mode
= TYPE_MODE (TREE_TYPE (expr
));
3618 switch (TREE_CODE (expr
))
3620 case POINTER_PLUS_EXPR
:
3624 cost
= new_cost (add_cost (mode
, speed
), 0);
3628 if (cst_and_fits_in_hwi (op0
))
3629 cost
= new_cost (multiply_by_cost (int_cst_value (op0
), mode
, speed
), 0);
3630 else if (cst_and_fits_in_hwi (op1
))
3631 cost
= new_cost (multiply_by_cost (int_cst_value (op1
), mode
, speed
), 0);
3633 return new_cost (target_spill_cost
[speed
], 0);
3640 cost
= add_costs (cost
, cost0
);
3641 cost
= add_costs (cost
, cost1
);
3643 /* Bound the cost by target_spill_cost. The parts of complicated
3644 computations often are either loop invariant or at least can
3645 be shared between several iv uses, so letting this grow without
3646 limits would not give reasonable results. */
3647 if (cost
.cost
> (int) target_spill_cost
[speed
])
3648 cost
.cost
= target_spill_cost
[speed
];
3653 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3654 invariants the computation depends on. */
3657 force_var_cost (struct ivopts_data
*data
,
3658 tree expr
, bitmap
*depends_on
)
3662 fd_ivopts_data
= data
;
3663 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3666 return force_expr_to_var_cost (expr
, data
->speed
);
3669 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3670 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3671 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3672 invariants the computation depends on. */
3675 split_address_cost (struct ivopts_data
*data
,
3676 tree addr
, bool *symbol_present
, bool *var_present
,
3677 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3680 HOST_WIDE_INT bitsize
;
3681 HOST_WIDE_INT bitpos
;
3683 enum machine_mode mode
;
3684 int unsignedp
, volatilep
;
3686 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3687 &unsignedp
, &volatilep
, false);
3690 || bitpos
% BITS_PER_UNIT
!= 0
3691 || TREE_CODE (core
) != VAR_DECL
)
3693 *symbol_present
= false;
3694 *var_present
= true;
3695 fd_ivopts_data
= data
;
3696 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3697 return new_cost (target_spill_cost
[data
->speed
], 0);
3700 *offset
+= bitpos
/ BITS_PER_UNIT
;
3701 if (TREE_STATIC (core
)
3702 || DECL_EXTERNAL (core
))
3704 *symbol_present
= true;
3705 *var_present
= false;
3709 *symbol_present
= false;
3710 *var_present
= true;
3714 /* Estimates cost of expressing difference of addresses E1 - E2 as
3715 var + symbol + offset. The value of offset is added to OFFSET,
3716 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3717 part is missing. DEPENDS_ON is a set of the invariants the computation
3721 ptr_difference_cost (struct ivopts_data
*data
,
3722 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3723 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3725 HOST_WIDE_INT diff
= 0;
3726 aff_tree aff_e1
, aff_e2
;
3729 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3731 if (ptr_difference_const (e1
, e2
, &diff
))
3734 *symbol_present
= false;
3735 *var_present
= false;
3739 if (integer_zerop (e2
))
3740 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3741 symbol_present
, var_present
, offset
, depends_on
);
3743 *symbol_present
= false;
3744 *var_present
= true;
3746 type
= signed_type_for (TREE_TYPE (e1
));
3747 tree_to_aff_combination (e1
, type
, &aff_e1
);
3748 tree_to_aff_combination (e2
, type
, &aff_e2
);
3749 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3750 aff_combination_add (&aff_e1
, &aff_e2
);
3752 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3755 /* Estimates cost of expressing difference E1 - E2 as
3756 var + symbol + offset. The value of offset is added to OFFSET,
3757 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3758 part is missing. DEPENDS_ON is a set of the invariants the computation
3762 difference_cost (struct ivopts_data
*data
,
3763 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3764 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3766 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3767 unsigned HOST_WIDE_INT off1
, off2
;
3768 aff_tree aff_e1
, aff_e2
;
3771 e1
= strip_offset (e1
, &off1
);
3772 e2
= strip_offset (e2
, &off2
);
3773 *offset
+= off1
- off2
;
3778 if (TREE_CODE (e1
) == ADDR_EXPR
)
3779 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3780 offset
, depends_on
);
3781 *symbol_present
= false;
3783 if (operand_equal_p (e1
, e2
, 0))
3785 *var_present
= false;
3789 *var_present
= true;
3791 if (integer_zerop (e2
))
3792 return force_var_cost (data
, e1
, depends_on
);
3794 if (integer_zerop (e1
))
3796 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3797 cost
.cost
+= multiply_by_cost (-1, mode
, data
->speed
);
3801 type
= signed_type_for (TREE_TYPE (e1
));
3802 tree_to_aff_combination (e1
, type
, &aff_e1
);
3803 tree_to_aff_combination (e2
, type
, &aff_e2
);
3804 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3805 aff_combination_add (&aff_e1
, &aff_e2
);
3807 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3810 /* Returns true if AFF1 and AFF2 are identical. */
3813 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3817 if (aff1
->n
!= aff2
->n
)
3820 for (i
= 0; i
< aff1
->n
; i
++)
3822 if (double_int_cmp (aff1
->elts
[i
].coef
, aff2
->elts
[i
].coef
, 0) != 0)
3825 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3831 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3832 requires a new compiler generated temporary. Returns -1 otherwise.
3833 ADDRESS_P is a flag indicating if the expression is for address
3837 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3838 tree cbase
, HOST_WIDE_INT ratio
,
3841 aff_tree ubase_aff
, cbase_aff
;
3843 struct iv_inv_expr_ent ent
;
3844 struct iv_inv_expr_ent
**slot
;
3851 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3852 && (TREE_CODE (cbase
) == INTEGER_CST
))
3855 /* Strips the constant part. */
3856 if (TREE_CODE (ubase
) == PLUS_EXPR
3857 || TREE_CODE (ubase
) == MINUS_EXPR
3858 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3860 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3861 ubase
= TREE_OPERAND (ubase
, 0);
3864 /* Strips the constant part. */
3865 if (TREE_CODE (cbase
) == PLUS_EXPR
3866 || TREE_CODE (cbase
) == MINUS_EXPR
3867 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3869 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3870 cbase
= TREE_OPERAND (cbase
, 0);
3875 if (((TREE_CODE (ubase
) == SSA_NAME
)
3876 || (TREE_CODE (ubase
) == ADDR_EXPR
3877 && is_gimple_min_invariant (ubase
)))
3878 && (TREE_CODE (cbase
) == INTEGER_CST
))
3881 if (((TREE_CODE (cbase
) == SSA_NAME
)
3882 || (TREE_CODE (cbase
) == ADDR_EXPR
3883 && is_gimple_min_invariant (cbase
)))
3884 && (TREE_CODE (ubase
) == INTEGER_CST
))
3890 if(operand_equal_p (ubase
, cbase
, 0))
3893 if (TREE_CODE (ubase
) == ADDR_EXPR
3894 && TREE_CODE (cbase
) == ADDR_EXPR
)
3898 usym
= TREE_OPERAND (ubase
, 0);
3899 csym
= TREE_OPERAND (cbase
, 0);
3900 if (TREE_CODE (usym
) == ARRAY_REF
)
3902 tree ind
= TREE_OPERAND (usym
, 1);
3903 if (TREE_CODE (ind
) == INTEGER_CST
3904 && host_integerp (ind
, 0)
3905 && TREE_INT_CST_LOW (ind
) == 0)
3906 usym
= TREE_OPERAND (usym
, 0);
3908 if (TREE_CODE (csym
) == ARRAY_REF
)
3910 tree ind
= TREE_OPERAND (csym
, 1);
3911 if (TREE_CODE (ind
) == INTEGER_CST
3912 && host_integerp (ind
, 0)
3913 && TREE_INT_CST_LOW (ind
) == 0)
3914 csym
= TREE_OPERAND (csym
, 0);
3916 if (operand_equal_p (usym
, csym
, 0))
3919 /* Now do more complex comparison */
3920 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3921 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3922 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3926 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3927 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3929 aff_combination_scale (&cbase_aff
, shwi_to_double_int (-1 * ratio
));
3930 aff_combination_add (&ubase_aff
, &cbase_aff
);
3931 expr
= aff_combination_to_tree (&ubase_aff
);
3933 ent
.hash
= iterative_hash_expr (expr
, 0);
3934 slot
= (struct iv_inv_expr_ent
**) htab_find_slot (data
->inv_expr_tab
,
3939 *slot
= XNEW (struct iv_inv_expr_ent
);
3940 (*slot
)->expr
= expr
;
3941 (*slot
)->hash
= ent
.hash
;
3942 (*slot
)->id
= data
->inv_expr_id
++;
3948 /* Determines the cost of the computation by that USE is expressed
3949 from induction variable CAND. If ADDRESS_P is true, we just need
3950 to create an address from it, otherwise we want to get it into
3951 register. A set of invariants we depend on is stored in
3952 DEPENDS_ON. AT is the statement at that the value is computed.
3953 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3954 addressing is likely. */
3957 get_computation_cost_at (struct ivopts_data
*data
,
3958 struct iv_use
*use
, struct iv_cand
*cand
,
3959 bool address_p
, bitmap
*depends_on
, gimple at
,
3963 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3965 tree utype
= TREE_TYPE (ubase
), ctype
;
3966 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3967 HOST_WIDE_INT ratio
, aratio
;
3968 bool var_present
, symbol_present
, stmt_is_after_inc
;
3971 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3975 /* Only consider real candidates. */
3977 return infinite_cost
;
3979 cbase
= cand
->iv
->base
;
3980 cstep
= cand
->iv
->step
;
3981 ctype
= TREE_TYPE (cbase
);
3983 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3985 /* We do not have a precision to express the values of use. */
3986 return infinite_cost
;
3991 /* Do not try to express address of an object with computation based
3992 on address of a different object. This may cause problems in rtl
3993 level alias analysis (that does not expect this to be happening,
3994 as this is illegal in C), and would be unlikely to be useful
3996 if (use
->iv
->base_object
3997 && cand
->iv
->base_object
3998 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3999 return infinite_cost
;
4002 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4004 /* TODO -- add direct handling of this case. */
4008 /* CSTEPI is removed from the offset in case statement is after the
4009 increment. If the step is not constant, we use zero instead.
4010 This is a bit imprecise (there is the extra addition), but
4011 redundancy elimination is likely to transform the code so that
4012 it uses value of the variable before increment anyway,
4013 so it is not that much unrealistic. */
4014 if (cst_and_fits_in_hwi (cstep
))
4015 cstepi
= int_cst_value (cstep
);
4019 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4020 return infinite_cost
;
4022 if (double_int_fits_in_shwi_p (rat
))
4023 ratio
= double_int_to_shwi (rat
);
4025 return infinite_cost
;
4028 ctype
= TREE_TYPE (cbase
);
4030 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4031 or ratio == 1, it is better to handle this like
4033 ubase - ratio * cbase + ratio * var
4035 (also holds in the case ratio == -1, TODO. */
4037 if (cst_and_fits_in_hwi (cbase
))
4039 offset
= - ratio
* int_cst_value (cbase
);
4040 cost
= difference_cost (data
,
4041 ubase
, build_int_cst (utype
, 0),
4042 &symbol_present
, &var_present
, &offset
,
4044 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4046 else if (ratio
== 1)
4048 cost
= difference_cost (data
,
4050 &symbol_present
, &var_present
, &offset
,
4052 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4055 && !POINTER_TYPE_P (ctype
)
4056 && multiplier_allowed_in_address_p
4057 (ratio
, TYPE_MODE (TREE_TYPE (utype
)),
4058 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4061 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4062 cost
= difference_cost (data
,
4064 &symbol_present
, &var_present
, &offset
,
4066 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4070 cost
= force_var_cost (data
, cbase
, depends_on
);
4071 cost
= add_costs (cost
,
4072 difference_cost (data
,
4073 ubase
, build_int_cst (utype
, 0),
4074 &symbol_present
, &var_present
,
4075 &offset
, depends_on
));
4076 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4077 cost
.cost
+= add_cost (TYPE_MODE (ctype
), data
->speed
);
4083 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4084 /* Clear depends on. */
4085 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4086 bitmap_clear (*depends_on
);
4089 /* If we are after the increment, the value of the candidate is higher by
4091 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4092 if (stmt_is_after_inc
)
4093 offset
-= ratio
* cstepi
;
4095 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4096 (symbol/var1/const parts may be omitted). If we are looking for an
4097 address, find the cost of addressing this. */
4099 return add_costs (cost
,
4100 get_address_cost (symbol_present
, var_present
,
4101 offset
, ratio
, cstepi
,
4102 TYPE_MODE (TREE_TYPE (utype
)),
4103 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4104 speed
, stmt_is_after_inc
,
4107 /* Otherwise estimate the costs for computing the expression. */
4108 if (!symbol_present
&& !var_present
&& !offset
)
4111 cost
.cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
), speed
);
4115 /* Symbol + offset should be compile-time computable so consider that they
4116 are added once to the variable, if present. */
4117 if (var_present
&& (symbol_present
|| offset
))
4118 cost
.cost
+= adjust_setup_cost (data
,
4119 add_cost (TYPE_MODE (ctype
), speed
));
4121 /* Having offset does not affect runtime cost in case it is added to
4122 symbol, but it increases complexity. */
4126 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
);
4128 aratio
= ratio
> 0 ? ratio
: -ratio
;
4130 cost
.cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
), speed
);
4135 *can_autoinc
= false;
4138 /* Just get the expression, expand it and measure the cost. */
4139 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4142 return infinite_cost
;
4145 comp
= build_simple_mem_ref (comp
);
4147 return new_cost (computation_cost (comp
, speed
), 0);
4151 /* Determines the cost of the computation by that USE is expressed
4152 from induction variable CAND. If ADDRESS_P is true, we just need
4153 to create an address from it, otherwise we want to get it into
4154 register. A set of invariants we depend on is stored in
4155 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4156 autoinc addressing is likely. */
4159 get_computation_cost (struct ivopts_data
*data
,
4160 struct iv_use
*use
, struct iv_cand
*cand
,
4161 bool address_p
, bitmap
*depends_on
,
4162 bool *can_autoinc
, int *inv_expr_id
)
4164 return get_computation_cost_at (data
,
4165 use
, cand
, address_p
, depends_on
, use
->stmt
,
4166 can_autoinc
, inv_expr_id
);
4169 /* Determines cost of basing replacement of USE on CAND in a generic
4173 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4174 struct iv_use
*use
, struct iv_cand
*cand
)
4178 int inv_expr_id
= -1;
4180 /* The simple case first -- if we need to express value of the preserved
4181 original biv, the cost is 0. This also prevents us from counting the
4182 cost of increment twice -- once at this use and once in the cost of
4184 if (cand
->pos
== IP_ORIGINAL
4185 && cand
->incremented_at
== use
->stmt
)
4187 set_use_iv_cost (data
, use
, cand
, zero_cost
, NULL
, NULL_TREE
, -1);
4191 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4192 NULL
, &inv_expr_id
);
4194 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
,
4197 return !infinite_cost_p (cost
);
4200 /* Determines cost of basing replacement of USE on CAND in an address. */
4203 determine_use_iv_cost_address (struct ivopts_data
*data
,
4204 struct iv_use
*use
, struct iv_cand
*cand
)
4208 int inv_expr_id
= -1;
4209 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4210 &can_autoinc
, &inv_expr_id
);
4212 if (cand
->ainc_use
== use
)
4215 cost
.cost
-= cand
->cost_step
;
4216 /* If we generated the candidate solely for exploiting autoincrement
4217 opportunities, and it turns out it can't be used, set the cost to
4218 infinity to make sure we ignore it. */
4219 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4220 cost
= infinite_cost
;
4222 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
,
4225 return !infinite_cost_p (cost
);
4228 /* Computes value of candidate CAND at position AT in iteration NITER, and
4229 stores it to VAL. */
4232 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4235 aff_tree step
, delta
, nit
;
4236 struct iv
*iv
= cand
->iv
;
4237 tree type
= TREE_TYPE (iv
->base
);
4238 tree steptype
= type
;
4239 if (POINTER_TYPE_P (type
))
4240 steptype
= sizetype
;
4242 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4243 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4244 aff_combination_convert (&nit
, steptype
);
4245 aff_combination_mult (&nit
, &step
, &delta
);
4246 if (stmt_after_increment (loop
, cand
, at
))
4247 aff_combination_add (&delta
, &step
);
4249 tree_to_aff_combination (iv
->base
, type
, val
);
4250 aff_combination_add (val
, &delta
);
4253 /* Returns period of induction variable iv. */
4256 iv_period (struct iv
*iv
)
4258 tree step
= iv
->step
, period
, type
;
4261 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4263 type
= unsigned_type_for (TREE_TYPE (step
));
4264 /* Period of the iv is lcm (step, type_range)/step -1,
4265 i.e., N*type_range/step - 1. Since type range is power
4266 of two, N == (step >> num_of_ending_zeros_binary (step),
4267 so the final result is
4269 (type_range >> num_of_ending_zeros_binary (step)) - 1
4272 pow2div
= num_ending_zeros (step
);
4274 period
= build_low_bits_mask (type
,
4275 (TYPE_PRECISION (type
)
4276 - tree_low_cst (pow2div
, 1)));
4281 /* Returns the comparison operator used when eliminating the iv USE. */
4283 static enum tree_code
4284 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4286 struct loop
*loop
= data
->current_loop
;
4290 ex_bb
= gimple_bb (use
->stmt
);
4291 exit
= EDGE_SUCC (ex_bb
, 0);
4292 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4293 exit
= EDGE_SUCC (ex_bb
, 1);
4295 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4298 /* Check whether it is possible to express the condition in USE by comparison
4299 of candidate CAND. If so, store the value compared with to BOUND. */
4302 may_eliminate_iv (struct ivopts_data
*data
,
4303 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
4308 struct loop
*loop
= data
->current_loop
;
4310 struct tree_niter_desc
*desc
= NULL
;
4312 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4315 /* For now works only for exits that dominate the loop latch.
4316 TODO: extend to other conditions inside loop body. */
4317 ex_bb
= gimple_bb (use
->stmt
);
4318 if (use
->stmt
!= last_stmt (ex_bb
)
4319 || gimple_code (use
->stmt
) != GIMPLE_COND
4320 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4323 exit
= EDGE_SUCC (ex_bb
, 0);
4324 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4325 exit
= EDGE_SUCC (ex_bb
, 1);
4326 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4329 nit
= niter_for_exit (data
, exit
, &desc
);
4333 /* Determine whether we can use the variable to test the exit condition.
4334 This is the case iff the period of the induction variable is greater
4335 than the number of iterations for which the exit condition is true. */
4336 period
= iv_period (cand
->iv
);
4338 /* If the number of iterations is constant, compare against it directly. */
4339 if (TREE_CODE (nit
) == INTEGER_CST
)
4341 /* See cand_value_at. */
4342 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4344 if (!tree_int_cst_lt (nit
, period
))
4349 if (tree_int_cst_lt (period
, nit
))
4354 /* If not, and if this is the only possible exit of the loop, see whether
4355 we can get a conservative estimate on the number of iterations of the
4356 entire loop and compare against that instead. */
4359 double_int period_value
, max_niter
;
4361 max_niter
= desc
->max
;
4362 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4363 max_niter
= double_int_add (max_niter
, double_int_one
);
4364 period_value
= tree_to_double_int (period
);
4365 if (double_int_ucmp (max_niter
, period_value
) > 0)
4367 /* See if we can take advantage of infered loop bound information. */
4368 if (loop_only_exit_p (loop
, exit
))
4370 if (!estimated_loop_iterations (loop
, true, &max_niter
))
4372 /* The loop bound is already adjusted by adding 1. */
4373 if (double_int_ucmp (max_niter
, period_value
) > 0)
4381 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
4383 *bound
= aff_combination_to_tree (&bnd
);
4384 /* It is unlikely that computing the number of iterations using division
4385 would be more profitable than keeping the original induction variable. */
4386 if (expression_expensive_p (*bound
))
4392 /* Determines cost of basing replacement of USE on CAND in a condition. */
4395 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4396 struct iv_use
*use
, struct iv_cand
*cand
)
4398 tree bound
= NULL_TREE
;
4400 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4401 comp_cost elim_cost
, express_cost
, cost
;
4403 int inv_expr_id
= -1;
4404 tree
*control_var
, *bound_cst
;
4406 /* Only consider real candidates. */
4409 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
, -1);
4413 /* Try iv elimination. */
4414 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4416 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4417 /* The bound is a loop invariant, so it will be only computed
4419 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4422 elim_cost
= infinite_cost
;
4424 /* Try expressing the original giv. If it is compared with an invariant,
4425 note that we cannot get rid of it. */
4426 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4430 /* When the condition is a comparison of the candidate IV against
4431 zero, prefer this IV.
4433 TODO: The constant that we're substracting from the cost should
4434 be target-dependent. This information should be added to the
4435 target costs for each backend. */
4436 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4437 && integer_zerop (*bound_cst
)
4438 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4439 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4440 elim_cost
.cost
-= 1;
4442 express_cost
= get_computation_cost (data
, use
, cand
, false,
4443 &depends_on_express
, NULL
,
4445 fd_ivopts_data
= data
;
4446 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4448 /* Choose the better approach, preferring the eliminated IV. */
4449 if (compare_costs (elim_cost
, express_cost
) <= 0)
4452 depends_on
= depends_on_elim
;
4453 depends_on_elim
= NULL
;
4457 cost
= express_cost
;
4458 depends_on
= depends_on_express
;
4459 depends_on_express
= NULL
;
4463 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, inv_expr_id
);
4465 if (depends_on_elim
)
4466 BITMAP_FREE (depends_on_elim
);
4467 if (depends_on_express
)
4468 BITMAP_FREE (depends_on_express
);
4470 return !infinite_cost_p (cost
);
4473 /* Determines cost of basing replacement of USE on CAND. Returns false
4474 if USE cannot be based on CAND. */
4477 determine_use_iv_cost (struct ivopts_data
*data
,
4478 struct iv_use
*use
, struct iv_cand
*cand
)
4482 case USE_NONLINEAR_EXPR
:
4483 return determine_use_iv_cost_generic (data
, use
, cand
);
4486 return determine_use_iv_cost_address (data
, use
, cand
);
4489 return determine_use_iv_cost_condition (data
, use
, cand
);
4496 /* Return true if get_computation_cost indicates that autoincrement is
4497 a possibility for the pair of USE and CAND, false otherwise. */
4500 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4501 struct iv_cand
*cand
)
4507 if (use
->type
!= USE_ADDRESS
)
4510 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4511 &can_autoinc
, NULL
);
4513 BITMAP_FREE (depends_on
);
4515 return !infinite_cost_p (cost
) && can_autoinc
;
4518 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4519 use that allows autoincrement, and set their AINC_USE if possible. */
4522 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4526 for (i
= 0; i
< n_iv_cands (data
); i
++)
4528 struct iv_cand
*cand
= iv_cand (data
, i
);
4529 struct iv_use
*closest
= NULL
;
4530 if (cand
->pos
!= IP_ORIGINAL
)
4532 for (j
= 0; j
< n_iv_uses (data
); j
++)
4534 struct iv_use
*use
= iv_use (data
, j
);
4535 unsigned uid
= gimple_uid (use
->stmt
);
4536 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4537 || uid
> gimple_uid (cand
->incremented_at
))
4539 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4542 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4544 cand
->ainc_use
= closest
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4568 /* Determines costs of basing the use of the iv on an iv candidate. */
4571 determine_use_iv_costs (struct ivopts_data
*data
)
4575 struct iv_cand
*cand
;
4576 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4578 alloc_use_cost_map (data
);
4580 for (i
= 0; i
< n_iv_uses (data
); i
++)
4582 use
= iv_use (data
, i
);
4584 if (data
->consider_all_candidates
)
4586 for (j
= 0; j
< n_iv_cands (data
); j
++)
4588 cand
= iv_cand (data
, j
);
4589 determine_use_iv_cost (data
, use
, cand
);
4596 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4598 cand
= iv_cand (data
, j
);
4599 if (!determine_use_iv_cost (data
, use
, cand
))
4600 bitmap_set_bit (to_clear
, j
);
4603 /* Remove the candidates for that the cost is infinite from
4604 the list of related candidates. */
4605 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4606 bitmap_clear (to_clear
);
4610 BITMAP_FREE (to_clear
);
4612 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4614 fprintf (dump_file
, "Use-candidate costs:\n");
4616 for (i
= 0; i
< n_iv_uses (data
); i
++)
4618 use
= iv_use (data
, i
);
4620 fprintf (dump_file
, "Use %d:\n", i
);
4621 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4622 for (j
= 0; j
< use
->n_map_members
; j
++)
4624 if (!use
->cost_map
[j
].cand
4625 || infinite_cost_p (use
->cost_map
[j
].cost
))
4628 fprintf (dump_file
, " %d\t%d\t%d\t",
4629 use
->cost_map
[j
].cand
->id
,
4630 use
->cost_map
[j
].cost
.cost
,
4631 use
->cost_map
[j
].cost
.complexity
);
4632 if (use
->cost_map
[j
].depends_on
)
4633 bitmap_print (dump_file
,
4634 use
->cost_map
[j
].depends_on
, "","");
4635 if (use
->cost_map
[j
].inv_expr_id
!= -1)
4636 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
4637 fprintf (dump_file
, "\n");
4640 fprintf (dump_file
, "\n");
4642 fprintf (dump_file
, "\n");
4646 /* Determines cost of the candidate CAND. */
4649 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4651 comp_cost cost_base
;
4652 unsigned cost
, cost_step
;
4661 /* There are two costs associated with the candidate -- its increment
4662 and its initialization. The second is almost negligible for any loop
4663 that rolls enough, so we take it just very little into account. */
4665 base
= cand
->iv
->base
;
4666 cost_base
= force_var_cost (data
, base
, NULL
);
4667 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)), data
->speed
);
4669 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
4671 /* Prefer the original ivs unless we may gain something by replacing it.
4672 The reason is to make debugging simpler; so this is not relevant for
4673 artificial ivs created by other optimization passes. */
4674 if (cand
->pos
!= IP_ORIGINAL
4675 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4678 /* Prefer not to insert statements into latch unless there are some
4679 already (so that we do not create unnecessary jumps). */
4680 if (cand
->pos
== IP_END
4681 && empty_block_p (ip_end_pos (data
->current_loop
)))
4685 cand
->cost_step
= cost_step
;
4688 /* Determines costs of computation of the candidates. */
4691 determine_iv_costs (struct ivopts_data
*data
)
4695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4697 fprintf (dump_file
, "Candidate costs:\n");
4698 fprintf (dump_file
, " cand\tcost\n");
4701 for (i
= 0; i
< n_iv_cands (data
); i
++)
4703 struct iv_cand
*cand
= iv_cand (data
, i
);
4705 determine_iv_cost (data
, cand
);
4707 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4708 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4711 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4712 fprintf (dump_file
, "\n");
4715 /* Calculates cost for having SIZE induction variables. */
4718 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4720 /* We add size to the cost, so that we prefer eliminating ivs
4722 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
4723 data
->body_includes_call
);
4726 /* For each size of the induction variable set determine the penalty. */
4729 determine_set_costs (struct ivopts_data
*data
)
4733 gimple_stmt_iterator psi
;
4735 struct loop
*loop
= data
->current_loop
;
4738 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4740 fprintf (dump_file
, "Global costs:\n");
4741 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4742 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
4743 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
4744 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
4748 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
4750 phi
= gsi_stmt (psi
);
4751 op
= PHI_RESULT (phi
);
4753 if (!is_gimple_reg (op
))
4756 if (get_iv (data
, op
))
4762 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4764 struct version_info
*info
= ver_info (data
, j
);
4766 if (info
->inv_id
&& info
->has_nonlin_use
)
4770 data
->regs_used
= n
;
4771 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4772 fprintf (dump_file
, " regs_used %d\n", n
);
4774 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4776 fprintf (dump_file
, " cost for size:\n");
4777 fprintf (dump_file
, " ivs\tcost\n");
4778 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4779 fprintf (dump_file
, " %d\t%d\n", j
,
4780 ivopts_global_cost_for_size (data
, j
));
4781 fprintf (dump_file
, "\n");
4785 /* Returns true if A is a cheaper cost pair than B. */
4788 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4798 cmp
= compare_costs (a
->cost
, b
->cost
);
4805 /* In case the costs are the same, prefer the cheaper candidate. */
4806 if (a
->cand
->cost
< b
->cand
->cost
)
4813 /* Returns candidate by that USE is expressed in IVS. */
4815 static struct cost_pair
*
4816 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4818 return ivs
->cand_for_use
[use
->id
];
4821 /* Computes the cost field of IVS structure. */
4824 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4826 comp_cost cost
= ivs
->cand_use_cost
;
4828 cost
.cost
+= ivs
->cand_cost
;
4830 cost
.cost
+= ivopts_global_cost_for_size (data
,
4831 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
4836 /* Remove invariants in set INVS to set IVS. */
4839 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4847 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4849 ivs
->n_invariant_uses
[iid
]--;
4850 if (ivs
->n_invariant_uses
[iid
] == 0)
4855 /* Set USE not to be expressed by any candidate in IVS. */
4858 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4861 unsigned uid
= use
->id
, cid
;
4862 struct cost_pair
*cp
;
4864 cp
= ivs
->cand_for_use
[uid
];
4870 ivs
->cand_for_use
[uid
] = NULL
;
4871 ivs
->n_cand_uses
[cid
]--;
4873 if (ivs
->n_cand_uses
[cid
] == 0)
4875 bitmap_clear_bit (ivs
->cands
, cid
);
4876 /* Do not count the pseudocandidates. */
4880 ivs
->cand_cost
-= cp
->cand
->cost
;
4882 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
4885 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
4887 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
4889 if (cp
->inv_expr_id
!= -1)
4891 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
4892 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
4893 ivs
->num_used_inv_expr
--;
4895 iv_ca_recount_cost (data
, ivs
);
4898 /* Add invariants in set INVS to set IVS. */
4901 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
4909 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4911 ivs
->n_invariant_uses
[iid
]++;
4912 if (ivs
->n_invariant_uses
[iid
] == 1)
4917 /* Set cost pair for USE in set IVS to CP. */
4920 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4921 struct iv_use
*use
, struct cost_pair
*cp
)
4923 unsigned uid
= use
->id
, cid
;
4925 if (ivs
->cand_for_use
[uid
] == cp
)
4928 if (ivs
->cand_for_use
[uid
])
4929 iv_ca_set_no_cp (data
, ivs
, use
);
4936 ivs
->cand_for_use
[uid
] = cp
;
4937 ivs
->n_cand_uses
[cid
]++;
4938 if (ivs
->n_cand_uses
[cid
] == 1)
4940 bitmap_set_bit (ivs
->cands
, cid
);
4941 /* Do not count the pseudocandidates. */
4945 ivs
->cand_cost
+= cp
->cand
->cost
;
4947 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
4950 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
4951 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
4953 if (cp
->inv_expr_id
!= -1)
4955 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
4956 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
4957 ivs
->num_used_inv_expr
++;
4959 iv_ca_recount_cost (data
, ivs
);
4963 /* Extend set IVS by expressing USE by some of the candidates in it
4964 if possible. All important candidates will be considered
4965 if IMPORTANT_CANDIDATES is true. */
4968 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4969 struct iv_use
*use
, bool important_candidates
)
4971 struct cost_pair
*best_cp
= NULL
, *cp
;
4976 gcc_assert (ivs
->upto
>= use
->id
);
4978 if (ivs
->upto
== use
->id
)
4984 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
4985 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
4987 struct iv_cand
*cand
= iv_cand (data
, i
);
4989 cp
= get_use_iv_cost (data
, use
, cand
);
4991 if (cheaper_cost_pair (cp
, best_cp
))
4995 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
4998 /* Get cost for assignment IVS. */
5001 iv_ca_cost (struct iv_ca
*ivs
)
5003 /* This was a conditional expression but it triggered a bug in
5006 return infinite_cost
;
5011 /* Returns true if all dependences of CP are among invariants in IVS. */
5014 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5019 if (!cp
->depends_on
)
5022 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5024 if (ivs
->n_invariant_uses
[i
] == 0)
5031 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5032 it before NEXT_CHANGE. */
5034 static struct iv_ca_delta
*
5035 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5036 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5038 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5041 change
->old_cp
= old_cp
;
5042 change
->new_cp
= new_cp
;
5043 change
->next_change
= next_change
;
5048 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5051 static struct iv_ca_delta
*
5052 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5054 struct iv_ca_delta
*last
;
5062 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5064 last
->next_change
= l2
;
5069 /* Reverse the list of changes DELTA, forming the inverse to it. */
5071 static struct iv_ca_delta
*
5072 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5074 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5075 struct cost_pair
*tmp
;
5077 for (act
= delta
; act
; act
= next
)
5079 next
= act
->next_change
;
5080 act
->next_change
= prev
;
5084 act
->old_cp
= act
->new_cp
;
5091 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5092 reverted instead. */
5095 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5096 struct iv_ca_delta
*delta
, bool forward
)
5098 struct cost_pair
*from
, *to
;
5099 struct iv_ca_delta
*act
;
5102 delta
= iv_ca_delta_reverse (delta
);
5104 for (act
= delta
; act
; act
= act
->next_change
)
5108 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5109 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5113 iv_ca_delta_reverse (delta
);
5116 /* Returns true if CAND is used in IVS. */
5119 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5121 return ivs
->n_cand_uses
[cand
->id
] > 0;
5124 /* Returns number of induction variable candidates in the set IVS. */
5127 iv_ca_n_cands (struct iv_ca
*ivs
)
5129 return ivs
->n_cands
;
5132 /* Free the list of changes DELTA. */
5135 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5137 struct iv_ca_delta
*act
, *next
;
5139 for (act
= *delta
; act
; act
= next
)
5141 next
= act
->next_change
;
5148 /* Allocates new iv candidates assignment. */
5150 static struct iv_ca
*
5151 iv_ca_new (struct ivopts_data
*data
)
5153 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5157 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5158 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5159 nw
->cands
= BITMAP_ALLOC (NULL
);
5162 nw
->cand_use_cost
= zero_cost
;
5164 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5165 nw
->cost
= zero_cost
;
5166 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5167 nw
->num_used_inv_expr
= 0;
5172 /* Free memory occupied by the set IVS. */
5175 iv_ca_free (struct iv_ca
**ivs
)
5177 free ((*ivs
)->cand_for_use
);
5178 free ((*ivs
)->n_cand_uses
);
5179 BITMAP_FREE ((*ivs
)->cands
);
5180 free ((*ivs
)->n_invariant_uses
);
5181 free ((*ivs
)->used_inv_expr
);
5186 /* Dumps IVS to FILE. */
5189 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5191 const char *pref
= " invariants ";
5193 comp_cost cost
= iv_ca_cost (ivs
);
5195 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5196 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5197 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5198 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5200 for (i
= 0; i
< ivs
->upto
; i
++)
5202 struct iv_use
*use
= iv_use (data
, i
);
5203 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5205 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5206 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5208 fprintf (file
, " use:%d --> ??\n", use
->id
);
5211 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5212 if (ivs
->n_invariant_uses
[i
])
5214 fprintf (file
, "%s%d", pref
, i
);
5217 fprintf (file
, "\n\n");
5220 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5221 new set, and store differences in DELTA. Number of induction variables
5222 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5223 the function will try to find a solution with mimimal iv candidates. */
5226 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5227 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5228 unsigned *n_ivs
, bool min_ncand
)
5233 struct cost_pair
*old_cp
, *new_cp
;
5236 for (i
= 0; i
< ivs
->upto
; i
++)
5238 use
= iv_use (data
, i
);
5239 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5242 && old_cp
->cand
== cand
)
5245 new_cp
= get_use_iv_cost (data
, use
, cand
);
5249 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5252 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5255 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5258 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5259 cost
= iv_ca_cost (ivs
);
5261 *n_ivs
= iv_ca_n_cands (ivs
);
5262 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5267 /* Try narrowing set IVS by removing CAND. Return the cost of
5268 the new set and store the differences in DELTA. */
5271 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5272 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5276 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5278 struct iv_cand
*cnd
;
5282 for (i
= 0; i
< n_iv_uses (data
); i
++)
5284 use
= iv_use (data
, i
);
5286 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5287 if (old_cp
->cand
!= cand
)
5292 if (data
->consider_all_candidates
)
5294 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5299 cnd
= iv_cand (data
, ci
);
5301 cp
= get_use_iv_cost (data
, use
, cnd
);
5305 if (!iv_ca_has_deps (ivs
, cp
))
5308 if (!cheaper_cost_pair (cp
, new_cp
))
5316 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5321 cnd
= iv_cand (data
, ci
);
5323 cp
= get_use_iv_cost (data
, use
, cnd
);
5326 if (!iv_ca_has_deps (ivs
, cp
))
5329 if (!cheaper_cost_pair (cp
, new_cp
))
5338 iv_ca_delta_free (delta
);
5339 return infinite_cost
;
5342 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5345 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5346 cost
= iv_ca_cost (ivs
);
5347 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5352 /* Try optimizing the set of candidates IVS by removing candidates different
5353 from to EXCEPT_CAND from it. Return cost of the new set, and store
5354 differences in DELTA. */
5357 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5358 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5361 struct iv_ca_delta
*act_delta
, *best_delta
;
5363 comp_cost best_cost
, acost
;
5364 struct iv_cand
*cand
;
5367 best_cost
= iv_ca_cost (ivs
);
5369 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5371 cand
= iv_cand (data
, i
);
5373 if (cand
== except_cand
)
5376 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5378 if (compare_costs (acost
, best_cost
) < 0)
5381 iv_ca_delta_free (&best_delta
);
5382 best_delta
= act_delta
;
5385 iv_ca_delta_free (&act_delta
);
5394 /* Recurse to possibly remove other unnecessary ivs. */
5395 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5396 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5397 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5398 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5402 /* Tries to extend the sets IVS in the best possible way in order
5403 to express the USE. If ORIGINALP is true, prefer candidates from
5404 the original set of IVs, otherwise favor important candidates not
5405 based on any memory object. */
5408 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5409 struct iv_use
*use
, bool originalp
)
5411 comp_cost best_cost
, act_cost
;
5414 struct iv_cand
*cand
;
5415 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5416 struct cost_pair
*cp
;
5418 iv_ca_add_use (data
, ivs
, use
, false);
5419 best_cost
= iv_ca_cost (ivs
);
5421 cp
= iv_ca_cand_for_use (ivs
, use
);
5426 iv_ca_add_use (data
, ivs
, use
, true);
5427 best_cost
= iv_ca_cost (ivs
);
5428 cp
= iv_ca_cand_for_use (ivs
, use
);
5432 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5433 iv_ca_set_no_cp (data
, ivs
, use
);
5436 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5437 first try important candidates not based on any memory object. Only if
5438 this fails, try the specific ones. Rationale -- in loops with many
5439 variables the best choice often is to use just one generic biv. If we
5440 added here many ivs specific to the uses, the optimization algorithm later
5441 would be likely to get stuck in a local minimum, thus causing us to create
5442 too many ivs. The approach from few ivs to more seems more likely to be
5443 successful -- starting from few ivs, replacing an expensive use by a
5444 specific iv should always be a win. */
5445 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5447 cand
= iv_cand (data
, i
);
5449 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5452 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5455 if (iv_ca_cand_used_p (ivs
, cand
))
5458 cp
= get_use_iv_cost (data
, use
, cand
);
5462 iv_ca_set_cp (data
, ivs
, use
, cp
);
5463 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5465 iv_ca_set_no_cp (data
, ivs
, use
);
5466 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5468 if (compare_costs (act_cost
, best_cost
) < 0)
5470 best_cost
= act_cost
;
5472 iv_ca_delta_free (&best_delta
);
5473 best_delta
= act_delta
;
5476 iv_ca_delta_free (&act_delta
);
5479 if (infinite_cost_p (best_cost
))
5481 for (i
= 0; i
< use
->n_map_members
; i
++)
5483 cp
= use
->cost_map
+ i
;
5488 /* Already tried this. */
5489 if (cand
->important
)
5491 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5493 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5497 if (iv_ca_cand_used_p (ivs
, cand
))
5501 iv_ca_set_cp (data
, ivs
, use
, cp
);
5502 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5503 iv_ca_set_no_cp (data
, ivs
, use
);
5504 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5507 if (compare_costs (act_cost
, best_cost
) < 0)
5509 best_cost
= act_cost
;
5512 iv_ca_delta_free (&best_delta
);
5513 best_delta
= act_delta
;
5516 iv_ca_delta_free (&act_delta
);
5520 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5521 iv_ca_delta_free (&best_delta
);
5523 return !infinite_cost_p (best_cost
);
5526 /* Finds an initial assignment of candidates to uses. */
5528 static struct iv_ca
*
5529 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5531 struct iv_ca
*ivs
= iv_ca_new (data
);
5534 for (i
= 0; i
< n_iv_uses (data
); i
++)
5535 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5544 /* Tries to improve set of induction variables IVS. */
5547 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5550 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5551 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5552 struct iv_cand
*cand
;
5554 /* Try extending the set of induction variables by one. */
5555 for (i
= 0; i
< n_iv_cands (data
); i
++)
5557 cand
= iv_cand (data
, i
);
5559 if (iv_ca_cand_used_p (ivs
, cand
))
5562 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5566 /* If we successfully added the candidate and the set is small enough,
5567 try optimizing it by removing other candidates. */
5568 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5570 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5571 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5572 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5573 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5576 if (compare_costs (acost
, best_cost
) < 0)
5579 iv_ca_delta_free (&best_delta
);
5580 best_delta
= act_delta
;
5583 iv_ca_delta_free (&act_delta
);
5588 /* Try removing the candidates from the set instead. */
5589 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5591 /* Nothing more we can do. */
5596 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5597 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5598 iv_ca_delta_free (&best_delta
);
5602 /* Attempts to find the optimal set of induction variables. We do simple
5603 greedy heuristic -- we try to replace at most one candidate in the selected
5604 solution and remove the unused ivs while this improves the cost. */
5606 static struct iv_ca
*
5607 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
5611 /* Get the initial solution. */
5612 set
= get_initial_solution (data
, originalp
);
5615 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5616 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5620 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5622 fprintf (dump_file
, "Initial set of candidates:\n");
5623 iv_ca_dump (data
, dump_file
, set
);
5626 while (try_improve_iv_set (data
, set
))
5628 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5630 fprintf (dump_file
, "Improved to:\n");
5631 iv_ca_dump (data
, dump_file
, set
);
5638 static struct iv_ca
*
5639 find_optimal_iv_set (struct ivopts_data
*data
)
5642 struct iv_ca
*set
, *origset
;
5644 comp_cost cost
, origcost
;
5646 /* Determine the cost based on a strategy that starts with original IVs,
5647 and try again using a strategy that prefers candidates not based
5649 origset
= find_optimal_iv_set_1 (data
, true);
5650 set
= find_optimal_iv_set_1 (data
, false);
5652 if (!origset
&& !set
)
5655 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
5656 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
5658 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5660 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
5661 origcost
.cost
, origcost
.complexity
);
5662 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
5663 cost
.cost
, cost
.complexity
);
5666 /* Choose the one with the best cost. */
5667 if (compare_costs (origcost
, cost
) <= 0)
5674 iv_ca_free (&origset
);
5676 for (i
= 0; i
< n_iv_uses (data
); i
++)
5678 use
= iv_use (data
, i
);
5679 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5685 /* Creates a new induction variable corresponding to CAND. */
5688 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5690 gimple_stmt_iterator incr_pos
;
5700 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
5704 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
5712 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
5716 /* Mark that the iv is preserved. */
5717 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5718 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5720 /* Rewrite the increment so that it uses var_before directly. */
5721 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5725 gimple_add_tmp_var (cand
->var_before
);
5726 add_referenced_var (cand
->var_before
);
5728 base
= unshare_expr (cand
->iv
->base
);
5730 create_iv (base
, unshare_expr (cand
->iv
->step
),
5731 cand
->var_before
, data
->current_loop
,
5732 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5735 /* Creates new induction variables described in SET. */
5738 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5741 struct iv_cand
*cand
;
5744 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5746 cand
= iv_cand (data
, i
);
5747 create_new_iv (data
, cand
);
5750 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5752 fprintf (dump_file
, "\nSelected IV set: \n");
5753 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5755 cand
= iv_cand (data
, i
);
5756 dump_cand (dump_file
, cand
);
5758 fprintf (dump_file
, "\n");
5762 /* Rewrites USE (definition of iv used in a nonlinear expression)
5763 using candidate CAND. */
5766 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5767 struct iv_use
*use
, struct iv_cand
*cand
)
5772 gimple_stmt_iterator bsi
;
5774 /* An important special case -- if we are asked to express value of
5775 the original iv by itself, just exit; there is no need to
5776 introduce a new computation (that might also need casting the
5777 variable to unsigned and back). */
5778 if (cand
->pos
== IP_ORIGINAL
5779 && cand
->incremented_at
== use
->stmt
)
5781 tree step
, ctype
, utype
;
5782 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
5784 gcc_assert (is_gimple_assign (use
->stmt
));
5785 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
5787 step
= cand
->iv
->step
;
5788 ctype
= TREE_TYPE (step
);
5789 utype
= TREE_TYPE (cand
->var_after
);
5790 if (TREE_CODE (step
) == NEGATE_EXPR
)
5792 incr_code
= MINUS_EXPR
;
5793 step
= TREE_OPERAND (step
, 0);
5796 /* Check whether we may leave the computation unchanged.
5797 This is the case only if it does not rely on other
5798 computations in the loop -- otherwise, the computation
5799 we rely upon may be removed in remove_unused_ivs,
5800 thus leading to ICE. */
5801 old_code
= gimple_assign_rhs_code (use
->stmt
);
5802 if (old_code
== PLUS_EXPR
5803 || old_code
== MINUS_EXPR
5804 || old_code
== POINTER_PLUS_EXPR
)
5806 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
5807 op
= gimple_assign_rhs2 (use
->stmt
);
5808 else if (old_code
!= MINUS_EXPR
5809 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
5810 op
= gimple_assign_rhs1 (use
->stmt
);
5818 && (TREE_CODE (op
) == INTEGER_CST
5819 || operand_equal_p (op
, step
, 0)))
5822 /* Otherwise, add the necessary computations to express
5824 op
= fold_convert (ctype
, cand
->var_before
);
5825 comp
= fold_convert (utype
,
5826 build2 (incr_code
, ctype
, op
,
5827 unshare_expr (step
)));
5831 comp
= get_computation (data
->current_loop
, use
, cand
);
5832 gcc_assert (comp
!= NULL_TREE
);
5835 switch (gimple_code (use
->stmt
))
5838 tgt
= PHI_RESULT (use
->stmt
);
5840 /* If we should keep the biv, do not replace it. */
5841 if (name_info (data
, tgt
)->preserve_biv
)
5844 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
5848 tgt
= gimple_assign_lhs (use
->stmt
);
5849 bsi
= gsi_for_stmt (use
->stmt
);
5856 if (!valid_gimple_rhs_p (comp
)
5857 || (gimple_code (use
->stmt
) != GIMPLE_PHI
5858 /* We can't allow re-allocating the stmt as it might be pointed
5860 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
5861 >= gimple_num_ops (gsi_stmt (bsi
)))))
5863 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
5864 true, GSI_SAME_STMT
);
5865 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
5867 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
5868 /* As this isn't a plain copy we have to reset alignment
5870 if (SSA_NAME_PTR_INFO (comp
))
5872 SSA_NAME_PTR_INFO (comp
)->align
= 1;
5873 SSA_NAME_PTR_INFO (comp
)->misalign
= 0;
5878 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
5880 ass
= gimple_build_assign (tgt
, comp
);
5881 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
5883 bsi
= gsi_for_stmt (use
->stmt
);
5884 remove_phi_node (&bsi
, false);
5888 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
5889 use
->stmt
= gsi_stmt (bsi
);
5893 /* Copies the reference information from OLD_REF to NEW_REF. */
5896 copy_ref_info (tree new_ref
, tree old_ref
)
5898 tree new_ptr_base
= NULL_TREE
;
5900 TREE_SIDE_EFFECTS (new_ref
) = TREE_SIDE_EFFECTS (old_ref
);
5901 TREE_THIS_VOLATILE (new_ref
) = TREE_THIS_VOLATILE (old_ref
);
5903 new_ptr_base
= TREE_OPERAND (new_ref
, 0);
5905 /* We can transfer points-to information from an old pointer
5906 or decl base to the new one. */
5908 && TREE_CODE (new_ptr_base
) == SSA_NAME
5909 && !SSA_NAME_PTR_INFO (new_ptr_base
))
5911 tree base
= get_base_address (old_ref
);
5914 else if ((TREE_CODE (base
) == MEM_REF
5915 || TREE_CODE (base
) == TARGET_MEM_REF
)
5916 && TREE_CODE (TREE_OPERAND (base
, 0)) == SSA_NAME
5917 && SSA_NAME_PTR_INFO (TREE_OPERAND (base
, 0)))
5919 struct ptr_info_def
*new_pi
;
5920 duplicate_ssa_name_ptr_info
5921 (new_ptr_base
, SSA_NAME_PTR_INFO (TREE_OPERAND (base
, 0)));
5922 new_pi
= SSA_NAME_PTR_INFO (new_ptr_base
);
5923 /* We have to be careful about transfering alignment information. */
5924 if (TREE_CODE (old_ref
) == MEM_REF
5925 && !(TREE_CODE (new_ref
) == TARGET_MEM_REF
5926 && (TMR_INDEX2 (new_ref
)
5927 || (TMR_STEP (new_ref
)
5928 && (TREE_INT_CST_LOW (TMR_STEP (new_ref
))
5929 < new_pi
->align
)))))
5931 new_pi
->misalign
+= double_int_sub (mem_ref_offset (old_ref
),
5932 mem_ref_offset (new_ref
)).low
;
5933 new_pi
->misalign
&= (new_pi
->align
- 1);
5938 new_pi
->misalign
= 0;
5941 else if (TREE_CODE (base
) == VAR_DECL
5942 || TREE_CODE (base
) == PARM_DECL
5943 || TREE_CODE (base
) == RESULT_DECL
)
5945 struct ptr_info_def
*pi
= get_ptr_info (new_ptr_base
);
5946 pt_solution_set_var (&pi
->pt
, base
);
5951 /* Performs a peephole optimization to reorder the iv update statement with
5952 a mem ref to enable instruction combining in later phases. The mem ref uses
5953 the iv value before the update, so the reordering transformation requires
5954 adjustment of the offset. CAND is the selected IV_CAND.
5958 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5966 directly propagating t over to (1) will introduce overlapping live range
5967 thus increase register pressure. This peephole transform it into:
5971 t = MEM_REF (base, iv2, 8, 8);
5978 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
5981 gimple iv_update
, stmt
;
5983 gimple_stmt_iterator gsi
, gsi_iv
;
5985 if (cand
->pos
!= IP_NORMAL
)
5988 var_after
= cand
->var_after
;
5989 iv_update
= SSA_NAME_DEF_STMT (var_after
);
5991 bb
= gimple_bb (iv_update
);
5992 gsi
= gsi_last_nondebug_bb (bb
);
5993 stmt
= gsi_stmt (gsi
);
5995 /* Only handle conditional statement for now. */
5996 if (gimple_code (stmt
) != GIMPLE_COND
)
5999 gsi_prev_nondebug (&gsi
);
6000 stmt
= gsi_stmt (gsi
);
6001 if (stmt
!= iv_update
)
6004 gsi_prev_nondebug (&gsi
);
6005 if (gsi_end_p (gsi
))
6008 stmt
= gsi_stmt (gsi
);
6009 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6012 if (stmt
!= use
->stmt
)
6015 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6018 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6020 fprintf (dump_file
, "Reordering \n");
6021 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6022 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6023 fprintf (dump_file
, "\n");
6026 gsi
= gsi_for_stmt (use
->stmt
);
6027 gsi_iv
= gsi_for_stmt (iv_update
);
6028 gsi_move_before (&gsi_iv
, &gsi
);
6030 cand
->pos
= IP_BEFORE_USE
;
6031 cand
->incremented_at
= use
->stmt
;
6034 /* Rewrites USE (address that is an iv) using candidate CAND. */
6037 rewrite_use_address (struct ivopts_data
*data
,
6038 struct iv_use
*use
, struct iv_cand
*cand
)
6041 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6042 tree base_hint
= NULL_TREE
;
6046 adjust_iv_update_pos (cand
, use
);
6047 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6049 unshare_aff_combination (&aff
);
6051 /* To avoid undefined overflow problems, all IV candidates use unsigned
6052 integer types. The drawback is that this makes it impossible for
6053 create_mem_ref to distinguish an IV that is based on a memory object
6054 from one that represents simply an offset.
6056 To work around this problem, we pass a hint to create_mem_ref that
6057 indicates which variable (if any) in aff is an IV based on a memory
6058 object. Note that we only consider the candidate. If this is not
6059 based on an object, the base of the reference is in some subexpression
6060 of the use -- but these will use pointer types, so they are recognized
6061 by the create_mem_ref heuristics anyway. */
6062 if (cand
->iv
->base_object
)
6063 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6065 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6066 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6067 reference_alias_ptr_type (*use
->op_p
),
6068 iv
, base_hint
, data
->speed
);
6069 copy_ref_info (ref
, *use
->op_p
);
6073 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6077 rewrite_use_compare (struct ivopts_data
*data
,
6078 struct iv_use
*use
, struct iv_cand
*cand
)
6080 tree comp
, *var_p
, op
, bound
;
6081 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6082 enum tree_code compare
;
6083 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6089 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6090 tree var_type
= TREE_TYPE (var
);
6093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6095 fprintf (dump_file
, "Replacing exit test: ");
6096 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6098 compare
= iv_elimination_compare (data
, use
);
6099 bound
= unshare_expr (fold_convert (var_type
, bound
));
6100 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6102 gsi_insert_seq_on_edge_immediate (
6103 loop_preheader_edge (data
->current_loop
),
6106 gimple_cond_set_lhs (use
->stmt
, var
);
6107 gimple_cond_set_code (use
->stmt
, compare
);
6108 gimple_cond_set_rhs (use
->stmt
, op
);
6112 /* The induction variable elimination failed; just express the original
6114 comp
= get_computation (data
->current_loop
, use
, cand
);
6115 gcc_assert (comp
!= NULL_TREE
);
6117 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6120 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6121 true, GSI_SAME_STMT
);
6124 /* Rewrites USE using candidate CAND. */
6127 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6131 case USE_NONLINEAR_EXPR
:
6132 rewrite_use_nonlinear_expr (data
, use
, cand
);
6136 rewrite_use_address (data
, use
, cand
);
6140 rewrite_use_compare (data
, use
, cand
);
6147 update_stmt (use
->stmt
);
6150 /* Rewrite the uses using the selected induction variables. */
6153 rewrite_uses (struct ivopts_data
*data
)
6156 struct iv_cand
*cand
;
6159 for (i
= 0; i
< n_iv_uses (data
); i
++)
6161 use
= iv_use (data
, i
);
6162 cand
= use
->selected
;
6165 rewrite_use (data
, use
, cand
);
6169 /* Removes the ivs that are not used after rewriting. */
6172 remove_unused_ivs (struct ivopts_data
*data
)
6176 bitmap toremove
= BITMAP_ALLOC (NULL
);
6178 /* Figure out an order in which to release SSA DEFs so that we don't
6179 release something that we'd have to propagate into a debug stmt
6181 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6183 struct version_info
*info
;
6185 info
= ver_info (data
, j
);
6187 && !integer_zerop (info
->iv
->step
)
6189 && !info
->iv
->have_use_for
6190 && !info
->preserve_biv
)
6191 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6194 release_defs_bitset (toremove
);
6196 BITMAP_FREE (toremove
);
6199 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6200 for pointer_map_traverse. */
6203 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6204 void *data ATTRIBUTE_UNUSED
)
6206 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6212 /* Frees data allocated by the optimization of a single loop. */
6215 free_loop_data (struct ivopts_data
*data
)
6223 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6224 pointer_map_destroy (data
->niters
);
6225 data
->niters
= NULL
;
6228 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6230 struct version_info
*info
;
6232 info
= ver_info (data
, i
);
6236 info
->has_nonlin_use
= false;
6237 info
->preserve_biv
= false;
6240 bitmap_clear (data
->relevant
);
6241 bitmap_clear (data
->important_candidates
);
6243 for (i
= 0; i
< n_iv_uses (data
); i
++)
6245 struct iv_use
*use
= iv_use (data
, i
);
6248 BITMAP_FREE (use
->related_cands
);
6249 for (j
= 0; j
< use
->n_map_members
; j
++)
6250 if (use
->cost_map
[j
].depends_on
)
6251 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6252 free (use
->cost_map
);
6255 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
6257 for (i
= 0; i
< n_iv_cands (data
); i
++)
6259 struct iv_cand
*cand
= iv_cand (data
, i
);
6263 if (cand
->depends_on
)
6264 BITMAP_FREE (cand
->depends_on
);
6267 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
6269 if (data
->version_info_size
< num_ssa_names
)
6271 data
->version_info_size
= 2 * num_ssa_names
;
6272 free (data
->version_info
);
6273 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6276 data
->max_inv_id
= 0;
6278 FOR_EACH_VEC_ELT (tree
, decl_rtl_to_reset
, i
, obj
)
6279 SET_DECL_RTL (obj
, NULL_RTX
);
6281 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
6283 htab_empty (data
->inv_expr_tab
);
6284 data
->inv_expr_id
= 0;
6287 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6291 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6293 free_loop_data (data
);
6294 free (data
->version_info
);
6295 BITMAP_FREE (data
->relevant
);
6296 BITMAP_FREE (data
->important_candidates
);
6298 VEC_free (tree
, heap
, decl_rtl_to_reset
);
6299 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
6300 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
6301 htab_delete (data
->inv_expr_tab
);
6304 /* Returns true if the loop body BODY includes any function calls. */
6307 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6309 gimple_stmt_iterator gsi
;
6312 for (i
= 0; i
< num_nodes
; i
++)
6313 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6315 gimple stmt
= gsi_stmt (gsi
);
6316 if (is_gimple_call (stmt
)
6317 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6323 /* Optimizes the LOOP. Returns true if anything changed. */
6326 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6328 bool changed
= false;
6329 struct iv_ca
*iv_ca
;
6333 gcc_assert (!data
->niters
);
6334 data
->current_loop
= loop
;
6335 data
->speed
= optimize_loop_for_speed_p (loop
);
6337 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6339 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6341 exit
= single_dom_exit (loop
);
6344 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6345 exit
->src
->index
, exit
->dest
->index
);
6346 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6347 fprintf (dump_file
, "\n");
6350 fprintf (dump_file
, "\n");
6353 body
= get_loop_body (loop
);
6354 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6355 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6358 /* For each ssa name determines whether it behaves as an induction variable
6360 if (!find_induction_variables (data
))
6363 /* Finds interesting uses (item 1). */
6364 find_interesting_uses (data
);
6365 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6368 /* Finds candidates for the induction variables (item 2). */
6369 find_iv_candidates (data
);
6371 /* Calculates the costs (item 3, part 1). */
6372 determine_iv_costs (data
);
6373 determine_use_iv_costs (data
);
6374 determine_set_costs (data
);
6376 /* Find the optimal set of induction variables (item 3, part 2). */
6377 iv_ca
= find_optimal_iv_set (data
);
6382 /* Create the new induction variables (item 4, part 1). */
6383 create_new_ivs (data
, iv_ca
);
6384 iv_ca_free (&iv_ca
);
6386 /* Rewrite the uses (item 4, part 2). */
6387 rewrite_uses (data
);
6389 /* Remove the ivs that are unused after rewriting. */
6390 remove_unused_ivs (data
);
6392 /* We have changed the structure of induction variables; it might happen
6393 that definitions in the scev database refer to some of them that were
6398 free_loop_data (data
);
6403 /* Main entry point. Optimizes induction variables in loops. */
6406 tree_ssa_iv_optimize (void)
6409 struct ivopts_data data
;
6412 tree_ssa_iv_optimize_init (&data
);
6414 /* Optimize the loops starting with the innermost ones. */
6415 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6417 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6418 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6420 tree_ssa_iv_optimize_loop (&data
, loop
);
6423 tree_ssa_iv_optimize_finalize (&data
);