/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
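/* As an illustration (a sketch, not taken from any particular testcase),
   consider a loop like

       for (i = 0; i < n; i++)
	 a[i] = 0;

   Here &a[i] is an address use and i < n is a compare use of the
   induction variable i.  Strength reduction may introduce a pointer iv
   p with base &a[0] and step sizeof (a[0]) to express the address use,
   and iv elimination may then rewrite the exit test to compare p
   against &a[n], making i dead.  */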
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
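/* For instance (an illustrative sketch, not from a real dump), in

       for (i = 0; i < n; i++)
	 ...

   the counter i is described by an iv with base 0, step 1 and biv_p
   set, while a derived quantity such as 4 * i + 8 would get base 8
   and step 4.  */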
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;
static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
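/* Placing the positions on a loop skeleton (an assumed layout, for
   orientation only):

       loop:
	 ...		      <- IP_BEFORE_USE / IP_AFTER_USE bracket a
	 *p = ...;	      <- specific memory access
	 ...
	 ivtmp += step;	      <- IP_NORMAL, just before the exit test
	 if (cond) goto exit;
       latch:
	 ...		      <- IP_END, at the end of the latch block
	 goto loop;  */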
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;

  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
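/* For example (sketch): for EXPR = &a[i] the returned base object is
   the address of a converted to ptr_type_node; for EXPR = p + 4 it is
   whatever base object the pointer p has; and for a non-pointer
   expression NULL is returned.  */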
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
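/* A biv found here typically comes from a loop-header phi such as
   (an illustrative sketch, not a real dump)

       # i_1 = PHI <0(preheader), i_7(latch)>
       ...
       i_7 = i_1 + 1;

   which yields base 0 and step 1 for i.  */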
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
1517 object is passed to it in DATA. */
1520 idx_record_use (tree base
, tree
*idx
,
1523 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1524 find_interesting_uses_op (data
, *idx
);
1525 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1527 find_interesting_uses_op (data
, array_ref_element_size (base
));
1528 find_interesting_uses_op (data
, array_ref_low_bound (base
));
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
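/* Sketched examples: for TOP = n * 12 and BOT = n, the MULT_EXPR case
   recurses on n (which equals BOT) and yields MUL = 12; for constants
   TOP = 9 and BOT = 4, the INTEGER_CST case finds a nonzero remainder
   and returns false.  */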
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
1822 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1825 tree op
, *lhs
, *rhs
;
1827 use_operand_p use_p
;
1828 enum tree_code code
;
1830 find_invariants_stmt (data
, stmt
);
1832 if (gimple_code (stmt
) == GIMPLE_COND
)
1834 find_interesting_uses_cond (data
, stmt
);
1838 if (is_gimple_assign (stmt
))
1840 lhs
= gimple_assign_lhs_ptr (stmt
);
1841 rhs
= gimple_assign_rhs1_ptr (stmt
);
1843 if (TREE_CODE (*lhs
) == SSA_NAME
)
1845 /* If the statement defines an induction variable, the uses are not
1846 interesting by themselves. */
1848 iv
= get_iv (data
, *lhs
);
1850 if (iv
&& !integer_zerop (iv
->step
))
1854 code
= gimple_assign_rhs_code (stmt
);
1855 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1856 && (REFERENCE_CLASS_P (*rhs
)
1857 || is_gimple_val (*rhs
)))
1859 if (REFERENCE_CLASS_P (*rhs
))
1860 find_interesting_uses_address (data
, stmt
, rhs
);
1862 find_interesting_uses_op (data
, *rhs
);
1864 if (REFERENCE_CLASS_P (*lhs
))
1865 find_interesting_uses_address (data
, stmt
, lhs
);
1868 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1870 find_interesting_uses_cond (data
, stmt
);
1874 /* TODO -- we should also handle address uses of type
1876 memory = call (whatever);
1883 if (gimple_code (stmt
) == GIMPLE_PHI
1884 && gimple_bb (stmt
) == data
->current_loop
->header
)
1886 iv
= get_iv (data
, PHI_RESULT (stmt
));
1888 if (iv
&& !integer_zerop (iv
->step
))
1892 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1894 op
= USE_FROM_PTR (use_p
);
1896 if (TREE_CODE (op
) != SSA_NAME
)
1899 iv
= get_iv (data
, op
);
1903 find_interesting_uses_op (data
, op
);
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
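/* E.g. (sketch): stripping &a[i + 3], where a is an array of 4-byte
   elements, yields &a[i] with OFFSET set to 12; stripping p + 16
   yields p with OFFSET 16.  */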
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
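/* The point of these candidates is to let a single auto-inc/dec
   addressing mode perform both the access and the step; e.g. on a
   target with post-increment addressing (sketch), *p++ = 0 covers the
   address use and the increment of p in one instruction.  */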
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
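/* In other words, the standard candidates are the plain counters
   0, 1, 2, ... (ivs of the form {0, +, 1}) in the unsigned int type,
   and, when a machine word still holds it, in the double-width
   integer type as well.  */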
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 in the loop.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that taking the remainder
             modulo it is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
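
/* Illustrative note (not from the original source): with three related
   candidates, s == 3 and the loop above rounds the table size up to
   size == 4, so a candidate id can later be reduced into the table with
   the cheap mask "id & (size - 1)" instead of a division-based modulo.  */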

/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
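
/* Illustrative note (not from the original source): costs compare
   lexicographically -- e.g. {cost 4, complexity 1} is cheaper than
   {cost 4, complexity 2}, and any {cost 3, ...} beats both, since the
   complexity field only breaks ties between equal runtime costs.  */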

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}

/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
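
/* Illustrative note (not from the original source): the cost map acts as a
   small open-addressed hash table.  With n_map_members == 8, a candidate
   with id 13 starts probing at slot 13 & 7 == 5 and scans forward (wrapping
   to the start) for the first empty slot; get_use_iv_cost below retraces
   exactly the same probe sequence when looking the pair up again.  */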

/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}

/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}

/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;
      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

static int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
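
/* Illustrative note (not from the original source): for a 16-bit type,
   bitno == 15 < HOST_BITS_PER_WIDE_INT, so the sign bit is simply
   (TREE_INT_CST_LOW (t) >> 15) & 1 even when the constant is stored in a
   wider host word.  */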

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
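
/* Illustrative note (not from the original source): for a use with iv
   {base 0, step 4} rewritten in terms of a candidate iv {base 0, step 1},
   constant_multiple_of yields ratio == 4 and the decomposition above
   produces use = ubase - 4 * cbase + 4 * var, i.e. just 4 * var here.  */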

/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
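
/* Illustrative note (not from the original source): when optimizing for
   speed, a setup cost of 10 in a loop with an expected trip count of 5
   is accounted as 10 / 5 == 2 per iteration; when optimizing for size
   the full constant 10 is kept, since the code size is paid only once.  */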

/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}

/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}

/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}

/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
                           data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1 ? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (HAVE_PRE_DECREMENT)
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_DECREMENT)
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_PRE_INCREMENT)
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_INCREMENT)
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (address_cost_data, address_cost_data_list,
                   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
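
/* Illustrative note (not from the original source): the 16 precomputed
   combinations above are indexed by a 4-bit encoding of the address shape:
   bit 0 = symbol present, bit 1 = variable present, bit 2 = constant offset
   present, bit 3 = scaled index present.  E.g. i == 6 prices the shape
   "var + cst", with neither symbol nor scaling.  */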

/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                        addr,
                                        build_int_cst (sizetype, 2000)), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}

/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}

/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}

/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}

/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
                        TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
        get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_MODE (TREE_TYPE (utype)),
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (TYPE_MODE (ctype), speed));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}

/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}

/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}

/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}

/* Returns period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step - 1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
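
/* Illustrative note (not from the original source): for an 8-bit unsigned
   iv with step 4, num_ending_zeros gives 2, so the period is the low-bits
   mask of 8 - 2 == 6 bits, i.e. 63 -- for up to 63 increments the iv is
   guaranteed not to have wrapped back to its initial value.  */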

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit, &desc);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (nit, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, nit))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (loop_only_exit_p (loop, exit))
            {
              if (!estimated_loop_iterations (loop, true, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (double_int_ucmp (max_niter, period_value) > 0)
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  return true;
}
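
/* Illustrative note (not from the original source): for a loop known to run
   exactly 10 times, a candidate with step 1 in a 32-bit unsigned type has
   period 2^32 - 1 >= 10, so the exit test can be rewritten as a comparison
   of the candidate against its value in iteration 10 as computed by
   cand_value_at.  */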

/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;
  int inv_expr_id = -1;
  tree *control_var, *bound_cst;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}

/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for that the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

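/* Note the asymmetry: a non-NULL cost pair always beats a missing
   one, and ties in the use cost are broken by the cost of the
   candidate itself, which makes this comparison suitable for picking
   the best pair out of a sparse row of the cost matrix.  */
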
/* Returns the cost pair by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

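/* Since the reversal also swaps OLD_CP and NEW_CP in every element,
   committing a reversed delta exactly undoes committing the original
   one; iv_ca_delta_commit relies on this for its FORWARD == false
   mode.  */
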
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

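/* The commit / measure / revert sequence above is the basic probe of
   the search: the change is applied tentatively, iv_ca_cost reads
   off the resulting cost, and the set is restored, leaving DELTA to
   be re-applied only if the caller finds the change profitable.  */
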
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

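/* Because iv_ca_prune recurses until no further removal helps, the
   returned delta takes the set to a local minimum with respect to
   removing candidates one at a time (except for EXCEPT_CAND).  */
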
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

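/* Example of the rationale above: in a loop that accesses a[i], b[i]
   and c[i], one generic biv with the three addresses computed from
   it usually beats three separate pointer ivs, even though each use
   in isolation would prefer its own iv.  (Illustrative only.)  */
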
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

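/* So the overall search builds a feasible assignment use by use and
   then repeats hill-climbing steps -- add one candidate, possibly
   prune others -- until no step lowers the cost reported by
   iv_ca_cost.  */
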
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and then try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

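/* For the non-IP_ORIGINAL positions, create_iv builds the loop PHI
   node for VAR_BEFORE and inserts the increment that produces
   VAR_AFTER at INCR_POS; for IP_ORIGINAL nothing is created and the
   existing variable is merely marked as preserved above.  */
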
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            {
              SSA_NAME_PTR_INFO (comp)->align = 1;
              SSA_NAME_PTR_INFO (comp)->misalign = 0;
            }
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  tree new_ptr_base = NULL_TREE;

  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);

  new_ptr_base = TREE_OPERAND (new_ref, 0);

  /* We can transfer points-to information from an old pointer
     or decl base to the new one.  */
  if (new_ptr_base
      && TREE_CODE (new_ptr_base) == SSA_NAME
      && !SSA_NAME_PTR_INFO (new_ptr_base))
    {
      tree base = get_base_address (old_ref);
      if (!base)
        ;
      else if ((TREE_CODE (base) == MEM_REF
                || TREE_CODE (base) == TARGET_MEM_REF)
               && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
               && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
        {
          struct ptr_info_def *new_pi;
          duplicate_ssa_name_ptr_info
            (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
          new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
          /* We have to be careful about transferring alignment
             information.  */
          if (TREE_CODE (old_ref) == MEM_REF
              && !(TREE_CODE (new_ref) == TARGET_MEM_REF
                   && (TMR_INDEX2 (new_ref)
                       || (TMR_STEP (new_ref)
                           && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
                               < new_pi->align)))))
            {
              new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
                                                  mem_ref_offset (new_ref)).low;
              new_pi->misalign &= (new_pi->align - 1);
            }
          else
            {
              new_pi->align = 1;
              new_pi->misalign = 0;
            }
        }
      else if (TREE_CODE (base) == VAR_DECL
               || TREE_CODE (base) == PARM_DECL
               || TREE_CODE (base) == RESULT_DECL)
        {
          struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
          pt_solution_set_var (&pi->pt, base);
        }
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range, thus increasing register pressure.  This peephole transforms it
   into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

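/* E.g. an address use such as a[i] may be rewritten into a single
   TARGET_MEM_REF like MEM[symbol: a, index: ivtmp, step: 4] when the
   target has such an addressing mode; create_mem_ref chooses the
   cheapest form that is valid for the target.  (Illustrative
   only.)  */
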
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

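/* E.g. an exit test i < n may be replaced by a comparison of the
   chosen iv against a bound computed once in the preheader, roughly
   ivtmp != bound, so the original counter can be eliminated
   entirely.  (Illustrative only.)  */
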
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
        rewrite_use_nonlinear_expr (data, use, cand);
        break;

      case USE_ADDRESS:
        rewrite_use_address (data, use, cand);
        break;

      case USE_COMPARE:
        rewrite_use_compare (data, use, cand);
        break;

      default:
        gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_cand *cand;
  struct iv_use *use;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}