/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
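/* For illustration only (a sketch, not code from this pass): given a source
   loop such as

       for (i = 0; i < n; i++)
         sum += a[i];

   the pass may strength-reduce the address computation &a[i] into a pointer
   induction variable and eliminate the counter I entirely, producing code
   equivalent to

       for (p = &a[0]; p != &a[n]; p++)
         sum += *p;

   where the multiplication hidden in the array indexing is replaced by a
   cheap pointer increment and the exit test is rewritten to compare the new
   induction variable against a loop-invariant bound.  */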
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
                struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
                                     exit, desc, true)
          && integer_zerop (desc->may_be_zero)
          && !contains_abnormal_ssa_name_p (desc->niter))
        niter = desc->niter;
      else
        niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
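/* Example (illustrative, assuming the usual folding of address
   expressions): for EXPR = (char *) (&a[0] + i), the conversion and the
   POINTER_PLUS_EXPR are looked through and the recursion ends at &a, so
   the returned base object is the address of the array A; for an
   expression that is not of pointer type, NULL_TREE is returned.  */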
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
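/* Example (illustrative): for a counter described by the phi node
   i_1 = PHI <0 (preheader), i_2 (latch)> with i_2 = i_1 + 4 in the loop
   body, SCEV analysis recognizes a simple affine biv and this function
   returns the step 4; a loop-invariant phi result yields a zero step and
   hence NULL_TREE.  */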
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (POINTER_TYPE_P (type))
        step = fold_convert (sizetype, step);
      else
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
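/* Worked example (illustrative): for a reference a[i] where the element
   size is 4 bytes and I is an iv with step 1, the callback replaces the
   index by its initial value and accumulates 4 * 1 = 4 bytes into
   DTA->step, so the whole address can later be rebuilt as a single iv
   stepping by 4.  */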
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
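/* Example (illustrative): given a structure
   struct s { int pad; int f[10]; } x;, stripping offsets from the address
   &x.f[3] peels off the constant parts of the component and array
   references, returning a tree equivalent to &x with *OFFSET set to
   sizeof (int) * 4, i.e. the byte offset of x.f[3] within X.  */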
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
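/* Example (illustrative): for TYPE == int this returns unsigned int, for
   TYPE == unsigned int it returns the type unchanged, and for a pointer
   type it returns an unsigned integer type of the same precision, so that
   candidate arithmetic never relies on undefined signed overflow.  */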
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation of the
   value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at
          || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
              && cand->ainc_use != use))
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
        cand->ainc_use = use;
      else
        cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
                        bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
        {
          new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
          code = POINTER_PLUS_EXPR;
        }
      else
        new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
                       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
                       use->stmt);
    }
}
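/* Example (illustrative): on a target with post-increment addressing, a
   4-byte load through a pointer iv with step 4 satisfies
   GET_MODE_SIZE (mem_mode) == cstepi, so an IP_AFTER_USE candidate is
   added; this lets the final code use a single auto-incrementing memory
   access instead of a separate address update.  */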
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
        add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
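/* A sketch of the third case above: if a use has base &a[4] (for a
   hypothetical array a of 4-byte elements) and step 4, stripping the
   constant offset yields a candidate with base &a[0], so several uses such
   as &a[i + 1] and &a[i + 4] can share one iv and differ only in a constant
   offset that is typically folded into the addressing mode.  */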
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
        size = n_iv_cands (data);
      else
        {
          s = 0;
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              s++;
            }

          /* Round up to the power of two, so that computing the modulus
             by it is fast.  */
          for (size = 1; size < s; size <<= 1)
            continue;
        }

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
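/* Because n_map_members is rounded up to a power of two above, the
   open-addressed cost maps can reduce a candidate id modulo the map size
   with a mask instead of a division; set_use_iv_cost and get_use_iv_cost
   below rely on the identity (valid for any power-of-two SIZE):

     id % SIZE == (id & (SIZE - 1))  */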
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
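/* The ordering implemented by compare_costs is lexicographic: runtime cost
   first, with complexity as a tie-breaker.  For example, a pair with
   cost = 4, complexity = 1 compares cheaper than cost = 4, complexity = 2,
   which in turn is cheaper than cost = 5, complexity = 0.  */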
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
                 struct iv_use *use, struct iv_cand *cand,
                 comp_cost cost, bitmap depends_on, tree value,
                 int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
                 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
        return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
        cost += rtx_cost (set, SET, speed);
      else
        cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
           handled_component_p (*expr_p);
           expr_p = &TREE_OPERAND (*expr_p, 0))
        continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
        break;

      if (DECL_MODE (obj) == BLKmode)
        x = produce_memory_decl_rtl (obj, regno);
      else
        x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
                          TYPE_ADDR_SPACE (type), speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
        return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
        return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
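/* For example (a sketch): if A is (unsigned int) x and B is
   (unsigned int) y, where x and y are hypothetical values of type
   unsigned long, both conversions are stripped and the caller can compute
   the difference in the wider unsigned long type, which folds better; the
   result is cast back to the narrower type afterwards.  */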
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
                     struct iv_use *use, struct iv_cand *cand, gimple at,
                     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
        cstep_common = fold_convert (common_type, cstep);
      else
        cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
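/* A small worked instance of the decomposition above: with a use iv
   { base = &a[0], step = 4 } and a candidate iv { base = 0, step = 1 }
   (hypothetical values), constant_multiple_of yields ratio 4, and the
   combination stored in AFF is

     use = &a[0] - 4 * 0 + 4 * var = &a[0] + 4 * var

   where var is the candidate's variable valid at statement AT.  */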
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
                    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
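/* E.g. with a setup cost of 10 and an expected trip count of 5 (the
   AVG_LOOP_NITER fallback when no profile data is available), a
   speed-optimized loop accounts the setup as 10 / 5 = 2 per iteration,
   whereas when optimizing for size the full 10 is kept.  */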
/* Returns cost of addition in MODE.  */

unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
                                 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
                 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
             GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;            /* The constant to multiply by.  */
  enum machine_mode mode;       /* In mode.  */
  unsigned cost;                /* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
          && e1->cst == e2->cst);
}

/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
               gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
             (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
                                 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
        {
          XEXP (addr, 1) = gen_int_mode (i, address_mode);
          if (memory_address_addr_space_p (mode, addr, as))
            SET_BIT (valid_mult, i + MAX_RATIO);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "  allowed multipliers:");
          for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
            if (TEST_BIT (valid_mult, i + MAX_RATIO))
              fprintf (dump_file, " %d", (int) i);
          fprintf (dump_file, "\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
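/* As an illustration (a sketch; the real set is whatever
   memory_address_addr_space_p accepts): on a target with x86-like scaled
   addressing, only the ratios 1, 2, 4 and 8 would typically be marked in
   VALID_MULT, so a query for ratio 4 returns true while a query for
   ratio 3 returns false and forces an explicit multiplication.  */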
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
                  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
                  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
                  addr_space_t as, bool speed,
                  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
                           data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
        width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
        {
          off = -((HOST_WIDE_INT) 1 << i);
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
        {
          off = ((HOST_WIDE_INT) 1 << i) - 1;
          XEXP (addr, 1) = gen_int_mode (off, address_mode);
          if (memory_address_addr_space_p (mem_mode, addr, as))
            break;
        }
      if (i == -1)
        off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "get_address_cost:\n");
          fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->min_offset);
          fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
                   GET_MODE_NAME (mem_mode),
                   data->max_offset);
        }

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
        if (multiplier_allowed_in_address_p (i, mem_mode, as))
          {
            rat = i;
            break;
          }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (HAVE_PRE_DECREMENT)
        {
          addr = gen_rtx_PRE_DEC (address_mode, reg0);
          has_predec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_DECREMENT)
        {
          addr = gen_rtx_POST_DEC (address_mode, reg0);
          has_postdec[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_PRE_INCREMENT)
        {
          addr = gen_rtx_PRE_INC (address_mode, reg0);
          has_preinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      if (HAVE_POST_INCREMENT)
        {
          addr = gen_rtx_POST_INC (address_mode, reg0);
          has_postinc[mem_mode]
            = memory_address_addr_space_p (mem_mode, addr, as);
        }
      for (i = 0; i < 16; i++)
        {
          sym_p = i & 1;
          var_p = (i >> 1) & 1;
          off_p = (i >> 2) & 1;
          rat_p = (i >> 3) & 1;

          addr = reg0;
          if (rat_p)
            addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
                                   gen_int_mode (rat, address_mode));

          if (var_p)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

          if (sym_p)
            {
              base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
              /* ??? We can run into trouble with some backends by presenting
                 it with symbols which haven't been properly passed through
                 targetm.encode_section_info.  By setting the local bit, we
                 enhance the probability of things working.  */
              SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

              if (off_p)
                base = gen_rtx_fmt_e (CONST, address_mode,
                                      gen_rtx_fmt_ee
                                        (PLUS, address_mode, base,
                                         gen_int_mode (off, address_mode)));
            }
          else if (off_p)
            base = gen_int_mode (off, address_mode);
          else
            base = NULL_RTX;

          if (base)
            addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

          start_sequence ();
          /* To avoid splitting addressing modes, pretend that no cse will
             follow.  */
          old_cse_not_expected = cse_not_expected;
          cse_not_expected = true;
          addr = memory_address_addr_space (mem_mode, addr, as);
          cse_not_expected = old_cse_not_expected;
          seq = get_insns ();
          end_sequence ();

          acost = seq_cost (seq, speed);
          acost += address_cost (addr, mem_mode, as, speed);

          if (!acost)
            acost = 1;
          data->costs[sym_p][var_p][off_p][rat_p] = acost;
        }

      /* On some targets, it is quite expensive to load symbol to a register,
         which makes addresses that contain symbols look much more expensive.
         However, the symbol will have to be loaded in any case before the
         loop (and quite likely we have it in register already), so it does not
         make much sense to penalize them too heavily.  So make some final
         tweaks for the SYMBOL_PRESENT modes:

         If VAR_PRESENT is false, and the mode obtained by changing symbol to
         var is cheaper, use this mode with small penalty.
         If VAR_PRESENT is true, try whether the mode with
         SYMBOL_PRESENT = false is cheaper even with cost of addition, and
         if this is the case, use it.  */
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
        {
          var_p = i & 1;
          off_p = (i >> 1) & 1;
          rat_p = (i >> 2) & 1;

          acost = data->costs[0][1][off_p][rat_p] + 1;
          if (var_p)
            acost += add_c;

          if (acost < data->costs[1][var_p][off_p][rat_p])
            data->costs[1][var_p][off_p][rat_p] = acost;
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Address costs:\n");

          for (i = 0; i < 16; i++)
            {
              sym_p = i & 1;
              var_p = (i >> 1) & 1;
              off_p = (i >> 2) & 1;
              rat_p = (i >> 3) & 1;

              fprintf (dump_file, "  ");
              if (sym_p)
                fprintf (dump_file, "sym + ");
              if (var_p)
                fprintf (dump_file, "var + ");
              if (off_p)
                fprintf (dump_file, "cst + ");
              if (rat_p)
                fprintf (dump_file, "rat * ");

              acost = data->costs[sym_p][var_p][off_p][rat_p];
              fprintf (dump_file, "index costs %d\n", acost);
            }
          if (has_predec[mem_mode] || has_postdec[mem_mode]
              || has_preinc[mem_mode] || has_postinc[mem_mode])
            fprintf (dump_file, "  May include autoinc/dec\n");
          fprintf (dump_file, "\n");
        }

      VEC_replace (address_cost_data, address_cost_data_list,
                   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
            && msize == cstep)
           || (has_postdec[mem_mode] && autoinc_offset == 0
               && msize == -cstep)
           || (has_preinc[mem_mode] && autoinc_offset == msize
               && msize == cstep)
           || (has_predec[mem_mode] && autoinc_offset == -msize
               && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
              && data->min_offset <= s_offset
              && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
             && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
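/* The four boolean dimensions of the cost table correspond to address
   shapes such as the following (a sketch for a hypothetical target that
   accepts the full form):

     MEM (sym + var + 16 + 4 * index)      costs[1][1][1][1]
     MEM (var + 4 * index)                 costs[0][1][0][1]
     MEM (sym + 16)                        costs[1][0][1][0]

   The complexity returned is simply the number of parts present.  */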
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
        {
          integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
                                                             2000), i);

          symbol_cost[i] = computation_cost (addr, i) + 1;

          address_cost[i]
            = computation_cost (build2 (POINTER_PLUS_EXPR, type,
                                        addr,
                                        build_int_cst (sizetype, 2000)), i) + 1;
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
              fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
              fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
              fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
              fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
              fprintf (dump_file, "\n");
            }
        }

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
        return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
        {
          tree obj = TREE_OPERAND (expr, 0);

          if (TREE_CODE (obj) == VAR_DECL
              || TREE_CODE (obj) == PARM_DECL
              || TREE_CODE (obj) == RESULT_DECL)
            return new_cost (symbol_cost [speed], 0);
        }

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
        cost1 = zero_cost;
      else
        cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
        cost0 = zero_cost;
      else
        cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
        cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
        cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
        return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
                tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
                    tree addr, bool *symbol_present, bool *var_present,
                    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
                     tree e1, tree e2, bool *symbol_present, bool *var_present,
                     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
                               symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
                 tree e1, tree e2, bool *symbol_present, bool *var_present,
                 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
                                offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
        return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
        return false;
    }
  return true;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
                            tree cbase, HOST_WIDE_INT ratio,
                            bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
        ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
        cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
           || (TREE_CODE (ubase) == ADDR_EXPR
               && is_gimple_min_invariant (ubase)))
          && (TREE_CODE (cbase) == INTEGER_CST))
        return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
           || (TREE_CODE (cbase) == ADDR_EXPR
               && is_gimple_min_invariant (cbase)))
          && (TREE_CODE (ubase) == INTEGER_CST))
        return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
        return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
          && TREE_CODE (cbase) == ADDR_EXPR)
        {
          tree usym, csym;

          usym = TREE_OPERAND (ubase, 0);
          csym = TREE_OPERAND (cbase, 0);
          if (TREE_CODE (usym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (usym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                usym = TREE_OPERAND (usym, 0);
            }
          if (TREE_CODE (csym) == ARRAY_REF)
            {
              tree ind = TREE_OPERAND (csym, 1);
              if (TREE_CODE (ind) == INTEGER_CST
                  && host_integerp (ind, 0)
                  && TREE_INT_CST_LOW (ind) == 0)
                csym = TREE_OPERAND (csym, 0);
            }
          if (operand_equal_p (usym, csym, 0))
            return -1;
        }
      /* Now do more complex comparison  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
        return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
                                                     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
                         struct iv_use *use, struct iv_cand *cand,
                         bool address_p, bitmap *depends_on, gimple at,
                         bool *can_autoinc,
                         int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
         on address of a different object.  This may cause problems in rtl
         level alias analysis (that does not expect this to be happening,
         as this is illegal in C), and would be unlikely to be useful
         anyway.  */
      if (use->iv->base_object
          && cand->iv->base_object
          && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
        return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
                              ubase, build_int_cst (utype, 0),
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
        {
          aff_tree real_cbase_aff;
          aff_tree cstep_aff;

          tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
                                   &real_cbase_aff);
          tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

          aff_combination_add (&real_cbase_aff, &cstep_aff);
          real_cbase = aff_combination_to_tree (&real_cbase_aff);
        }

      cost = difference_cost (data,
                              ubase, real_cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
           && !POINTER_TYPE_P (ctype)
           && multiplier_allowed_in_address_p
                (ratio, TYPE_MODE (TREE_TYPE (utype)),
                 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
        = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
                              ubase, cbase,
                              &symbol_present, &var_present, &offset,
                              depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
                        difference_cost (data,
                                         ubase, build_int_cst (utype, 0),
                                         &symbol_present, &var_present,
                                         &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
          get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
        bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
                      get_address_cost (symbol_present, var_present,
                                        offset, ratio, cstepi,
                                        TYPE_MODE (TREE_TYPE (utype)),
                                        TYPE_ADDR_SPACE (TREE_TYPE (utype)),
                                        speed, stmt_is_after_inc,
                                        can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
        cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
                                    add_cost (TYPE_MODE (ctype), speed));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
                      struct iv_use *use, struct iv_cand *cand,
                      bool address_p, bitmap *depends_on,
                      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
                                  use, cand, address_p, depends_on, use->stmt,
                                  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
                               NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
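/* In closed form, the value computed above is

     val = base + (niter + after) * step

   where after is 1 if AT is after the candidate's increment and 0
   otherwise; e.g. an iv with base 0 and step 4, evaluated before the
   increment at niter = 10, yields 40.  */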
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_low_cst (pow2div, 1)));

  return period;
}
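/* A worked instance of the formula above: for an 8-bit iv with step 4,
   type_range is 256 and num_ending_zeros (4) is 2, so the period is
   (256 >> 2) - 1 = 63; only after 63 further increments does the iv value
   repeat modulo 256, which bounds how many iterations an exit test based
   on this iv can distinguish.  */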
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit, &desc);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (nit, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, nit))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (loop_only_exit_p (loop, exit))
            {
              if (!estimated_loop_iterations (loop, true, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (double_int_ucmp (max_niter, period_value) > 0)
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  return true;
}
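/* A sketch of the elimination this enables (with hypothetical names): for

     for (i = 0; i < 100; i++)
       a[i] = 0;

   a pointer candidate p with base &a[0] and step 4 can replace the exit
   test i < 100 by a comparison of p against &a[100], because the period of
   p in its unsigned type exceeds the 100 iterations for which the original
   condition holds; BOUND here would be the value &a[100] computed by
   cand_value_at.  */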
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;
  int inv_expr_id = -1;
  tree *control_var, *bound_cst;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;
      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);
          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
              || uid > gimple_uid (cand->incremented_at))
            continue;
          if (closest == NULL || uid > gimple_uid (closest->stmt))
            closest = use;
        }
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
        continue;
      cand->ainc_use = closest;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
					    data->body_includes_call);
}
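/* A worked example with made-up numbers (not from any real target): if
   regs_used is 2 and estimate_reg_pressure_cost charges nothing until
   size + regs_used exceeds the available registers, and a spill cost per
   extra register after that, then the "cost for size" table dumped by
   determine_set_costs grows linearly (the "size +" term) for small sets
   and then jumps once the assumed register file is exhausted -- exactly
   the shape that makes the search below prefer few ivs.  */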
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
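/* The register estimate is deliberately coarse: in a loop such as

     for (i = 0; i < n; i++)
       sum += a[i] * c;

   the accumulator sum (a non-iv PHI on the loop header) and the invariant
   c (if it has a nonlinear use) are counted in regs_used, while the biv i
   is not counted here -- ivs are accounted for by the size argument of
   ivopts_global_cost_for_size.  */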
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}
/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs
					    + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
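/* In other words, the invariant maintained for every assignment is

     ivs->cost = ivs->cand_use_cost                  (sum over uses)
		 + ivs->cand_cost                    (sum over candidates)
		 + ivopts_global_cost_for_size (data,
						ivs->n_regs
						+ ivs->num_used_inv_expr);

   the two sums are updated incrementally by iv_ca_set_cp and
   iv_ca_set_no_cp below, and only this final addition is redone here.  */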
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
	ivs->n_regs--;
    }
}
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
	ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
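/* The deltas make speculative evaluation cheap; the callers below follow
   a try/measure/undo pattern:

     iv_ca_delta_commit (data, ivs, delta, true);     (apply)
     cost = iv_ca_cost (ivs);                         (measure)
     iv_ca_delta_commit (data, ivs, delta, false);    (revert)

   and only the best delta found during the search is committed for good.  */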
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
	fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
	fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;

	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
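/* Schematically, one improvement round above is

     best = cost (ivs);
     for each candidate C not yet in ivs:
       delta = add C (and, for small sets, prune other candidates);
       if cost (ivs with delta) < best:  remember delta;
     if no extension helped:  try pruning alone;
     commit the best delta, if any;

   and find_optimal_iv_set_1 below just repeats this until no round
   improves the cost.  */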
/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
	  || old_code == MINUS_EXPR
	  || old_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (old_code != MINUS_EXPR
		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    {
	      SSA_NAME_PTR_INFO (comp)->align = 1;
	      SSA_NAME_PTR_INFO (comp)->misalign = 0;
	    }
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
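/* A purely illustrative example of the generic path above: if the use is

     x_2 = i_1 * 4;

   and the selected candidate ivtmp maintains ivtmp == 4 * i_1, then
   get_computation expresses x_2 directly in terms of ivtmp and the
   multiplication disappears from the loop body -- the classical strength
   reduction.  */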
/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  tree new_ptr_base = NULL_TREE;

  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);

  new_ptr_base = TREE_OPERAND (new_ref, 0);

  /* We can transfer points-to information from an old pointer
     or decl base to the new one.  */
  if (new_ptr_base
      && TREE_CODE (new_ptr_base) == SSA_NAME
      && !SSA_NAME_PTR_INFO (new_ptr_base))
    {
      tree base = get_base_address (old_ref);
      if (!base)
	;
      else if ((TREE_CODE (base) == MEM_REF
		|| TREE_CODE (base) == TARGET_MEM_REF)
	       && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
	       && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
	{
	  struct ptr_info_def *new_pi;
	  duplicate_ssa_name_ptr_info
	    (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
	  new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
	  /* We have to be careful about transferring alignment
	     information.  */
	  if (TREE_CODE (old_ref) == MEM_REF
	      && !(TREE_CODE (new_ref) == TARGET_MEM_REF
		   && (TMR_INDEX2 (new_ref)
		       || (TMR_STEP (new_ref)
			   && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
			       < new_pi->align)))))
	    {
	      new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
						  mem_ref_offset (new_ref)).low;
	      new_pi->misalign &= (new_pi->align - 1);
	    }
	  else
	    {
	      new_pi->align = 1;
	      new_pi->misalign = 0;
	    }
	}
      else if (TREE_CODE (base) == VAR_DECL
	       || TREE_CODE (base) == PARM_DECL
	       || TREE_CODE (base) == RESULT_DECL)
	{
	  struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
	  pt_solution_set_var (&pi->pt, base);
	}
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
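/* Illustrative effect, with details depending on the target's addressing
   modes: an address use a[i_1] with a selected pointer candidate ivtmp_5
   that starts at &a[0] and steps by the element size may be rewritten as

     MEM[(int *) ivtmp_5]       instead of       a[i_1]

   where create_mem_ref picks a (TARGET_)MEM_REF shape the target can
   match with its base/index/step/offset addressing.  */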
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
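/* For illustration: when the elimination succeeds for an exit test

     if (i_1 < n_2)

   the condition is rewritten to compare the selected iv against a bound
   computed once in the preheader, e.g.

     if (ivtmp_5 != bound_7)

   which is what makes the original biv i_1 removable later.  */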
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->inv_id = 0;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}