/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
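/* As an illustrative sketch (added for exposition, not from the original
   sources): for a loop such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   the address &a[i] is an interesting use of the induction variable i.
   Strength reduction may instead select a pointer candidate and rewrite
   the loop to

       for (p = &a[0]; p != &a[n]; p++)
	 *p = 0;

   replacing the implied multiplication in &a[i] by a pointer increment and
   eliminating i entirely when it has no other uses.  */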
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
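/* For instance (illustrative example, not from the original sources): for
   EXPR == &a[i_1] the base address of the ADDR_EXPR operand is the
   declaration "a", so the result is &a converted to ptr_type_node, while
   an expression of non-pointer type yields NULL_TREE, meaning no base
   object is known.  */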
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
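/* For instance (illustrative example, not from the original sources): for
   the header phi of a canonical counted loop,

       i_1 = PHI <0(preheader), i_7(latch)>
       ...
       i_7 = i_1 + 1;

   the result i_1 is a simple affine biv and the returned step is 1, while
   a phi whose scalar evolution has zero step yields NULL_TREE.  */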
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (POINTER_TYPE_P (type))
	step = fold_convert (sizetype, step);
      else
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (bound)
    *bound = op1;
  if (iv_var)
    *iv_var = iv0;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
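/* E.g. (illustrative example, not from the original sources): for
   "if (i_4 < n_5)" where i_4 is an iv with nonzero step and n_5 is a loop
   invariant, *CONTROL_VAR points to i_4, *BOUND to n_5, and true is
   returned; for "if (n_5 > i_4)" the operands are swapped first, since the
   control variable may appear on either side of the comparison.  */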
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs = NULL_TREE;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
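/* Illustrative examples (not from the original sources): for TOP == BOT the
   result is 1; for TOP = x_1 * 8 and BOT = x_1 the MULT_EXPR case yields 8;
   for TOP = x_1 * 8 + x_1 * 4 and BOT = x_1 the PLUS_EXPR case yields 12;
   and for the constants TOP = 12 and BOT = 4 the INTEGER_CST case yields 3,
   failing when the division leaves a nonzero remainder.  */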
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
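/* For example (illustrative, not from the original sources): stripping
   EXPR = p_1 + 28 yields p_1 with *OFFSET = 28, and stripping &a[5] with
   element size 4 yields &a (converted back to the original pointer type)
   with *OFFSET = 20, so candidates without the constant displacement can
   be formed.  */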
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
      || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
    add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		     use->stmt);
}
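/* For instance (illustrative example, not from the original sources): on a
   target with HAVE_POST_INCREMENT, a 4-byte SImode load through an address
   iv with step 4 gets an IP_AFTER_USE candidate incremented right after the
   use, which the rewriting phase can later match to a post-increment
   addressing mode.  */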
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2449 add_old_ivs_candidates (struct ivopts_data
*data
)
2455 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2457 iv
= ver_info (data
, i
)->iv
;
2458 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2459 add_old_iv_candidates (data
, iv
);
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the next power of two, so that taking the modulo
	     by it is fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}
/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}
/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
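/* Illustrative sketch (not part of the pass): a comp_cost pair orders
   lexicographically -- runtime cost first, complexity as the tiebreak --
   which is exactly what compare_costs above implements.  The standalone
   model below assumes nothing beyond the two fields used in this file;
   the model_* names are hypothetical.  */
#if 0
#include <stdio.h>

struct model_cost { int cost; int complexity; };

/* Mirrors compare_costs: negative iff A is cheaper than B.  */
static int
model_compare (struct model_cost a, struct model_cost b)
{
  if (a.cost == b.cost)
    return a.complexity - b.complexity;
  return a.cost - b.cost;
}

int
main (void)
{
  struct model_cost a = { 4, 1 }, b = { 4, 2 };
  /* Equal runtime cost, so the lower complexity wins: prints -1.  */
  printf ("%d\n", model_compare (a, b));
  return 0;
}
#endif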
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
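/* Illustrative sketch (not part of the pass): because n_map_members is
   rounded up to a power of two in alloc_use_cost_map, the home slot of a
   candidate is computed with a mask instead of a division, and collisions
   are resolved by the linear probe used in set_use_iv_cost and
   get_use_iv_cost above.  The model_* names are hypothetical.  */
#if 0
#include <assert.h>

static unsigned
model_home_slot (unsigned cand_id, unsigned n_map_members)
{
  /* n_map_members must be a power of two; the mask then equals
     cand_id % n_map_members.  */
  return cand_id & (n_map_members - 1);
}

int
main (void)
{
  assert (model_home_slot (13, 8) == 13 % 8);
  assert (model_home_slot (8, 8) == 0);
  return 0;
}
#endif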
/* Returns estimate on cost of computing SEQ.  */

static unsigned
seq_cost (rtx seq, bool speed)
{
  unsigned cost = 0;
  rtx set;

  for (; seq; seq = NEXT_INSN (seq))
    {
      set = single_set (seq);
      if (set)
	cost += rtx_cost (set, SET, speed);
      else
	cost++;
    }

  return cost;
}
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* Return the most significant (sign) bit of T.  Similar to tree_int_cst_msb,
   but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE.  */

int
tree_int_cst_sign_bit (const_tree t)
{
  unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
  unsigned HOST_WIDE_INT w;

  if (bitno < HOST_BITS_PER_WIDE_INT)
    w = TREE_INT_CST_LOW (t);
  else
    {
      w = TREE_INT_CST_HIGH (t);
      bitno -= HOST_BITS_PER_WIDE_INT;
    }

  return (w >> bitno) & 1;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, double_int_neg (rat));
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
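/* Illustrative sketch (not part of the pass): with scalar arithmetic, the
   rewrite performed by get_computation_aff above is
   use = ubase - ratio * cbase + ratio * var, where ratio = ustep / cstep.
   The model below checks the identity for one concrete pair of linear
   induction variables; all values are hypothetical.  */
#if 0
#include <assert.h>

int
main (void)
{
  long ubase = 7, ustep = 8;	/* use iv:  7 + 8*i  */
  long cbase = 3, cstep = 4;	/* cand iv: 3 + 4*i  */
  long ratio = ustep / cstep;	/* assumes ustep is a multiple of cstep  */
  long i;

  for (i = 0; i < 100; i++)
    {
      long var = cbase + cstep * i;	/* value of the candidate  */
      long use = ubase + ustep * i;	/* value the use needs  */
      assert (ubase - ratio * cbase + ratio * var == use);
    }
  return 0;
}
#endif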
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = TREE_TYPE (use->iv->base);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
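/* Illustrative sketch (not part of the pass): when optimizing for speed, a
   one-time setup cost is divided by the expected trip count, so a loop
   believed to iterate many times almost ignores it.  The trip count 5 used
   in the comment below mirrors the AVG_LOOP_NITER fallback; model_* names
   are hypothetical.  */
#if 0
static unsigned
model_adjust_setup_cost (unsigned cost, int for_speed, unsigned avg_niter)
{
  if (for_speed)
    return cost / avg_niter;	/* amortized over the iterations  */
  return cost;			/* for size: paid once, counted once  */
}
/* model_adjust_setup_cost (10, 1, 5) == 2;
   model_adjust_setup_cost (10, 0, 5) == 10.  */
#endif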
/* Returns cost of addition in MODE.  */

static unsigned
add_cost (enum machine_mode mode, bool speed)
{
  static unsigned costs[NUM_MACHINE_MODES];
  rtx seq;
  unsigned cost;

  if (costs[mode])
    return costs[mode];

  start_sequence ();
  force_operand (gen_rtx_fmt_ee (PLUS, mode,
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
				 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
		 NULL_RTX);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);
  if (!cost)
    cost = 1;

  costs[mode] = cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Addition in %s costs %d\n",
	     GET_MODE_NAME (mode), cost);
  return cost;
}
/* Entry in a hashtable of already known costs for multiplication.  */
struct mbc_entry
{
  HOST_WIDE_INT cst;		/* The constant to multiply by.  */
  enum machine_mode mode;	/* In mode.  */
  unsigned cost;		/* The cost.  */
};

/* Counts hash value for the ENTRY.  */

static hashval_t
mbc_entry_hash (const void *entry)
{
  const struct mbc_entry *e = (const struct mbc_entry *) entry;

  return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
}

/* Compares the hash table entries ENTRY1 and ENTRY2.  */

static int
mbc_entry_eq (const void *entry1, const void *entry2)
{
  const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
  const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;

  return (e1->mode == e2->mode
	  && e1->cst == e2->cst);
}
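/* Illustrative sketch (not part of the pass): the hash above mixes the mode
   and the constant with two small multipliers, so distinct (mode, cst) keys
   usually land in different buckets; mbc_entry_eq resolves the rest.  The
   model_* name is hypothetical.  */
#if 0
static unsigned
model_mbc_hash (int mode, long cst)
{
  /* Same shape as mbc_entry_hash: 57 * mode + cst % 877.  */
  return 57u * (unsigned) mode + (unsigned) (cst % 877);
}
#endif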
/* Returns cost of multiplication by constant CST in MODE.  */

unsigned
multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
{
  static htab_t costs;
  struct mbc_entry **cached, act;
  rtx seq;
  unsigned cost;

  if (!costs)
    costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);

  act.mode = mode;
  act.cst = cst;
  cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
  if (*cached)
    return (*cached)->cost;

  *cached = XNEW (struct mbc_entry);
  (*cached)->mode = mode;
  (*cached)->cst = cst;

  start_sequence ();
  expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
	       gen_int_mode (cst, mode), NULL_RTX, 0);
  seq = get_insns ();
  end_sequence ();

  cost = seq_cost (seq, speed);

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
	     (int) cst, GET_MODE_NAME (mode), cost);

  (*cached)->cost = cost;

  return cost;
}
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      sbitmap_zero (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
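/* Illustrative sketch (not part of the pass): ratios in
   [-MAX_RATIO, MAX_RATIO] are cached in a bitmap of 2 * MAX_RATIO + 1 bits,
   biased by MAX_RATIO so that negative ratios get nonnegative bit indices,
   as in the SET_BIT/TEST_BIT calls above.  The model_* names are
   hypothetical.  */
#if 0
#define MODEL_MAX_RATIO 128

static unsigned
model_ratio_to_bit_index (long ratio)
{
  /* Caller must ensure -MODEL_MAX_RATIO <= ratio <= MODEL_MAX_RATIO.  */
  return (unsigned) (ratio + MODEL_MAX_RATIO);
}
/* model_ratio_to_bit_index (-128) == 0, (0) == 128, (128) == 256.  */
#endif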
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

typedef struct
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
} *address_cost_data;

DEF_VEC_P (address_cost_data);
DEF_VEC_ALLOC_P (address_cost_data, heap);

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, enum machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  static VEC(address_cost_data, heap) *address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
    VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
			   data_index + 1);

  data = VEC_index (address_cost_data, address_cost_data_list, data_index);
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx seq, addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
	width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
	{
	  off = -((HOST_WIDE_INT) 1 << i);
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
	{
	  off = ((HOST_WIDE_INT) 1 << i) - 1;
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      if (i == -1)
	off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->min_offset);
	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (HAVE_PRE_DECREMENT)
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (HAVE_POST_DECREMENT)
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (HAVE_PRE_INCREMENT)
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      if (HAVE_POST_INCREMENT)
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);
	}
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (address_mode, speed);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (address_cost_data, address_cost_data_list,
		   data_index, data);
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else if ((has_postinc[mem_mode] && autoinc_offset == 0
	    && msize == cstep)
	   || (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	   || (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	   || (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep))
    autoinc = true;

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += multiply_by_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (address_mode, speed);

  if (may_autoinc)
    *may_autoinc = autoinc;
  acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
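/* Illustrative sketch (not part of the pass): the 16 probed address shapes
   are enumerated by a 4-bit index whose bits select the symbol, base
   variable, constant offset and scaled-index parts, matching the
   sym_p/var_p/off_p/rat_p decoding in get_address_cost above.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned i;
  for (i = 0; i < 16; i++)
    {
      unsigned sym_p = i & 1;
      unsigned var_p = (i >> 1) & 1;
      unsigned off_p = (i >> 2) & 1;
      unsigned rat_p = (i >> 3) & 1;
      /* Prints e.g. " 7: sym + var + cst + index".  */
      printf ("%2u: %s%s%s%sindex\n", i,
	      sym_p ? "sym + " : "", var_p ? "var + " : "",
	      off_p ? "cst + " : "", rat_p ? "rat * " : "");
    }
  return 0;
}
#endif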
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  enum machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (build2 (POINTER_PLUS_EXPR, type,
					addr,
					build_int_cst (sizetype, 2000)), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return zero_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      if (is_gimple_val (op1))
	cost1 = zero_cost;
      else
	cost1 = force_expr_to_var_cost (op1, speed);

      break;

    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;

      if (is_gimple_val (op0))
	cost0 = zero_cost;
      else
	cost0 = force_expr_to_var_cost (op0, speed);

      cost1 = zero_cost;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost[speed], 0);
    }

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (mode, speed), 0);
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return zero_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return zero_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return zero_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return zero_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += multiply_by_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
	return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
	return false;
    }
  return true;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
			    tree cbase, HOST_WIDE_INT ratio,
			    bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
	return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && host_integerp (ind, 0)
		  && TREE_INT_CST_LOW (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && host_integerp (ind, 0)
		  && TREE_INT_CST_LOW (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return -1;
	}
      /* Now do more complex comparison.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
						     &ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple at,
			 bool *can_autoinc,
			 int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  double_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p)
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (double_int_fits_in_shwi_p (rat))
    ratio = double_int_to_shwi (rat);
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO).  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
		(ratio, TYPE_MODE (TREE_TYPE (utype)),
			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
    }

  if (inv_expr_id)
    {
      *inv_expr_id =
	  get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
		      get_address_cost (symbol_present, var_present,
					offset, ratio, cstepi,
					TYPE_MODE (TREE_TYPE (utype)),
					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
					speed, stmt_is_after_inc,
					can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
				    add_cost (TYPE_MODE (ctype), speed));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (TYPE_MODE (ctype), speed);

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on,
		      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt,
				  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
			       NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
					 &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
	cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	cost = infinite_cost;
    }
  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;

  tree_to_aff_combination (iv->step, steptype, &step);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  aff_combination_add (val, &delta);
}
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range) / step - 1,
     i.e., N * type_range / step - 1.  Since the type range is a power
     of two, N == (step >> num_of_ending_zeros_binary (step)),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_low_cst (pow2div, 1)));

  return period;
}
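/* Illustrative sketch (not part of the pass): for an unsigned type of
   precision P and a step with Z trailing zero bits, the period above is
   2^(P - Z) - 1, i.e. a low-bits mask.  E.g. P = 16, step = 4 (Z = 2)
   gives period 0x3fff: after 0x3fff increments the iv wraps back to its
   starting value.  The model_* name is hypothetical.  */
#if 0
#include <assert.h>

static unsigned long
model_iv_period (unsigned precision, unsigned long step)
{
  unsigned z = 0;
  while (!(step & 1))		/* count trailing zeros of the step  */
    {
      step >>= 1;
      z++;
    }
  return (1ul << (precision - z)) - 1;
}

int
main (void)
{
  assert (model_iv_period (16, 4) == 0x3fff);
  return 0;
}
#endif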
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound)
{
  basic_block ex_bb;
  edge exit;
  tree nit, period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  nit = niter_for_exit (data, exit, &desc);
  if (!nit)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (nit, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, nit))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter = double_int_add (max_niter, double_int_one);
      period_value = tree_to_double_int (period);
      if (double_int_ucmp (max_niter, period_value) > 0)
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (loop_only_exit_p (loop, exit))
	    {
	      if (!estimated_loop_iterations (loop, true, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (double_int_ucmp (max_niter, period_value) > 0)
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, nit, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  return true;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost;
  bool ok;
  int inv_expr_id = -1;
  tree *control_var, *bound_cst;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
			   struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
			       &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;
      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);
	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
	      || uid > gimple_uid (cand->incremented_at))
	    continue;
	  if (closest == NULL || uid > gimple_uid (closest->stmt))
	    closest = use;
	}
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
	continue;
      cand->ainc_use = closest;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for which the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}

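/* A worked example with made-up target numbers: if the addition in
   the increment costs 4 and forcing the base expression into a
   register costs 8, then, assuming adjust_setup_cost divides the
   setup cost by the expected trip count (AVG_LOOP_NITER, i.e. 5)
   when optimizing for speed, the candidate costs 4 + 8/5 = 5 after
   truncation.  The per-iteration increment dominates, as intended
   for loops that roll enough.  */
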
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}

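/* As a rough illustration (exact numbers are target-dependent):
   estimate_reg_pressure_cost charges little while SIZE plus the
   registers already used stays within the available hard registers,
   and adds spill costs beyond that point, so the table dumped by
   determine_set_costs below typically grows slowly at first and then
   much faster.  */
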
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (!is_gimple_reg (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}

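/* Restated (no extra logic, just the formula computed above):

     cost = cand_use_cost                 sum of the chosen use costs
            + cand_cost                   sum of the selected candidates' costs
            + global cost for (n_regs + num_used_inv_expr)

   so live invariant expressions count toward the register-pressure
   term on the same footing as the induction variables themselves.  */
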
/* Remove invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Add invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}

/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}

/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

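/* A minimal usage sketch (hypothetical uses u1, u2 and cost pairs
   a, b, c, d):

     delta = iv_ca_delta_add (u1, a, b, NULL);
     delta = iv_ca_delta_add (u2, c, d, delta);

   builds the list [u2: c -> d] -> [u1: a -> b].  Committing it with
   iv_ca_delta_commit (data, ivs, delta, true) performs both
   replacements, and committing with forward == false undoes them.  */
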
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}

/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = zero_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = zero_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

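/* Note the trial-evaluation pattern used above (and by iv_ca_narrow):
   the delta is committed, the cost of the modified set is read off,
   and the delta is immediately reverted.  A caller then re-commits
   the delta only if it is an improvement, along these lines (a
   sketch of the calling convention, mirroring try_improve_iv_set):

     acost = iv_ca_extend (data, ivs, cand, &delta, &n_ivs, false);
     if (compare_costs (acost, iv_ca_cost (ivs)) < 0)
       iv_ca_delta_commit (data, ivs, delta, true);
     iv_ca_delta_free (&delta);
*/
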
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id)
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              if (!iv_ca_has_deps (ivs, cp))
                continue;

              if (!cheaper_cost_pair (cp, new_cp))
                continue;

              new_cp = cp;
            }
        }

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
        {
          incr_code = MINUS_EXPR;
          step = TREE_OPERAND (step, 0);
        }

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
          || old_code == MINUS_EXPR
          || old_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (old_code != MINUS_EXPR
                   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op
          && (TREE_CODE (op) == INTEGER_CST
              || operand_equal_p (op, step, 0)))
        return;

      /* Otherwise, add the necessary computations to express
         the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
                           build2 (incr_code, ctype, op,
                                   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            {
              SSA_NAME_PTR_INFO (comp)->align = 1;
              SSA_NAME_PTR_INFO (comp)->misalign = 0;
            }
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Copies the reference information from OLD_REF to NEW_REF.  */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  tree new_ptr_base = NULL_TREE;

  TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
  TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);

  new_ptr_base = TREE_OPERAND (new_ref, 0);

  /* We can transfer points-to information from an old pointer
     or decl base to the new one.  */
  if (new_ptr_base
      && TREE_CODE (new_ptr_base) == SSA_NAME
      && !SSA_NAME_PTR_INFO (new_ptr_base))
    {
      tree base = get_base_address (old_ref);
      if (!base)
        ;
      else if ((TREE_CODE (base) == MEM_REF
                || TREE_CODE (base) == TARGET_MEM_REF)
               && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
               && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
        {
          struct ptr_info_def *new_pi;
          duplicate_ssa_name_ptr_info
            (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
          new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
          /* We have to be careful about transferring alignment information.  */
          if (TREE_CODE (old_ref) == MEM_REF
              && !(TREE_CODE (new_ref) == TARGET_MEM_REF
                   && (TMR_INDEX2 (new_ref)
                       || (TMR_STEP (new_ref)
                           && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
                               < new_pi->align)))))
            {
              new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
                                                  mem_ref_offset (new_ref)).low;
              new_pi->misalign &= (new_pi->align - 1);
            }
          else
            {
              new_pi->align = 1;
              new_pi->misalign = 0;
            }
        }
      else if (TREE_CODE (base) == VAR_DECL
               || TREE_CODE (base) == PARM_DECL
               || TREE_CODE (base) == RESULT_DECL)
        {
          struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
          pt_solution_set_var (&pi->pt, base);
        }
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transforms it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}

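/* An illustrative transformation (schematic GIMPLE with hypothetical
   SSA names; suppose the elimination machinery chose a pointer
   candidate p with bound base + 400 for a 100-iteration loop over
   4-byte elements):

     if (i_1 < 100)     becomes     if (p_2 != base + 400)

   where the bound is computed once in the preheader by the
   force_gimple_operand call above.  */
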
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
        rewrite_use_nonlinear_expr (data, use, cand);
        break;

      case USE_ADDRESS:
        rewrite_use_address (data, use, cand);
        break;

      case USE_COMPARE:
        rewrite_use_compare (data, use, cand);
        break;

      default:
        gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for pointer_map_traverse.  */

static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
                      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
        free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
        free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}