/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
71 #include "basic-block.h"
73 #include "tree-pretty-print.h"
74 #include "gimple-pretty-print.h"
75 #include "tree-flow.h"
76 #include "tree-dump.h"
79 #include "tree-pass.h"
81 #include "insn-config.h"
83 #include "pointer-set.h"
85 #include "tree-chrec.h"
86 #include "tree-scalar-evolution.h"
89 #include "langhooks.h"
90 #include "tree-affine.h"
92 #include "tree-inline.h"
93 #include "tree-ssa-propagate.h"
95 /* FIXME: add_cost and zero_cost defined in exprmed.h conflict with local uses.
101 /* FIXME: Expressions are expanded to RTL in this pass to determine the
102 cost of different addressing modes. This should be moved to a TBD
103 interface between the GIMPLE and RTL worlds. */
106 /* The infinite cost. */
107 #define INFTY 10000000
109 #define AVG_LOOP_NITER(LOOP) 5
111 /* Returns the expected number of loop iterations for LOOP.
112 The average trip count is computed from profile data if it
115 static inline HOST_WIDE_INT
116 avg_loop_niter (struct loop
*loop
)
118 HOST_WIDE_INT niter
= max_stmt_executions_int (loop
, false);
120 return AVG_LOOP_NITER (loop
);
125 /* Representation of the induction variable. */
128 tree base
; /* Initial value of the iv. */
129 tree base_object
; /* A memory object to that the induction variable points. */
130 tree step
; /* Step of the iv (constant only). */
131 tree ssa_name
; /* The ssa name with the value. */
132 bool biv_p
; /* Is it a biv? */
133 bool have_use_for
; /* Do we already have a use for it? */
134 unsigned use_id
; /* The identifier in the use if it is the case. */
137 /* Per-ssa version information (induction variable descriptions, etc.). */
140 tree name
; /* The ssa name. */
141 struct iv
*iv
; /* Induction variable description. */
142 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
143 an expression that is not an induction variable. */
144 bool preserve_biv
; /* For the original biv, whether to preserve it. */
145 unsigned inv_id
; /* Id of an invariant. */
151 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
152 USE_ADDRESS
, /* Use in an address. */
153 USE_COMPARE
/* Use is a compare. */
156 /* Cost of a computation. */
159 int cost
; /* The runtime cost. */
160 unsigned complexity
; /* The estimate of the complexity of the code for
161 the computation (in no concrete units --
162 complexity field should be larger for more
163 complex expressions and addressing modes). */
166 static const comp_cost zero_cost
= {0, 0};
167 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
169 /* The candidate - cost pair. */
172 struct iv_cand
*cand
; /* The candidate. */
173 comp_cost cost
; /* The cost. */
174 bitmap depends_on
; /* The list of invariants that have to be
176 tree value
; /* For final value elimination, the expression for
177 the final value of the iv. For iv elimination,
178 the new bound to compare with. */
179 int inv_expr_id
; /* Loop invariant expression id. */
185 unsigned id
; /* The id of the use. */
186 enum use_type type
; /* Type of the use. */
187 struct iv
*iv
; /* The induction variable it is based on. */
188 gimple stmt
; /* Statement in that it occurs. */
189 tree
*op_p
; /* The place where it occurs. */
190 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
193 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
194 struct cost_pair
*cost_map
;
195 /* The costs wrto the iv candidates. */
197 struct iv_cand
*selected
;
198 /* The selected candidate. */
201 /* The position where the iv is computed. */
204 IP_NORMAL
, /* At the end, just before the exit condition. */
205 IP_END
, /* At the end of the latch block. */
206 IP_BEFORE_USE
, /* Immediately before a specific use. */
207 IP_AFTER_USE
, /* Immediately after a specific use. */
208 IP_ORIGINAL
/* The original biv. */
211 /* The induction variable candidate. */
214 unsigned id
; /* The number of the candidate. */
215 bool important
; /* Whether this is an "important" candidate, i.e. such
216 that it should be considered by all uses. */
217 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
218 gimple incremented_at
;/* For original biv, the statement where it is
220 tree var_before
; /* The variable used for it before increment. */
221 tree var_after
; /* The variable used for it after increment. */
222 struct iv
*iv
; /* The value of the candidate. NULL for
223 "pseudocandidate" used to indicate the possibility
224 to replace the final value of an iv by direct
225 computation of the value. */
226 unsigned cost
; /* Cost of the candidate. */
227 unsigned cost_step
; /* Cost of the candidate's increment operation. */
228 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
229 where it is incremented. */
230 bitmap depends_on
; /* The list of invariants that are used in step of the
234 /* Loop invariant expression hashtable entry. */
235 struct iv_inv_expr_ent
242 /* The data used by the induction variable optimizations. */
244 typedef struct iv_use
*iv_use_p
;
246 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
248 typedef struct iv_cand
*iv_cand_p
;
249 DEF_VEC_P(iv_cand_p
);
250 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
254 /* The currently optimized loop. */
255 struct loop
*current_loop
;
257 /* Numbers of iterations for all exits of the current loop. */
258 struct pointer_map_t
*niters
;
260 /* Number of registers used in it. */
263 /* The size of version_info array allocated. */
264 unsigned version_info_size
;
266 /* The array of information for the ssa names. */
267 struct version_info
*version_info
;
269 /* The hashtable of loop invariant expressions created
273 /* Loop invariant expression id. */
276 /* The bitmap of indices in version_info whose value was changed. */
279 /* The uses of induction variables. */
280 VEC(iv_use_p
,heap
) *iv_uses
;
282 /* The candidates. */
283 VEC(iv_cand_p
,heap
) *iv_candidates
;
285 /* A bitmap of important candidates. */
286 bitmap important_candidates
;
288 /* The maximum invariant id. */
291 /* Whether to consider just related and important candidates when replacing a
293 bool consider_all_candidates
;
295 /* Are we optimizing for speed? */
298 /* Whether the loop body includes any function calls. */
299 bool body_includes_call
;
302 /* An assignment of iv candidates to uses. */
306 /* The number of uses covered by the assignment. */
309 /* Number of uses that cannot be expressed by the candidates in the set. */
312 /* Candidate assigned to a use, together with the related costs. */
313 struct cost_pair
**cand_for_use
;
315 /* Number of times each candidate is used. */
316 unsigned *n_cand_uses
;
318 /* The candidates used. */
321 /* The number of candidates in the set. */
324 /* Total number of registers needed. */
327 /* Total cost of expressing uses. */
328 comp_cost cand_use_cost
;
330 /* Total cost of candidates. */
333 /* Number of times each invariant is used. */
334 unsigned *n_invariant_uses
;
336 /* The array holding the number of uses of each loop
337 invariant expressions created by ivopt. */
338 unsigned *used_inv_expr
;
340 /* The number of created loop invariants. */
341 unsigned num_used_inv_expr
;
343 /* Total cost of the assignment. */
347 /* Difference of two iv candidate assignments. */
354 /* An old assignment (for rollback purposes). */
355 struct cost_pair
*old_cp
;
357 /* A new assignment. */
358 struct cost_pair
*new_cp
;
360 /* Next change in the list. */
361 struct iv_ca_delta
*next_change
;
364 /* Bound on number of candidates below that all candidates are considered. */
366 #define CONSIDER_ALL_CANDIDATES_BOUND \
367 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
369 /* If there are more iv occurrences, we just give up (it is quite unlikely that
370 optimizing such a loop would help, and it would take ages). */
372 #define MAX_CONSIDERED_USES \
373 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
375 /* If there are at most this number of ivs in the set, try removing unnecessary
376 ivs from the set always. */
378 #define ALWAYS_PRUNE_CAND_SET_BOUND \
379 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
381 /* The list of trees for that the decl_rtl field must be reset is stored
384 static VEC(tree
,heap
) *decl_rtl_to_reset
;
386 static comp_cost
force_expr_to_var_cost (tree
, bool);
388 /* Number of uses recorded in DATA. */
390 static inline unsigned
391 n_iv_uses (struct ivopts_data
*data
)
393 return VEC_length (iv_use_p
, data
->iv_uses
);
396 /* Ith use recorded in DATA. */
398 static inline struct iv_use
*
399 iv_use (struct ivopts_data
*data
, unsigned i
)
401 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
404 /* Number of candidates recorded in DATA. */
406 static inline unsigned
407 n_iv_cands (struct ivopts_data
*data
)
409 return VEC_length (iv_cand_p
, data
->iv_candidates
);
412 /* Ith candidate recorded in DATA. */
414 static inline struct iv_cand
*
415 iv_cand (struct ivopts_data
*data
, unsigned i
)
417 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
420 /* The single loop exit if it dominates the latch, NULL otherwise. */
423 single_dom_exit (struct loop
*loop
)
425 edge exit
= single_exit (loop
);
430 if (!just_once_each_iteration_p (loop
, exit
->src
))
436 /* Dumps information about the induction variable IV to FILE. */
438 extern void dump_iv (FILE *, struct iv
*);
440 dump_iv (FILE *file
, struct iv
*iv
)
444 fprintf (file
, "ssa name ");
445 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
446 fprintf (file
, "\n");
449 fprintf (file
, " type ");
450 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
451 fprintf (file
, "\n");
455 fprintf (file
, " base ");
456 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
457 fprintf (file
, "\n");
459 fprintf (file
, " step ");
460 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
461 fprintf (file
, "\n");
465 fprintf (file
, " invariant ");
466 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
467 fprintf (file
, "\n");
472 fprintf (file
, " base object ");
473 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
474 fprintf (file
, "\n");
478 fprintf (file
, " is a biv\n");
481 /* Dumps information about the USE to FILE. */
483 extern void dump_use (FILE *, struct iv_use
*);
485 dump_use (FILE *file
, struct iv_use
*use
)
487 fprintf (file
, "use %d\n", use
->id
);
491 case USE_NONLINEAR_EXPR
:
492 fprintf (file
, " generic\n");
496 fprintf (file
, " address\n");
500 fprintf (file
, " compare\n");
507 fprintf (file
, " in statement ");
508 print_gimple_stmt (file
, use
->stmt
, 0, 0);
509 fprintf (file
, "\n");
511 fprintf (file
, " at position ");
513 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
514 fprintf (file
, "\n");
516 dump_iv (file
, use
->iv
);
518 if (use
->related_cands
)
520 fprintf (file
, " related candidates ");
521 dump_bitmap (file
, use
->related_cands
);
525 /* Dumps information about the uses to FILE. */
527 extern void dump_uses (FILE *, struct ivopts_data
*);
529 dump_uses (FILE *file
, struct ivopts_data
*data
)
534 for (i
= 0; i
< n_iv_uses (data
); i
++)
536 use
= iv_use (data
, i
);
538 dump_use (file
, use
);
539 fprintf (file
, "\n");
543 /* Dumps information about induction variable candidate CAND to FILE. */
545 extern void dump_cand (FILE *, struct iv_cand
*);
547 dump_cand (FILE *file
, struct iv_cand
*cand
)
549 struct iv
*iv
= cand
->iv
;
551 fprintf (file
, "candidate %d%s\n",
552 cand
->id
, cand
->important
? " (important)" : "");
554 if (cand
->depends_on
)
556 fprintf (file
, " depends on ");
557 dump_bitmap (file
, cand
->depends_on
);
562 fprintf (file
, " final value replacement\n");
566 if (cand
->var_before
)
568 fprintf (file
, " var_before ");
569 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
570 fprintf (file
, "\n");
574 fprintf (file
, " var_after ");
575 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
576 fprintf (file
, "\n");
582 fprintf (file
, " incremented before exit test\n");
586 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
590 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
594 fprintf (file
, " incremented at end\n");
598 fprintf (file
, " original biv\n");
605 /* Returns the info for ssa version VER. */
607 static inline struct version_info
*
608 ver_info (struct ivopts_data
*data
, unsigned ver
)
610 return data
->version_info
+ ver
;
613 /* Returns the info for ssa name NAME. */
615 static inline struct version_info
*
616 name_info (struct ivopts_data
*data
, tree name
)
618 return ver_info (data
, SSA_NAME_VERSION (name
));
621 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
625 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
627 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
631 if (sbb
== loop
->latch
)
637 return stmt
== last_stmt (bb
);
640 /* Returns true if STMT if after the place where the original induction
641 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
642 if the positions are identical. */
645 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
647 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
648 basic_block stmt_bb
= gimple_bb (stmt
);
650 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
653 if (stmt_bb
!= cand_bb
)
657 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
659 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
662 /* Returns true if STMT if after the place where the induction variable
663 CAND is incremented in LOOP. */
666 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
674 return stmt_after_ip_normal_pos (loop
, stmt
);
678 return stmt_after_inc_pos (cand
, stmt
, false);
681 return stmt_after_inc_pos (cand
, stmt
, true);
688 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
691 abnormal_ssa_name_p (tree exp
)
696 if (TREE_CODE (exp
) != SSA_NAME
)
699 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
702 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
703 abnormal phi node. Callback for for_each_index. */
706 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
707 void *data ATTRIBUTE_UNUSED
)
709 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
711 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
713 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
717 return !abnormal_ssa_name_p (*index
);
720 /* Returns true if EXPR contains a ssa name that occurs in an
721 abnormal phi node. */
724 contains_abnormal_ssa_name_p (tree expr
)
727 enum tree_code_class codeclass
;
732 code
= TREE_CODE (expr
);
733 codeclass
= TREE_CODE_CLASS (code
);
735 if (code
== SSA_NAME
)
736 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
738 if (code
== INTEGER_CST
739 || is_gimple_min_invariant (expr
))
742 if (code
== ADDR_EXPR
)
743 return !for_each_index (&TREE_OPERAND (expr
, 0),
744 idx_contains_abnormal_ssa_name_p
,
747 if (code
== COND_EXPR
)
748 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
749 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
750 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
756 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
761 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
773 /* Returns tree describing number of iterations determined from
774 EXIT of DATA->current_loop, or NULL if something goes wrong. */
777 niter_for_exit (struct ivopts_data
*data
, edge exit
,
778 struct tree_niter_desc
**desc_p
)
780 struct tree_niter_desc
* desc
= NULL
;
786 data
->niters
= pointer_map_create ();
790 slot
= pointer_map_contains (data
->niters
, exit
);
794 /* Try to determine number of iterations. We must know it
795 unconditionally (i.e., without possibility of # of iterations
796 being zero). Also, we cannot safely work with ssa names that
797 appear in phi nodes on abnormal edges, so that we do not create
798 overlapping life ranges for them (PR 27283). */
799 desc
= XNEW (struct tree_niter_desc
);
800 if (number_of_iterations_exit (data
->current_loop
,
802 && integer_zerop (desc
->may_be_zero
)
803 && !contains_abnormal_ssa_name_p (desc
->niter
))
809 slot
= pointer_map_insert (data
->niters
, exit
);
813 niter
= ((struct tree_niter_desc
*) *slot
)->niter
;
816 *desc_p
= (struct tree_niter_desc
*) *slot
;
820 /* Returns tree describing number of iterations determined from
821 single dominating exit of DATA->current_loop, or NULL if something
825 niter_for_single_dom_exit (struct ivopts_data
*data
)
827 edge exit
= single_dom_exit (data
->current_loop
);
832 return niter_for_exit (data
, exit
, NULL
);
835 /* Hash table equality function for expressions. */
838 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
840 const struct iv_inv_expr_ent
*expr1
=
841 (const struct iv_inv_expr_ent
*)ent1
;
842 const struct iv_inv_expr_ent
*expr2
=
843 (const struct iv_inv_expr_ent
*)ent2
;
845 return expr1
->hash
== expr2
->hash
846 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
849 /* Hash function for loop invariant expressions. */
852 htab_inv_expr_hash (const void *ent
)
854 const struct iv_inv_expr_ent
*expr
=
855 (const struct iv_inv_expr_ent
*)ent
;
859 /* Initializes data structures used by the iv optimization pass, stored
863 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
865 data
->version_info_size
= 2 * num_ssa_names
;
866 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
867 data
->relevant
= BITMAP_ALLOC (NULL
);
868 data
->important_candidates
= BITMAP_ALLOC (NULL
);
869 data
->max_inv_id
= 0;
871 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
872 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
873 data
->inv_expr_tab
= htab_create (10, htab_inv_expr_hash
,
874 htab_inv_expr_eq
, free
);
875 data
->inv_expr_id
= 0;
876 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
879 /* Returns a memory object to that EXPR points. In case we are able to
880 determine that it does not point to any such object, NULL is returned. */
883 determine_base_object (tree expr
)
885 enum tree_code code
= TREE_CODE (expr
);
888 /* If this is a pointer casted to any type, we need to determine
889 the base object for the pointer; so handle conversions before
890 throwing away non-pointer expressions. */
891 if (CONVERT_EXPR_P (expr
))
892 return determine_base_object (TREE_OPERAND (expr
, 0));
894 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
903 obj
= TREE_OPERAND (expr
, 0);
904 base
= get_base_address (obj
);
909 if (TREE_CODE (base
) == MEM_REF
)
910 return determine_base_object (TREE_OPERAND (base
, 0));
912 return fold_convert (ptr_type_node
,
913 build_fold_addr_expr (base
));
915 case POINTER_PLUS_EXPR
:
916 return determine_base_object (TREE_OPERAND (expr
, 0));
920 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
924 return fold_convert (ptr_type_node
, expr
);
928 /* Allocates an induction variable with given initial value BASE and step STEP
932 alloc_iv (tree base
, tree step
)
934 struct iv
*iv
= XCNEW (struct iv
);
935 gcc_assert (step
!= NULL_TREE
);
938 iv
->base_object
= determine_base_object (base
);
941 iv
->have_use_for
= false;
943 iv
->ssa_name
= NULL_TREE
;
948 /* Sets STEP and BASE for induction variable IV. */
951 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
953 struct version_info
*info
= name_info (data
, iv
);
955 gcc_assert (!info
->iv
);
957 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
958 info
->iv
= alloc_iv (base
, step
);
959 info
->iv
->ssa_name
= iv
;
962 /* Finds induction variable declaration for VAR. */
965 get_iv (struct ivopts_data
*data
, tree var
)
968 tree type
= TREE_TYPE (var
);
970 if (!POINTER_TYPE_P (type
)
971 && !INTEGRAL_TYPE_P (type
))
974 if (!name_info (data
, var
)->iv
)
976 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
979 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
980 set_iv (data
, var
, var
, build_int_cst (type
, 0));
983 return name_info (data
, var
)->iv
;
986 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
987 not define a simple affine biv with nonzero step. */
990 determine_biv_step (gimple phi
)
992 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
993 tree name
= PHI_RESULT (phi
);
996 if (!is_gimple_reg (name
))
999 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1002 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1005 /* Finds basic ivs. */
1008 find_bivs (struct ivopts_data
*data
)
1011 tree step
, type
, base
;
1013 struct loop
*loop
= data
->current_loop
;
1014 gimple_stmt_iterator psi
;
1016 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1018 phi
= gsi_stmt (psi
);
1020 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1023 step
= determine_biv_step (phi
);
1027 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1028 base
= expand_simple_operations (base
);
1029 if (contains_abnormal_ssa_name_p (base
)
1030 || contains_abnormal_ssa_name_p (step
))
1033 type
= TREE_TYPE (PHI_RESULT (phi
));
1034 base
= fold_convert (type
, base
);
1037 if (POINTER_TYPE_P (type
))
1038 step
= convert_to_ptrofftype (step
);
1040 step
= fold_convert (type
, step
);
1043 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1050 /* Marks basic ivs. */
1053 mark_bivs (struct ivopts_data
*data
)
1057 struct iv
*iv
, *incr_iv
;
1058 struct loop
*loop
= data
->current_loop
;
1059 basic_block incr_bb
;
1060 gimple_stmt_iterator psi
;
1062 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1064 phi
= gsi_stmt (psi
);
1066 iv
= get_iv (data
, PHI_RESULT (phi
));
1070 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1071 incr_iv
= get_iv (data
, var
);
1075 /* If the increment is in the subloop, ignore it. */
1076 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1077 if (incr_bb
->loop_father
!= data
->current_loop
1078 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1082 incr_iv
->biv_p
= true;
1086 /* Checks whether STMT defines a linear induction variable and stores its
1087 parameters to IV. */
1090 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1093 struct loop
*loop
= data
->current_loop
;
1095 iv
->base
= NULL_TREE
;
1096 iv
->step
= NULL_TREE
;
1098 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1101 lhs
= gimple_assign_lhs (stmt
);
1102 if (TREE_CODE (lhs
) != SSA_NAME
)
1105 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1107 iv
->base
= expand_simple_operations (iv
->base
);
1109 if (contains_abnormal_ssa_name_p (iv
->base
)
1110 || contains_abnormal_ssa_name_p (iv
->step
))
1113 /* If STMT could throw, then do not consider STMT as defining a GIV.
1114 While this will suppress optimizations, we can not safely delete this
1115 GIV and associated statements, even if it appears it is not used. */
1116 if (stmt_could_throw_p (stmt
))
1122 /* Finds general ivs in statement STMT. */
1125 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1129 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1132 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1135 /* Finds general ivs in basic block BB. */
1138 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1140 gimple_stmt_iterator bsi
;
1142 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1143 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1146 /* Finds general ivs. */
1149 find_givs (struct ivopts_data
*data
)
1151 struct loop
*loop
= data
->current_loop
;
1152 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1155 for (i
= 0; i
< loop
->num_nodes
; i
++)
1156 find_givs_in_bb (data
, body
[i
]);
1160 /* For each ssa name defined in LOOP determines whether it is an induction
1161 variable and if so, its initial value and step. */
1164 find_induction_variables (struct ivopts_data
*data
)
1169 if (!find_bivs (data
))
1175 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1177 tree niter
= niter_for_single_dom_exit (data
);
1181 fprintf (dump_file
, " number of iterations ");
1182 print_generic_expr (dump_file
, niter
, TDF_SLIM
);
1183 fprintf (dump_file
, "\n\n");
1186 fprintf (dump_file
, "Induction variables:\n\n");
1188 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1190 if (ver_info (data
, i
)->iv
)
1191 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1198 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1200 static struct iv_use
*
1201 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1202 gimple stmt
, enum use_type use_type
)
1204 struct iv_use
*use
= XCNEW (struct iv_use
);
1206 use
->id
= n_iv_uses (data
);
1207 use
->type
= use_type
;
1211 use
->related_cands
= BITMAP_ALLOC (NULL
);
1213 /* To avoid showing ssa name in the dumps, if it was not reset by the
1215 iv
->ssa_name
= NULL_TREE
;
1217 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1218 dump_use (dump_file
, use
);
1220 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1225 /* Checks whether OP is a loop-level invariant and if so, records it.
1226 NONLINEAR_USE is true if the invariant is used in a way we do not
1227 handle specially. */
1230 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1233 struct version_info
*info
;
1235 if (TREE_CODE (op
) != SSA_NAME
1236 || !is_gimple_reg (op
))
1239 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1241 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1244 info
= name_info (data
, op
);
1246 info
->has_nonlin_use
|= nonlinear_use
;
1248 info
->inv_id
= ++data
->max_inv_id
;
1249 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1252 /* Checks whether the use OP is interesting and if so, records it. */
1254 static struct iv_use
*
1255 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1262 if (TREE_CODE (op
) != SSA_NAME
)
1265 iv
= get_iv (data
, op
);
1269 if (iv
->have_use_for
)
1271 use
= iv_use (data
, iv
->use_id
);
1273 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1277 if (integer_zerop (iv
->step
))
1279 record_invariant (data
, op
, true);
1282 iv
->have_use_for
= true;
1284 civ
= XNEW (struct iv
);
1287 stmt
= SSA_NAME_DEF_STMT (op
);
1288 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1289 || is_gimple_assign (stmt
));
1291 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1292 iv
->use_id
= use
->id
;
1297 /* Given a condition in statement STMT, checks whether it is a compare
1298 of an induction variable and an invariant. If this is the case,
1299 CONTROL_VAR is set to location of the iv, BOUND to the location of
1300 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1301 induction variable descriptions, and true is returned. If this is not
1302 the case, CONTROL_VAR and BOUND are set to the arguments of the
1303 condition and false is returned. */
1306 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1307 tree
**control_var
, tree
**bound
,
1308 struct iv
**iv_var
, struct iv
**iv_bound
)
1310 /* The objects returned when COND has constant operands. */
1311 static struct iv const_iv
;
1313 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1314 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1317 if (gimple_code (stmt
) == GIMPLE_COND
)
1319 op0
= gimple_cond_lhs_ptr (stmt
);
1320 op1
= gimple_cond_rhs_ptr (stmt
);
1324 op0
= gimple_assign_rhs1_ptr (stmt
);
1325 op1
= gimple_assign_rhs2_ptr (stmt
);
1328 zero
= integer_zero_node
;
1329 const_iv
.step
= integer_zero_node
;
1331 if (TREE_CODE (*op0
) == SSA_NAME
)
1332 iv0
= get_iv (data
, *op0
);
1333 if (TREE_CODE (*op1
) == SSA_NAME
)
1334 iv1
= get_iv (data
, *op1
);
1336 /* Exactly one of the compared values must be an iv, and the other one must
1341 if (integer_zerop (iv0
->step
))
1343 /* Control variable may be on the other side. */
1344 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1345 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1347 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1351 *control_var
= op0
;;
1362 /* Checks whether the condition in STMT is interesting and if so,
1366 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1368 tree
*var_p
, *bound_p
;
1369 struct iv
*var_iv
, *civ
;
1371 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1373 find_interesting_uses_op (data
, *var_p
);
1374 find_interesting_uses_op (data
, *bound_p
);
1378 civ
= XNEW (struct iv
);
1380 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1383 /* Returns true if expression EXPR is obviously invariant in LOOP,
1384 i.e. if all its operands are defined outside of the LOOP. LOOP
1385 should not be the function body. */
1388 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1393 gcc_assert (loop_depth (loop
) > 0);
1395 if (is_gimple_min_invariant (expr
))
1398 if (TREE_CODE (expr
) == SSA_NAME
)
1400 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1402 && flow_bb_inside_loop_p (loop
, def_bb
))
1411 len
= TREE_OPERAND_LENGTH (expr
);
1412 for (i
= 0; i
< len
; i
++)
1413 if (!expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1419 /* Returns true if statement STMT is obviously invariant in LOOP,
1420 i.e. if all its operands on the RHS are defined outside of the LOOP.
1421 LOOP should not be the function body. */
1424 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1429 gcc_assert (loop_depth (loop
) > 0);
1431 lhs
= gimple_get_lhs (stmt
);
1432 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1434 tree op
= gimple_op (stmt
, i
);
1435 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1442 /* Cumulates the steps of indices into DATA and replaces their values with the
1443 initial ones. Returns false when the value of the index cannot be determined.
1444 Callback for for_each_index. */
1446 struct ifs_ivopts_data
1448 struct ivopts_data
*ivopts_data
;
1454 idx_find_step (tree base
, tree
*idx
, void *data
)
1456 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1458 tree step
, iv_base
, iv_step
, lbound
, off
;
1459 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1461 /* If base is a component ref, require that the offset of the reference
1463 if (TREE_CODE (base
) == COMPONENT_REF
)
1465 off
= component_ref_field_offset (base
);
1466 return expr_invariant_in_loop_p (loop
, off
);
1469 /* If base is array, first check whether we will be able to move the
1470 reference out of the loop (in order to take its address in strength
1471 reduction). In order for this to work we need both lower bound
1472 and step to be loop invariants. */
1473 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1475 /* Moreover, for a range, the size needs to be invariant as well. */
1476 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1477 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1480 step
= array_ref_element_size (base
);
1481 lbound
= array_ref_low_bound (base
);
1483 if (!expr_invariant_in_loop_p (loop
, step
)
1484 || !expr_invariant_in_loop_p (loop
, lbound
))
1488 if (TREE_CODE (*idx
) != SSA_NAME
)
1491 iv
= get_iv (dta
->ivopts_data
, *idx
);
1495 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1496 *&x[0], which is not folded and does not trigger the
1497 ARRAY_REF path below. */
1500 if (integer_zerop (iv
->step
))
1503 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1505 step
= array_ref_element_size (base
);
1507 /* We only handle addresses whose step is an integer constant. */
1508 if (TREE_CODE (step
) != INTEGER_CST
)
1512 /* The step for pointer arithmetics already is 1 byte. */
1513 step
= size_one_node
;
1517 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1518 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1521 /* The index might wrap. */
1525 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1526 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1531 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1532 object is passed to it in DATA. */
1535 idx_record_use (tree base
, tree
*idx
,
1538 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1539 find_interesting_uses_op (data
, *idx
);
1540 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1542 find_interesting_uses_op (data
, array_ref_element_size (base
));
1543 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1548 /* If we can prove that TOP = cst * BOT for some constant cst,
1549 store cst to MUL and return true. Otherwise return false.
1550 The returned value is always sign-extended, regardless of the
1551 signedness of TOP and BOT. */
1554 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1557 enum tree_code code
;
1558 double_int res
, p0
, p1
;
1559 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1564 if (operand_equal_p (top
, bot
, 0))
1566 *mul
= double_int_one
;
1570 code
= TREE_CODE (top
);
1574 mby
= TREE_OPERAND (top
, 1);
1575 if (TREE_CODE (mby
) != INTEGER_CST
)
1578 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1581 *mul
= double_int_sext (double_int_mul (res
, tree_to_double_int (mby
)),
1587 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1588 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1591 if (code
== MINUS_EXPR
)
1592 p1
= double_int_neg (p1
);
1593 *mul
= double_int_sext (double_int_add (p0
, p1
), precision
);
1597 if (TREE_CODE (bot
) != INTEGER_CST
)
1600 p0
= double_int_sext (tree_to_double_int (top
), precision
);
1601 p1
= double_int_sext (tree_to_double_int (bot
), precision
);
1602 if (double_int_zero_p (p1
))
1604 *mul
= double_int_sext (double_int_sdivmod (p0
, p1
, FLOOR_DIV_EXPR
, &res
),
1606 return double_int_zero_p (res
);
1613 /* Returns true if memory reference REF with step STEP may be unaligned. */
1616 may_be_unaligned_p (tree ref
, tree step
)
1620 HOST_WIDE_INT bitsize
;
1621 HOST_WIDE_INT bitpos
;
1623 enum machine_mode mode
;
1624 int unsignedp
, volatilep
;
1625 unsigned base_align
;
1627 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1628 thus they are not misaligned. */
1629 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1632 /* The test below is basically copy of what expr.c:normal_inner_ref
1633 does to check whether the object must be loaded by parts when
1634 STRICT_ALIGNMENT is true. */
1635 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1636 &unsignedp
, &volatilep
, true);
1637 base_type
= TREE_TYPE (base
);
1638 base_align
= get_object_alignment (base
);
1639 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1641 if (mode
!= BLKmode
)
1643 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1645 if (base_align
< mode_align
1646 || (bitpos
% mode_align
) != 0
1647 || (bitpos
% BITS_PER_UNIT
) != 0)
1651 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1654 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1661 /* Return true if EXPR may be non-addressable. */
1664 may_be_nonaddressable_p (tree expr
)
1666 switch (TREE_CODE (expr
))
1668 case TARGET_MEM_REF
:
1669 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1670 target, thus they are always addressable. */
1674 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1675 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1677 case VIEW_CONVERT_EXPR
:
1678 /* This kind of view-conversions may wrap non-addressable objects
1679 and make them look addressable. After some processing the
1680 non-addressability may be uncovered again, causing ADDR_EXPRs
1681 of inappropriate objects to be built. */
1682 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1683 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1686 /* ... fall through ... */
1689 case ARRAY_RANGE_REF
:
1690 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1702 /* Finds addresses in *OP_P inside STMT. */
1705 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1707 tree base
= *op_p
, step
= size_zero_node
;
1709 struct ifs_ivopts_data ifs_ivopts_data
;
1711 /* Do not play with volatile memory references. A bit too conservative,
1712 perhaps, but safe. */
1713 if (gimple_has_volatile_ops (stmt
))
1716 /* Ignore bitfields for now. Not really something terribly complicated
1718 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1721 base
= unshare_expr (base
);
1723 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1725 tree type
= build_pointer_type (TREE_TYPE (base
));
1729 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1731 civ
= get_iv (data
, TMR_BASE (base
));
1735 TMR_BASE (base
) = civ
->base
;
1738 if (TMR_INDEX2 (base
)
1739 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1741 civ
= get_iv (data
, TMR_INDEX2 (base
));
1745 TMR_INDEX2 (base
) = civ
->base
;
1748 if (TMR_INDEX (base
)
1749 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1751 civ
= get_iv (data
, TMR_INDEX (base
));
1755 TMR_INDEX (base
) = civ
->base
;
1760 if (TMR_STEP (base
))
1761 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1763 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1767 if (integer_zerop (step
))
1769 base
= tree_mem_ref_addr (type
, base
);
1773 ifs_ivopts_data
.ivopts_data
= data
;
1774 ifs_ivopts_data
.stmt
= stmt
;
1775 ifs_ivopts_data
.step
= size_zero_node
;
1776 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1777 || integer_zerop (ifs_ivopts_data
.step
))
1779 step
= ifs_ivopts_data
.step
;
1781 /* Check that the base expression is addressable. This needs
1782 to be done after substituting bases of IVs into it. */
1783 if (may_be_nonaddressable_p (base
))
1786 /* Moreover, on strict alignment platforms, check that it is
1787 sufficiently aligned. */
1788 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1791 base
= build_fold_addr_expr (base
);
1793 /* Substituting bases of IVs into the base expression might
1794 have caused folding opportunities. */
1795 if (TREE_CODE (base
) == ADDR_EXPR
)
1797 tree
*ref
= &TREE_OPERAND (base
, 0);
1798 while (handled_component_p (*ref
))
1799 ref
= &TREE_OPERAND (*ref
, 0);
1800 if (TREE_CODE (*ref
) == MEM_REF
)
1802 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1803 TREE_OPERAND (*ref
, 0),
1804 TREE_OPERAND (*ref
, 1));
1811 civ
= alloc_iv (base
, step
);
1812 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1816 for_each_index (op_p
, idx_record_use
, data
);
1819 /* Finds and records invariants used in STMT. */
1822 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1825 use_operand_p use_p
;
1828 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1830 op
= USE_FROM_PTR (use_p
);
1831 record_invariant (data
, op
, false);
1835 /* Finds interesting uses of induction variables in the statement STMT. */
1838 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1841 tree op
, *lhs
, *rhs
;
1843 use_operand_p use_p
;
1844 enum tree_code code
;
1846 find_invariants_stmt (data
, stmt
);
1848 if (gimple_code (stmt
) == GIMPLE_COND
)
1850 find_interesting_uses_cond (data
, stmt
);
1854 if (is_gimple_assign (stmt
))
1856 lhs
= gimple_assign_lhs_ptr (stmt
);
1857 rhs
= gimple_assign_rhs1_ptr (stmt
);
1859 if (TREE_CODE (*lhs
) == SSA_NAME
)
1861 /* If the statement defines an induction variable, the uses are not
1862 interesting by themselves. */
1864 iv
= get_iv (data
, *lhs
);
1866 if (iv
&& !integer_zerop (iv
->step
))
1870 code
= gimple_assign_rhs_code (stmt
);
1871 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1872 && (REFERENCE_CLASS_P (*rhs
)
1873 || is_gimple_val (*rhs
)))
1875 if (REFERENCE_CLASS_P (*rhs
))
1876 find_interesting_uses_address (data
, stmt
, rhs
);
1878 find_interesting_uses_op (data
, *rhs
);
1880 if (REFERENCE_CLASS_P (*lhs
))
1881 find_interesting_uses_address (data
, stmt
, lhs
);
1884 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1886 find_interesting_uses_cond (data
, stmt
);
1890 /* TODO -- we should also handle address uses of type
1892 memory = call (whatever);
1899 if (gimple_code (stmt
) == GIMPLE_PHI
1900 && gimple_bb (stmt
) == data
->current_loop
->header
)
1902 iv
= get_iv (data
, PHI_RESULT (stmt
));
1904 if (iv
&& !integer_zerop (iv
->step
))
1908 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1910 op
= USE_FROM_PTR (use_p
);
1912 if (TREE_CODE (op
) != SSA_NAME
)
1915 iv
= get_iv (data
, op
);
1919 find_interesting_uses_op (data
, op
);
1923 /* Finds interesting uses of induction variables outside of loops
1924 on loop exit edge EXIT. */
1927 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1930 gimple_stmt_iterator psi
;
1933 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1935 phi
= gsi_stmt (psi
);
1936 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1937 if (is_gimple_reg (def
))
1938 find_interesting_uses_op (data
, def
);
1942 /* Finds uses of the induction variables that are interesting. */
1945 find_interesting_uses (struct ivopts_data
*data
)
1948 gimple_stmt_iterator bsi
;
1949 basic_block
*body
= get_loop_body (data
->current_loop
);
1951 struct version_info
*info
;
1954 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1955 fprintf (dump_file
, "Uses:\n\n");
1957 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1962 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1963 if (e
->dest
!= EXIT_BLOCK_PTR
1964 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1965 find_interesting_uses_outside (data
, e
);
1967 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1968 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1969 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1970 if (!is_gimple_debug (gsi_stmt (bsi
)))
1971 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1974 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1978 fprintf (dump_file
, "\n");
1980 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1982 info
= ver_info (data
, i
);
1985 fprintf (dump_file
, " ");
1986 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1987 fprintf (dump_file
, " is invariant (%d)%s\n",
1988 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1992 fprintf (dump_file
, "\n");
1998 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1999 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2000 we are at the top-level of the processed address. */
2003 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2004 unsigned HOST_WIDE_INT
*offset
)
2006 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2007 enum tree_code code
;
2008 tree type
, orig_type
= TREE_TYPE (expr
);
2009 unsigned HOST_WIDE_INT off0
, off1
, st
;
2010 tree orig_expr
= expr
;
2014 type
= TREE_TYPE (expr
);
2015 code
= TREE_CODE (expr
);
2021 if (!cst_and_fits_in_hwi (expr
)
2022 || integer_zerop (expr
))
2025 *offset
= int_cst_value (expr
);
2026 return build_int_cst (orig_type
, 0);
2028 case POINTER_PLUS_EXPR
:
2031 op0
= TREE_OPERAND (expr
, 0);
2032 op1
= TREE_OPERAND (expr
, 1);
2034 op0
= strip_offset_1 (op0
, false, false, &off0
);
2035 op1
= strip_offset_1 (op1
, false, false, &off1
);
2037 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2038 if (op0
== TREE_OPERAND (expr
, 0)
2039 && op1
== TREE_OPERAND (expr
, 1))
2042 if (integer_zerop (op1
))
2044 else if (integer_zerop (op0
))
2046 if (code
== MINUS_EXPR
)
2047 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2052 expr
= fold_build2 (code
, type
, op0
, op1
);
2054 return fold_convert (orig_type
, expr
);
2057 op1
= TREE_OPERAND (expr
, 1);
2058 if (!cst_and_fits_in_hwi (op1
))
2061 op0
= TREE_OPERAND (expr
, 0);
2062 op0
= strip_offset_1 (op0
, false, false, &off0
);
2063 if (op0
== TREE_OPERAND (expr
, 0))
2066 *offset
= off0
* int_cst_value (op1
);
2067 if (integer_zerop (op0
))
2070 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2072 return fold_convert (orig_type
, expr
);
2075 case ARRAY_RANGE_REF
:
2079 step
= array_ref_element_size (expr
);
2080 if (!cst_and_fits_in_hwi (step
))
2083 st
= int_cst_value (step
);
2084 op1
= TREE_OPERAND (expr
, 1);
2085 op1
= strip_offset_1 (op1
, false, false, &off1
);
2086 *offset
= off1
* st
;
2089 && integer_zerop (op1
))
2091 /* Strip the component reference completely. */
2092 op0
= TREE_OPERAND (expr
, 0);
2093 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2103 tmp
= component_ref_field_offset (expr
);
2105 && cst_and_fits_in_hwi (tmp
))
2107 /* Strip the component reference completely. */
2108 op0
= TREE_OPERAND (expr
, 0);
2109 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2110 *offset
= off0
+ int_cst_value (tmp
);
2116 op0
= TREE_OPERAND (expr
, 0);
2117 op0
= strip_offset_1 (op0
, true, true, &off0
);
2120 if (op0
== TREE_OPERAND (expr
, 0))
2123 expr
= build_fold_addr_expr (op0
);
2124 return fold_convert (orig_type
, expr
);
2127 /* ??? Offset operand? */
2128 inside_addr
= false;
2135 /* Default handling of expressions for that we want to recurse into
2136 the first operand. */
2137 op0
= TREE_OPERAND (expr
, 0);
2138 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2141 if (op0
== TREE_OPERAND (expr
, 0)
2142 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2145 expr
= copy_node (expr
);
2146 TREE_OPERAND (expr
, 0) = op0
;
2148 TREE_OPERAND (expr
, 1) = op1
;
2150 /* Inside address, we might strip the top level component references,
2151 thus changing type of the expression. Handling of ADDR_EXPR
2153 expr
= fold_convert (orig_type
, expr
);
2158 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2161 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2163 return strip_offset_1 (expr
, false, false, offset
);
2166 /* Returns variant of TYPE that can be used as base for different uses.
2167 We return unsigned type with the same precision, which avoids problems
2171 generic_type_for (tree type
)
2173 if (POINTER_TYPE_P (type
))
2174 return unsigned_type_for (type
);
2176 if (TYPE_UNSIGNED (type
))
2179 return unsigned_type_for (type
);
2182 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2183 the bitmap to that we should store it. */
2185 static struct ivopts_data
*fd_ivopts_data
;
2187 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2189 bitmap
*depends_on
= (bitmap
*) data
;
2190 struct version_info
*info
;
2192 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2194 info
= name_info (fd_ivopts_data
, *expr_p
);
2196 if (!info
->inv_id
|| info
->has_nonlin_use
)
2200 *depends_on
= BITMAP_ALLOC (NULL
);
2201 bitmap_set_bit (*depends_on
, info
->inv_id
);
2206 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2207 position to POS. If USE is not NULL, the candidate is set as related to
2208 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2209 replacement of the final value of the iv by a direct computation. */
2211 static struct iv_cand
*
2212 add_candidate_1 (struct ivopts_data
*data
,
2213 tree base
, tree step
, bool important
, enum iv_position pos
,
2214 struct iv_use
*use
, gimple incremented_at
)
2217 struct iv_cand
*cand
= NULL
;
2218 tree type
, orig_type
;
2222 orig_type
= TREE_TYPE (base
);
2223 type
= generic_type_for (orig_type
);
2224 if (type
!= orig_type
)
2226 base
= fold_convert (type
, base
);
2227 step
= fold_convert (type
, step
);
2231 for (i
= 0; i
< n_iv_cands (data
); i
++)
2233 cand
= iv_cand (data
, i
);
2235 if (cand
->pos
!= pos
)
2238 if (cand
->incremented_at
!= incremented_at
2239 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2240 && cand
->ainc_use
!= use
))
2254 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2255 && operand_equal_p (step
, cand
->iv
->step
, 0)
2256 && (TYPE_PRECISION (TREE_TYPE (base
))
2257 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2261 if (i
== n_iv_cands (data
))
2263 cand
= XCNEW (struct iv_cand
);
2269 cand
->iv
= alloc_iv (base
, step
);
2272 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2274 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2275 cand
->var_after
= cand
->var_before
;
2277 cand
->important
= important
;
2278 cand
->incremented_at
= incremented_at
;
2279 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2282 && TREE_CODE (step
) != INTEGER_CST
)
2284 fd_ivopts_data
= data
;
2285 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2288 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2289 cand
->ainc_use
= use
;
2291 cand
->ainc_use
= NULL
;
2293 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2294 dump_cand (dump_file
, cand
);
2297 if (important
&& !cand
->important
)
2299 cand
->important
= true;
2300 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2301 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2306 bitmap_set_bit (use
->related_cands
, i
);
2307 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2308 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2315 /* Returns true if incrementing the induction variable at the end of the LOOP
2318 The purpose is to avoid splitting latch edge with a biv increment, thus
2319 creating a jump, possibly confusing other optimization passes and leaving
2320 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2321 is not available (so we do not have a better alternative), or if the latch
2322 edge is already nonempty. */
2325 allow_ip_end_pos_p (struct loop
*loop
)
2327 if (!ip_normal_pos (loop
))
2330 if (!empty_block_p (ip_end_pos (loop
)))
2336 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2337 Important field is set to IMPORTANT. */
2340 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2341 bool important
, struct iv_use
*use
)
2343 basic_block use_bb
= gimple_bb (use
->stmt
);
2344 enum machine_mode mem_mode
;
2345 unsigned HOST_WIDE_INT cstepi
;
2347 /* If we insert the increment in any position other than the standard
2348 ones, we must ensure that it is incremented once per iteration.
2349 It must not be in an inner nested loop, or one side of an if
2351 if (use_bb
->loop_father
!= data
->current_loop
2352 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2353 || stmt_could_throw_p (use
->stmt
)
2354 || !cst_and_fits_in_hwi (step
))
2357 cstepi
= int_cst_value (step
);
2359 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2360 if ((HAVE_PRE_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2361 || (HAVE_PRE_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2363 enum tree_code code
= MINUS_EXPR
;
2365 tree new_step
= step
;
2367 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2369 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2370 code
= POINTER_PLUS_EXPR
;
2373 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2374 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2375 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2378 if ((HAVE_POST_INCREMENT
&& GET_MODE_SIZE (mem_mode
) == cstepi
)
2379 || (HAVE_POST_DECREMENT
&& GET_MODE_SIZE (mem_mode
) == -cstepi
))
2381 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2386 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2387 position to POS. If USE is not NULL, the candidate is set as related to
2388 it. The candidate computation is scheduled on all available positions. */
2391 add_candidate (struct ivopts_data
*data
,
2392 tree base
, tree step
, bool important
, struct iv_use
*use
)
2394 if (ip_normal_pos (data
->current_loop
))
2395 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2396 if (ip_end_pos (data
->current_loop
)
2397 && allow_ip_end_pos_p (data
->current_loop
))
2398 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2400 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2401 add_autoinc_candidates (data
, base
, step
, important
, use
);
2404 /* Add a standard "0 + 1 * iteration" iv candidate for a
2405 type with SIZE bits. */
2408 add_standard_iv_candidates_for_size (struct ivopts_data
*data
,
2411 tree type
= lang_hooks
.types
.type_for_size (size
, true);
2412 add_candidate (data
, build_int_cst (type
, 0), build_int_cst (type
, 1),
2416 /* Adds standard iv candidates. */
2419 add_standard_iv_candidates (struct ivopts_data
*data
)
2421 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
);
2423 /* The same for a double-integer type if it is still fast enough. */
2424 if (BITS_PER_WORD
>= INT_TYPE_SIZE
* 2)
2425 add_standard_iv_candidates_for_size (data
, INT_TYPE_SIZE
* 2);
2429 /* Adds candidates bases on the old induction variable IV. */
2432 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2436 struct iv_cand
*cand
;
2438 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2440 /* The same, but with initial value zero. */
2441 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2442 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2444 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2445 iv
->step
, true, NULL
);
2447 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2448 if (gimple_code (phi
) == GIMPLE_PHI
)
2450 /* Additionally record the possibility of leaving the original iv
2452 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2453 cand
= add_candidate_1 (data
,
2454 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2455 SSA_NAME_DEF_STMT (def
));
2456 cand
->var_before
= iv
->ssa_name
;
2457 cand
->var_after
= def
;
2461 /* Adds candidates based on the old induction variables. */
2464 add_old_ivs_candidates (struct ivopts_data
*data
)
2470 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2472 iv
= ver_info (data
, i
)->iv
;
2473 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2474 add_old_iv_candidates (data
, iv
);
2478 /* Adds candidates based on the value of the induction variable IV and USE. */
2481 add_iv_value_candidates (struct ivopts_data
*data
,
2482 struct iv
*iv
, struct iv_use
*use
)
2484 unsigned HOST_WIDE_INT offset
;
2488 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2490 /* The same, but with initial value zero. Make such variable important,
2491 since it is generic enough so that possibly many uses may be based
2493 basetype
= TREE_TYPE (iv
->base
);
2494 if (POINTER_TYPE_P (basetype
))
2495 basetype
= sizetype
;
2496 add_candidate (data
, build_int_cst (basetype
, 0),
2497 iv
->step
, true, use
);
2499 /* Third, try removing the constant offset. Make sure to even
2500 add a candidate for &a[0] vs. (T *)&a. */
2501 base
= strip_offset (iv
->base
, &offset
);
2503 || base
!= iv
->base
)
2504 add_candidate (data
, base
, iv
->step
, false, use
);
2507 /* Adds candidates based on the uses. */
2510 add_derived_ivs_candidates (struct ivopts_data
*data
)
2514 for (i
= 0; i
< n_iv_uses (data
); i
++)
2516 struct iv_use
*use
= iv_use (data
, i
);
2523 case USE_NONLINEAR_EXPR
:
2526 /* Just add the ivs based on the value of the iv used here. */
2527 add_iv_value_candidates (data
, use
->iv
, use
);
2536 /* Record important candidates and add them to related_cands bitmaps
2540 record_important_candidates (struct ivopts_data
*data
)
2545 for (i
= 0; i
< n_iv_cands (data
); i
++)
2547 struct iv_cand
*cand
= iv_cand (data
, i
);
2549 if (cand
->important
)
2550 bitmap_set_bit (data
->important_candidates
, i
);
2553 data
->consider_all_candidates
= (n_iv_cands (data
)
2554 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2556 if (data
->consider_all_candidates
)
2558 /* We will not need "related_cands" bitmaps in this case,
2559 so release them to decrease peak memory consumption. */
2560 for (i
= 0; i
< n_iv_uses (data
); i
++)
2562 use
= iv_use (data
, i
);
2563 BITMAP_FREE (use
->related_cands
);
2568 /* Add important candidates to the related_cands bitmaps. */
2569 for (i
= 0; i
< n_iv_uses (data
); i
++)
2570 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2571 data
->important_candidates
);
2575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2576 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2577 we allocate a simple list to every use. */
2580 alloc_use_cost_map (struct ivopts_data
*data
)
2582 unsigned i
, size
, s
, j
;
2584 for (i
= 0; i
< n_iv_uses (data
); i
++)
2586 struct iv_use
*use
= iv_use (data
, i
);
2589 if (data
->consider_all_candidates
)
2590 size
= n_iv_cands (data
);
2594 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2599 /* Round up to the power of two, so that moduling by it is fast. */
2600 for (size
= 1; size
< s
; size
<<= 1)
2604 use
->n_map_members
= size
;
2605 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2609 /* Returns description of computation cost of expression whose runtime
2610 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2613 new_cost (unsigned runtime
, unsigned complexity
)
2617 cost
.cost
= runtime
;
2618 cost
.complexity
= complexity
;
2623 /* Adds costs COST1 and COST2. */
2626 add_costs (comp_cost cost1
, comp_cost cost2
)
2628 cost1
.cost
+= cost2
.cost
;
2629 cost1
.complexity
+= cost2
.complexity
;
2633 /* Subtracts costs COST1 and COST2. */
2636 sub_costs (comp_cost cost1
, comp_cost cost2
)
2638 cost1
.cost
-= cost2
.cost
;
2639 cost1
.complexity
-= cost2
.complexity
;
2644 /* Returns a negative number if COST1 < COST2, a positive number if
2645 COST1 > COST2, and 0 if COST1 = COST2. */
2648 compare_costs (comp_cost cost1
, comp_cost cost2
)
2650 if (cost1
.cost
== cost2
.cost
)
2651 return cost1
.complexity
- cost2
.complexity
;
2653 return cost1
.cost
- cost2
.cost
;
2656 /* Returns true if COST is infinite. */
2659 infinite_cost_p (comp_cost cost
)
2661 return cost
.cost
== INFTY
;
2664 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2665 on invariants DEPENDS_ON and that the value used in expressing it
2669 set_use_iv_cost (struct ivopts_data
*data
,
2670 struct iv_use
*use
, struct iv_cand
*cand
,
2671 comp_cost cost
, bitmap depends_on
, tree value
,
2676 if (infinite_cost_p (cost
))
2678 BITMAP_FREE (depends_on
);
2682 if (data
->consider_all_candidates
)
2684 use
->cost_map
[cand
->id
].cand
= cand
;
2685 use
->cost_map
[cand
->id
].cost
= cost
;
2686 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2687 use
->cost_map
[cand
->id
].value
= value
;
2688 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2692 /* n_map_members is a power of two, so this computes modulo. */
2693 s
= cand
->id
& (use
->n_map_members
- 1);
2694 for (i
= s
; i
< use
->n_map_members
; i
++)
2695 if (!use
->cost_map
[i
].cand
)
2697 for (i
= 0; i
< s
; i
++)
2698 if (!use
->cost_map
[i
].cand
)
2704 use
->cost_map
[i
].cand
= cand
;
2705 use
->cost_map
[i
].cost
= cost
;
2706 use
->cost_map
[i
].depends_on
= depends_on
;
2707 use
->cost_map
[i
].value
= value
;
2708 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2711 /* Gets cost of (USE, CANDIDATE) pair. */
2713 static struct cost_pair
*
2714 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2715 struct iv_cand
*cand
)
2718 struct cost_pair
*ret
;
2723 if (data
->consider_all_candidates
)
2725 ret
= use
->cost_map
+ cand
->id
;
2732 /* n_map_members is a power of two, so this computes modulo. */
2733 s
= cand
->id
& (use
->n_map_members
- 1);
2734 for (i
= s
; i
< use
->n_map_members
; i
++)
2735 if (use
->cost_map
[i
].cand
== cand
)
2736 return use
->cost_map
+ i
;
2738 for (i
= 0; i
< s
; i
++)
2739 if (use
->cost_map
[i
].cand
== cand
)
2740 return use
->cost_map
+ i
;
2745 /* Returns estimate on cost of computing SEQ. */
2748 seq_cost (rtx seq
, bool speed
)
2753 for (; seq
; seq
= NEXT_INSN (seq
))
2755 set
= single_set (seq
);
2757 cost
+= set_src_cost (SET_SRC (set
), speed
);
2765 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2767 produce_memory_decl_rtl (tree obj
, int *regno
)
2769 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2770 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2774 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2776 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2777 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2778 SET_SYMBOL_REF_DECL (x
, obj
);
2779 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2780 set_mem_addr_space (x
, as
);
2781 targetm
.encode_section_info (obj
, x
, true);
2785 x
= gen_raw_REG (address_mode
, (*regno
)++);
2786 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2787 set_mem_addr_space (x
, as
);
2793 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2794 walk_tree. DATA contains the actual fake register number. */
2797 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2799 tree obj
= NULL_TREE
;
2801 int *regno
= (int *) data
;
2803 switch (TREE_CODE (*expr_p
))
2806 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2807 handled_component_p (*expr_p
);
2808 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2811 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2812 x
= produce_memory_decl_rtl (obj
, regno
);
2817 obj
= SSA_NAME_VAR (*expr_p
);
2818 if (!DECL_RTL_SET_P (obj
))
2819 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2828 if (DECL_RTL_SET_P (obj
))
2831 if (DECL_MODE (obj
) == BLKmode
)
2832 x
= produce_memory_decl_rtl (obj
, regno
);
2834 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2844 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2845 SET_DECL_RTL (obj
, x
);
2851 /* Determines cost of the computation of EXPR. */
2854 computation_cost (tree expr
, bool speed
)
2857 tree type
= TREE_TYPE (expr
);
2859 /* Avoid using hard regs in ways which may be unsupported. */
2860 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2861 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2862 enum node_frequency real_frequency
= node
->frequency
;
2864 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2865 crtl
->maybe_hot_insn_p
= speed
;
2866 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2868 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2871 default_rtl_profile ();
2872 node
->frequency
= real_frequency
;
2874 cost
= seq_cost (seq
, speed
);
2876 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2877 TYPE_ADDR_SPACE (type
), speed
);
2878 else if (!REG_P (rslt
))
2879 cost
+= set_src_cost (rslt
, speed
);
2884 /* Returns variable containing the value of candidate CAND at statement AT. */
2887 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2889 if (stmt_after_increment (loop
, cand
, stmt
))
2890 return cand
->var_after
;
2892 return cand
->var_before
;
2895 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2896 same precision that is at least as wide as the precision of TYPE, stores
2897 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2901 determine_common_wider_type (tree
*a
, tree
*b
)
2903 tree wider_type
= NULL
;
2905 tree atype
= TREE_TYPE (*a
);
2907 if (CONVERT_EXPR_P (*a
))
2909 suba
= TREE_OPERAND (*a
, 0);
2910 wider_type
= TREE_TYPE (suba
);
2911 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2917 if (CONVERT_EXPR_P (*b
))
2919 subb
= TREE_OPERAND (*b
, 0);
2920 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2931 /* Determines the expression by that USE is expressed from induction variable
2932 CAND at statement AT in LOOP. The expression is stored in a decomposed
2933 form into AFF. Returns false if USE cannot be expressed using CAND. */
2936 get_computation_aff (struct loop
*loop
,
2937 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2938 struct affine_tree_combination
*aff
)
2940 tree ubase
= use
->iv
->base
;
2941 tree ustep
= use
->iv
->step
;
2942 tree cbase
= cand
->iv
->base
;
2943 tree cstep
= cand
->iv
->step
, cstep_common
;
2944 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2945 tree common_type
, var
;
2947 aff_tree cbase_aff
, var_aff
;
2950 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2952 /* We do not have a precision to express the values of use. */
2956 var
= var_at_stmt (loop
, cand
, at
);
2957 uutype
= unsigned_type_for (utype
);
2959 /* If the conversion is not noop, perform it. */
2960 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2962 cstep
= fold_convert (uutype
, cstep
);
2963 cbase
= fold_convert (uutype
, cbase
);
2964 var
= fold_convert (uutype
, var
);
2967 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2970 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2971 type, we achieve better folding by computing their difference in this
2972 wider type, and cast the result to UUTYPE. We do not need to worry about
2973 overflows, as all the arithmetics will in the end be performed in UUTYPE
2975 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2977 /* use = ubase - ratio * cbase + ratio * var. */
2978 tree_to_aff_combination (ubase
, common_type
, aff
);
2979 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2980 tree_to_aff_combination (var
, uutype
, &var_aff
);
2982 /* We need to shift the value if we are after the increment. */
2983 if (stmt_after_increment (loop
, cand
, at
))
2987 if (common_type
!= uutype
)
2988 cstep_common
= fold_convert (common_type
, cstep
);
2990 cstep_common
= cstep
;
2992 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2993 aff_combination_add (&cbase_aff
, &cstep_aff
);
2996 aff_combination_scale (&cbase_aff
, double_int_neg (rat
));
2997 aff_combination_add (aff
, &cbase_aff
);
2998 if (common_type
!= uutype
)
2999 aff_combination_convert (aff
, uutype
);
3001 aff_combination_scale (&var_aff
, rat
);
3002 aff_combination_add (aff
, &var_aff
);
3007 /* Determines the expression by that USE is expressed from induction variable
3008 CAND at statement AT in LOOP. The computation is unshared. */
3011 get_computation_at (struct loop
*loop
,
3012 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3015 tree type
= TREE_TYPE (use
->iv
->base
);
3017 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3019 unshare_aff_combination (&aff
);
3020 return fold_convert (type
, aff_combination_to_tree (&aff
));
3023 /* Determines the expression by that USE is expressed from induction variable
3024 CAND in LOOP. The computation is unshared. */
3027 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3029 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3032 /* Adjust the cost COST for being in loop setup rather than loop body.
3033 If we're optimizing for space, the loop setup overhead is constant;
3034 if we're optimizing for speed, amortize it over the per-iteration cost. */
3036 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3040 else if (optimize_loop_for_speed_p (data
->current_loop
))
3041 return cost
/ avg_loop_niter (data
->current_loop
);
3046 /* Returns cost of addition in MODE. */
3049 add_cost (enum machine_mode mode
, bool speed
)
3051 static unsigned costs
[NUM_MACHINE_MODES
];
3059 force_operand (gen_rtx_fmt_ee (PLUS
, mode
,
3060 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3061 gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 2)),
3066 cost
= seq_cost (seq
, speed
);
3072 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3073 fprintf (dump_file
, "Addition in %s costs %d\n",
3074 GET_MODE_NAME (mode
), cost
);
3078 /* Entry in a hashtable of already known costs for multiplication. */
3081 HOST_WIDE_INT cst
; /* The constant to multiply by. */
3082 enum machine_mode mode
; /* In mode. */
3083 unsigned cost
; /* The cost. */
3086 /* Counts hash value for the ENTRY. */
3089 mbc_entry_hash (const void *entry
)
3091 const struct mbc_entry
*e
= (const struct mbc_entry
*) entry
;
3093 return 57 * (hashval_t
) e
->mode
+ (hashval_t
) (e
->cst
% 877);
3096 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3099 mbc_entry_eq (const void *entry1
, const void *entry2
)
3101 const struct mbc_entry
*e1
= (const struct mbc_entry
*) entry1
;
3102 const struct mbc_entry
*e2
= (const struct mbc_entry
*) entry2
;
3104 return (e1
->mode
== e2
->mode
3105 && e1
->cst
== e2
->cst
);
3108 /* Returns cost of multiplication by constant CST in MODE. */
3111 multiply_by_cost (HOST_WIDE_INT cst
, enum machine_mode mode
, bool speed
)
3113 static htab_t costs
;
3114 struct mbc_entry
**cached
, act
;
3119 costs
= htab_create (100, mbc_entry_hash
, mbc_entry_eq
, free
);
3123 cached
= (struct mbc_entry
**) htab_find_slot (costs
, &act
, INSERT
);
3125 return (*cached
)->cost
;
3127 *cached
= XNEW (struct mbc_entry
);
3128 (*cached
)->mode
= mode
;
3129 (*cached
)->cst
= cst
;
3132 expand_mult (mode
, gen_raw_REG (mode
, LAST_VIRTUAL_REGISTER
+ 1),
3133 gen_int_mode (cst
, mode
), NULL_RTX
, 0);
3137 cost
= seq_cost (seq
, speed
);
3139 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3140 fprintf (dump_file
, "Multiplication by %d in %s costs %d\n",
3141 (int) cst
, GET_MODE_NAME (mode
), cost
);
3143 (*cached
)->cost
= cost
;
3148 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3149 validity for a memory reference accessing memory of mode MODE in
3150 address space AS. */
3152 DEF_VEC_P (sbitmap
);
3153 DEF_VEC_ALLOC_P (sbitmap
, heap
);
3156 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3159 #define MAX_RATIO 128
3160 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3161 static VEC (sbitmap
, heap
) *valid_mult_list
;
3164 if (data_index
>= VEC_length (sbitmap
, valid_mult_list
))
3165 VEC_safe_grow_cleared (sbitmap
, heap
, valid_mult_list
, data_index
+ 1);
3167 valid_mult
= VEC_index (sbitmap
, valid_mult_list
, data_index
);
3170 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3171 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3175 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3176 sbitmap_zero (valid_mult
);
3177 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3178 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3180 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3181 if (memory_address_addr_space_p (mode
, addr
, as
))
3182 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3185 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3187 fprintf (dump_file
, " allowed multipliers:");
3188 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3189 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3190 fprintf (dump_file
, " %d", (int) i
);
3191 fprintf (dump_file
, "\n");
3192 fprintf (dump_file
, "\n");
3195 VEC_replace (sbitmap
, valid_mult_list
, data_index
, valid_mult
);
3198 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3201 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3204 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3205 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3206 variable is omitted. Compute the cost for a memory reference that accesses
3207 a memory location of mode MEM_MODE in address space AS.
3209 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3210 size of MEM_MODE / RATIO) is available. To make this determination, we
3211 look at the size of the increment to be made, which is given in CSTEP.
3212 CSTEP may be zero if the step is unknown.
3213 STMT_AFTER_INC is true iff the statement we're looking at is after the
3214 increment of the original biv.
3216 TODO -- there must be some better way. This all is quite crude. */
3220 HOST_WIDE_INT min_offset
, max_offset
;
3221 unsigned costs
[2][2][2][2];
3222 } *address_cost_data
;
3224 DEF_VEC_P (address_cost_data
);
3225 DEF_VEC_ALLOC_P (address_cost_data
, heap
);
3228 get_address_cost (bool symbol_present
, bool var_present
,
3229 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3230 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3231 addr_space_t as
, bool speed
,
3232 bool stmt_after_inc
, bool *may_autoinc
)
3234 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3235 static VEC(address_cost_data
, heap
) *address_cost_data_list
;
3236 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3237 address_cost_data data
;
3238 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3239 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3240 unsigned cost
, acost
, complexity
;
3241 bool offset_p
, ratio_p
, autoinc
;
3242 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3243 unsigned HOST_WIDE_INT mask
;
3246 if (data_index
>= VEC_length (address_cost_data
, address_cost_data_list
))
3247 VEC_safe_grow_cleared (address_cost_data
, heap
, address_cost_data_list
,
3250 data
= VEC_index (address_cost_data
, address_cost_data_list
, data_index
);
3254 HOST_WIDE_INT rat
, off
= 0;
3255 int old_cse_not_expected
, width
;
3256 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3257 rtx seq
, addr
, base
;
3260 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3262 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3264 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3265 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3266 width
= HOST_BITS_PER_WIDE_INT
- 1;
3267 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3269 for (i
= width
; i
>= 0; i
--)
3271 off
= -((HOST_WIDE_INT
) 1 << i
);
3272 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3273 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3276 data
->min_offset
= (i
== -1? 0 : off
);
3278 for (i
= width
; i
>= 0; i
--)
3280 off
= ((HOST_WIDE_INT
) 1 << i
) - 1;
3281 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3282 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3287 data
->max_offset
= off
;
3289 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3291 fprintf (dump_file
, "get_address_cost:\n");
3292 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3293 GET_MODE_NAME (mem_mode
),
3295 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3296 GET_MODE_NAME (mem_mode
),
3301 for (i
= 2; i
<= MAX_RATIO
; i
++)
3302 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3308 /* Compute the cost of various addressing modes. */
3310 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3311 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3313 if (HAVE_PRE_DECREMENT
)
3315 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3316 has_predec
[mem_mode
]
3317 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3319 if (HAVE_POST_DECREMENT
)
3321 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3322 has_postdec
[mem_mode
]
3323 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3325 if (HAVE_PRE_INCREMENT
)
3327 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3328 has_preinc
[mem_mode
]
3329 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3331 if (HAVE_POST_INCREMENT
)
3333 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3334 has_postinc
[mem_mode
]
3335 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3337 for (i
= 0; i
< 16; i
++)
3340 var_p
= (i
>> 1) & 1;
3341 off_p
= (i
>> 2) & 1;
3342 rat_p
= (i
>> 3) & 1;
3346 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3347 gen_int_mode (rat
, address_mode
));
3350 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3354 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3355 /* ??? We can run into trouble with some backends by presenting
3356 it with symbols which haven't been properly passed through
3357 targetm.encode_section_info. By setting the local bit, we
3358 enhance the probability of things working. */
3359 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3362 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3364 (PLUS
, address_mode
, base
,
3365 gen_int_mode (off
, address_mode
)));
3368 base
= gen_int_mode (off
, address_mode
);
3373 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3376 /* To avoid splitting addressing modes, pretend that no cse will
3378 old_cse_not_expected
= cse_not_expected
;
3379 cse_not_expected
= true;
3380 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3381 cse_not_expected
= old_cse_not_expected
;
3385 acost
= seq_cost (seq
, speed
);
3386 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3390 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3393 /* On some targets, it is quite expensive to load symbol to a register,
3394 which makes addresses that contain symbols look much more expensive.
3395 However, the symbol will have to be loaded in any case before the
3396 loop (and quite likely we have it in register already), so it does not
3397 make much sense to penalize them too heavily. So make some final
3398 tweaks for the SYMBOL_PRESENT modes:
3400 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3401 var is cheaper, use this mode with small penalty.
3402 If VAR_PRESENT is true, try whether the mode with
3403 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3404 if this is the case, use it. */
3405 add_c
= add_cost (address_mode
, speed
);
3406 for (i
= 0; i
< 8; i
++)
3409 off_p
= (i
>> 1) & 1;
3410 rat_p
= (i
>> 2) & 1;
3412 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3416 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3417 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3420 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3422 fprintf (dump_file
, "Address costs:\n");
3424 for (i
= 0; i
< 16; i
++)
3427 var_p
= (i
>> 1) & 1;
3428 off_p
= (i
>> 2) & 1;
3429 rat_p
= (i
>> 3) & 1;
3431 fprintf (dump_file
, " ");
3433 fprintf (dump_file
, "sym + ");
3435 fprintf (dump_file
, "var + ");
3437 fprintf (dump_file
, "cst + ");
3439 fprintf (dump_file
, "rat * ");
3441 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3442 fprintf (dump_file
, "index costs %d\n", acost
);
3444 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3445 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3446 fprintf (dump_file
, " May include autoinc/dec\n");
3447 fprintf (dump_file
, "\n");
3450 VEC_replace (address_cost_data
, address_cost_data_list
,
3454 bits
= GET_MODE_BITSIZE (address_mode
);
3455 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3457 if ((offset
>> (bits
- 1) & 1))
3462 msize
= GET_MODE_SIZE (mem_mode
);
3463 autoinc_offset
= offset
;
3465 autoinc_offset
+= ratio
* cstep
;
3466 if (symbol_present
|| var_present
|| ratio
!= 1)
3468 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3470 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3472 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3474 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3475 && msize
== -cstep
))
3479 offset_p
= (s_offset
!= 0
3480 && data
->min_offset
<= s_offset
3481 && s_offset
<= data
->max_offset
);
3482 ratio_p
= (ratio
!= 1
3483 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3485 if (ratio
!= 1 && !ratio_p
)
3486 cost
+= multiply_by_cost (ratio
, address_mode
, speed
);
3488 if (s_offset
&& !offset_p
&& !symbol_present
)
3489 cost
+= add_cost (address_mode
, speed
);
3492 *may_autoinc
= autoinc
;
3493 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3494 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3495 return new_cost (cost
+ acost
, complexity
);
3498 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3499 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3500 calculating the operands of EXPR. Returns true if successful, and returns
3501 the cost in COST. */
3504 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3505 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3508 tree op1
= TREE_OPERAND (expr
, 1);
3509 tree cst
= TREE_OPERAND (mult
, 1);
3510 tree multop
= TREE_OPERAND (mult
, 0);
3511 int m
= exact_log2 (int_cst_value (cst
));
3512 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3515 if (!(m
>= 0 && m
< maxm
))
3518 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3519 ? shiftadd_cost
[speed
][mode
][m
]
3521 ? shiftsub1_cost
[speed
][mode
][m
]
3522 : shiftsub0_cost
[speed
][mode
][m
]));
3523 res
= new_cost (sa_cost
, 0);
3524 res
= add_costs (res
, mult
== op1
? cost0
: cost1
);
3526 STRIP_NOPS (multop
);
3527 if (!is_gimple_val (multop
))
3528 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3534 /* Estimates cost of forcing expression EXPR into a variable. */
3537 force_expr_to_var_cost (tree expr
, bool speed
)
3539 static bool costs_initialized
= false;
3540 static unsigned integer_cost
[2];
3541 static unsigned symbol_cost
[2];
3542 static unsigned address_cost
[2];
3544 comp_cost cost0
, cost1
, cost
;
3545 enum machine_mode mode
;
3547 if (!costs_initialized
)
3549 tree type
= build_pointer_type (integer_type_node
);
3554 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3555 TREE_STATIC (var
) = 1;
3556 x
= produce_memory_decl_rtl (var
, NULL
);
3557 SET_DECL_RTL (var
, x
);
3559 addr
= build1 (ADDR_EXPR
, type
, var
);
3562 for (i
= 0; i
< 2; i
++)
3564 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3567 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3570 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3571 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3573 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3574 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3575 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3576 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3577 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3578 fprintf (dump_file
, "\n");
3582 costs_initialized
= true;
3587 if (SSA_VAR_P (expr
))
3590 if (is_gimple_min_invariant (expr
))
3592 if (TREE_CODE (expr
) == INTEGER_CST
)
3593 return new_cost (integer_cost
[speed
], 0);
3595 if (TREE_CODE (expr
) == ADDR_EXPR
)
3597 tree obj
= TREE_OPERAND (expr
, 0);
3599 if (TREE_CODE (obj
) == VAR_DECL
3600 || TREE_CODE (obj
) == PARM_DECL
3601 || TREE_CODE (obj
) == RESULT_DECL
)
3602 return new_cost (symbol_cost
[speed
], 0);
3605 return new_cost (address_cost
[speed
], 0);
3608 switch (TREE_CODE (expr
))
3610 case POINTER_PLUS_EXPR
:
3614 op0
= TREE_OPERAND (expr
, 0);
3615 op1
= TREE_OPERAND (expr
, 1);
3619 if (is_gimple_val (op0
))
3622 cost0
= force_expr_to_var_cost (op0
, speed
);
3624 if (is_gimple_val (op1
))
3627 cost1
= force_expr_to_var_cost (op1
, speed
);
3632 op0
= TREE_OPERAND (expr
, 0);
3636 if (is_gimple_val (op0
))
3639 cost0
= force_expr_to_var_cost (op0
, speed
);
3645 /* Just an arbitrary value, FIXME. */
3646 return new_cost (target_spill_cost
[speed
], 0);
3649 mode
= TYPE_MODE (TREE_TYPE (expr
));
3650 switch (TREE_CODE (expr
))
3652 case POINTER_PLUS_EXPR
:
3656 cost
= new_cost (add_cost (mode
, speed
), 0);
3657 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3659 tree mult
= NULL_TREE
;
3661 if (TREE_CODE (op1
) == MULT_EXPR
)
3663 else if (TREE_CODE (op0
) == MULT_EXPR
)
3666 if (mult
!= NULL_TREE
3667 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3668 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
, speed
,
3675 if (cst_and_fits_in_hwi (op0
))
3676 cost
= new_cost (multiply_by_cost (int_cst_value (op0
), mode
, speed
), 0);
3677 else if (cst_and_fits_in_hwi (op1
))
3678 cost
= new_cost (multiply_by_cost (int_cst_value (op1
), mode
, speed
), 0);
3680 return new_cost (target_spill_cost
[speed
], 0);
3687 cost
= add_costs (cost
, cost0
);
3688 cost
= add_costs (cost
, cost1
);
3690 /* Bound the cost by target_spill_cost. The parts of complicated
3691 computations often are either loop invariant or at least can
3692 be shared between several iv uses, so letting this grow without
3693 limits would not give reasonable results. */
3694 if (cost
.cost
> (int) target_spill_cost
[speed
])
3695 cost
.cost
= target_spill_cost
[speed
];
3700 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3701 invariants the computation depends on. */
3704 force_var_cost (struct ivopts_data
*data
,
3705 tree expr
, bitmap
*depends_on
)
3709 fd_ivopts_data
= data
;
3710 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3713 return force_expr_to_var_cost (expr
, data
->speed
);
3716 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3717 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3718 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3719 invariants the computation depends on. */
3722 split_address_cost (struct ivopts_data
*data
,
3723 tree addr
, bool *symbol_present
, bool *var_present
,
3724 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3727 HOST_WIDE_INT bitsize
;
3728 HOST_WIDE_INT bitpos
;
3730 enum machine_mode mode
;
3731 int unsignedp
, volatilep
;
3733 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3734 &unsignedp
, &volatilep
, false);
3737 || bitpos
% BITS_PER_UNIT
!= 0
3738 || TREE_CODE (core
) != VAR_DECL
)
3740 *symbol_present
= false;
3741 *var_present
= true;
3742 fd_ivopts_data
= data
;
3743 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3744 return new_cost (target_spill_cost
[data
->speed
], 0);
3747 *offset
+= bitpos
/ BITS_PER_UNIT
;
3748 if (TREE_STATIC (core
)
3749 || DECL_EXTERNAL (core
))
3751 *symbol_present
= true;
3752 *var_present
= false;
3756 *symbol_present
= false;
3757 *var_present
= true;
3761 /* Estimates cost of expressing difference of addresses E1 - E2 as
3762 var + symbol + offset. The value of offset is added to OFFSET,
3763 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3764 part is missing. DEPENDS_ON is a set of the invariants the computation
3768 ptr_difference_cost (struct ivopts_data
*data
,
3769 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3770 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3772 HOST_WIDE_INT diff
= 0;
3773 aff_tree aff_e1
, aff_e2
;
3776 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3778 if (ptr_difference_const (e1
, e2
, &diff
))
3781 *symbol_present
= false;
3782 *var_present
= false;
3786 if (integer_zerop (e2
))
3787 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3788 symbol_present
, var_present
, offset
, depends_on
);
3790 *symbol_present
= false;
3791 *var_present
= true;
3793 type
= signed_type_for (TREE_TYPE (e1
));
3794 tree_to_aff_combination (e1
, type
, &aff_e1
);
3795 tree_to_aff_combination (e2
, type
, &aff_e2
);
3796 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3797 aff_combination_add (&aff_e1
, &aff_e2
);
3799 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3802 /* Estimates cost of expressing difference E1 - E2 as
3803 var + symbol + offset. The value of offset is added to OFFSET,
3804 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3805 part is missing. DEPENDS_ON is a set of the invariants the computation
3809 difference_cost (struct ivopts_data
*data
,
3810 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3811 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3813 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3814 unsigned HOST_WIDE_INT off1
, off2
;
3815 aff_tree aff_e1
, aff_e2
;
3818 e1
= strip_offset (e1
, &off1
);
3819 e2
= strip_offset (e2
, &off2
);
3820 *offset
+= off1
- off2
;
3825 if (TREE_CODE (e1
) == ADDR_EXPR
)
3826 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3827 offset
, depends_on
);
3828 *symbol_present
= false;
3830 if (operand_equal_p (e1
, e2
, 0))
3832 *var_present
= false;
3836 *var_present
= true;
3838 if (integer_zerop (e2
))
3839 return force_var_cost (data
, e1
, depends_on
);
3841 if (integer_zerop (e1
))
3843 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3844 cost
.cost
+= multiply_by_cost (-1, mode
, data
->speed
);
3848 type
= signed_type_for (TREE_TYPE (e1
));
3849 tree_to_aff_combination (e1
, type
, &aff_e1
);
3850 tree_to_aff_combination (e2
, type
, &aff_e2
);
3851 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3852 aff_combination_add (&aff_e1
, &aff_e2
);
3854 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3857 /* Returns true if AFF1 and AFF2 are identical. */
3860 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3864 if (aff1
->n
!= aff2
->n
)
3867 for (i
= 0; i
< aff1
->n
; i
++)
3869 if (double_int_cmp (aff1
->elts
[i
].coef
, aff2
->elts
[i
].coef
, 0) != 0)
3872 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3878 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3881 get_expr_id (struct ivopts_data
*data
, tree expr
)
3883 struct iv_inv_expr_ent ent
;
3884 struct iv_inv_expr_ent
**slot
;
3887 ent
.hash
= iterative_hash_expr (expr
, 0);
3888 slot
= (struct iv_inv_expr_ent
**) htab_find_slot (data
->inv_expr_tab
,
3893 *slot
= XNEW (struct iv_inv_expr_ent
);
3894 (*slot
)->expr
= expr
;
3895 (*slot
)->hash
= ent
.hash
;
3896 (*slot
)->id
= data
->inv_expr_id
++;
3900 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3901 requires a new compiler generated temporary. Returns -1 otherwise.
3902 ADDRESS_P is a flag indicating if the expression is for address
3906 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3907 tree cbase
, HOST_WIDE_INT ratio
,
3910 aff_tree ubase_aff
, cbase_aff
;
3918 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3919 && (TREE_CODE (cbase
) == INTEGER_CST
))
3922 /* Strips the constant part. */
3923 if (TREE_CODE (ubase
) == PLUS_EXPR
3924 || TREE_CODE (ubase
) == MINUS_EXPR
3925 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3927 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3928 ubase
= TREE_OPERAND (ubase
, 0);
3931 /* Strips the constant part. */
3932 if (TREE_CODE (cbase
) == PLUS_EXPR
3933 || TREE_CODE (cbase
) == MINUS_EXPR
3934 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3936 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3937 cbase
= TREE_OPERAND (cbase
, 0);
3942 if (((TREE_CODE (ubase
) == SSA_NAME
)
3943 || (TREE_CODE (ubase
) == ADDR_EXPR
3944 && is_gimple_min_invariant (ubase
)))
3945 && (TREE_CODE (cbase
) == INTEGER_CST
))
3948 if (((TREE_CODE (cbase
) == SSA_NAME
)
3949 || (TREE_CODE (cbase
) == ADDR_EXPR
3950 && is_gimple_min_invariant (cbase
)))
3951 && (TREE_CODE (ubase
) == INTEGER_CST
))
3957 if(operand_equal_p (ubase
, cbase
, 0))
3960 if (TREE_CODE (ubase
) == ADDR_EXPR
3961 && TREE_CODE (cbase
) == ADDR_EXPR
)
3965 usym
= TREE_OPERAND (ubase
, 0);
3966 csym
= TREE_OPERAND (cbase
, 0);
3967 if (TREE_CODE (usym
) == ARRAY_REF
)
3969 tree ind
= TREE_OPERAND (usym
, 1);
3970 if (TREE_CODE (ind
) == INTEGER_CST
3971 && host_integerp (ind
, 0)
3972 && TREE_INT_CST_LOW (ind
) == 0)
3973 usym
= TREE_OPERAND (usym
, 0);
3975 if (TREE_CODE (csym
) == ARRAY_REF
)
3977 tree ind
= TREE_OPERAND (csym
, 1);
3978 if (TREE_CODE (ind
) == INTEGER_CST
3979 && host_integerp (ind
, 0)
3980 && TREE_INT_CST_LOW (ind
) == 0)
3981 csym
= TREE_OPERAND (csym
, 0);
3983 if (operand_equal_p (usym
, csym
, 0))
3986 /* Now do more complex comparison */
3987 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3988 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3989 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3993 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3994 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3996 aff_combination_scale (&cbase_aff
, shwi_to_double_int (-1 * ratio
));
3997 aff_combination_add (&ubase_aff
, &cbase_aff
);
3998 expr
= aff_combination_to_tree (&ubase_aff
);
3999 return get_expr_id (data
, expr
);
4004 /* Determines the cost of the computation by that USE is expressed
4005 from induction variable CAND. If ADDRESS_P is true, we just need
4006 to create an address from it, otherwise we want to get it into
4007 register. A set of invariants we depend on is stored in
4008 DEPENDS_ON. AT is the statement at that the value is computed.
4009 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4010 addressing is likely. */
4013 get_computation_cost_at (struct ivopts_data
*data
,
4014 struct iv_use
*use
, struct iv_cand
*cand
,
4015 bool address_p
, bitmap
*depends_on
, gimple at
,
4019 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4021 tree utype
= TREE_TYPE (ubase
), ctype
;
4022 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4023 HOST_WIDE_INT ratio
, aratio
;
4024 bool var_present
, symbol_present
, stmt_is_after_inc
;
4027 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4031 /* Only consider real candidates. */
4033 return infinite_cost
;
4035 cbase
= cand
->iv
->base
;
4036 cstep
= cand
->iv
->step
;
4037 ctype
= TREE_TYPE (cbase
);
4039 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4041 /* We do not have a precision to express the values of use. */
4042 return infinite_cost
;
4047 /* Do not try to express address of an object with computation based
4048 on address of a different object. This may cause problems in rtl
4049 level alias analysis (that does not expect this to be happening,
4050 as this is illegal in C), and would be unlikely to be useful
4052 if (use
->iv
->base_object
4053 && cand
->iv
->base_object
4054 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4055 return infinite_cost
;
4058 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4060 /* TODO -- add direct handling of this case. */
4064 /* CSTEPI is removed from the offset in case statement is after the
4065 increment. If the step is not constant, we use zero instead.
4066 This is a bit imprecise (there is the extra addition), but
4067 redundancy elimination is likely to transform the code so that
4068 it uses value of the variable before increment anyway,
4069 so it is not that much unrealistic. */
4070 if (cst_and_fits_in_hwi (cstep
))
4071 cstepi
= int_cst_value (cstep
);
4075 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4076 return infinite_cost
;
4078 if (double_int_fits_in_shwi_p (rat
))
4079 ratio
= double_int_to_shwi (rat
);
4081 return infinite_cost
;
4084 ctype
= TREE_TYPE (cbase
);
4086 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4088 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4089 or ratio == 1, it is better to handle this like
4091 ubase - ratio * cbase + ratio * var
4093 (also holds in the case ratio == -1, TODO. */
4095 if (cst_and_fits_in_hwi (cbase
))
4097 offset
= - ratio
* int_cst_value (cbase
);
4098 cost
= difference_cost (data
,
4099 ubase
, build_int_cst (utype
, 0),
4100 &symbol_present
, &var_present
, &offset
,
4102 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4104 else if (ratio
== 1)
4106 tree real_cbase
= cbase
;
4108 /* Check to see if any adjustment is needed. */
4109 if (cstepi
== 0 && stmt_is_after_inc
)
4111 aff_tree real_cbase_aff
;
4114 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4116 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4118 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4119 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4122 cost
= difference_cost (data
,
4124 &symbol_present
, &var_present
, &offset
,
4126 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4129 && !POINTER_TYPE_P (ctype
)
4130 && multiplier_allowed_in_address_p
4131 (ratio
, TYPE_MODE (TREE_TYPE (utype
)),
4132 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4135 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4136 cost
= difference_cost (data
,
4138 &symbol_present
, &var_present
, &offset
,
4140 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4144 cost
= force_var_cost (data
, cbase
, depends_on
);
4145 cost
= add_costs (cost
,
4146 difference_cost (data
,
4147 ubase
, build_int_cst (utype
, 0),
4148 &symbol_present
, &var_present
,
4149 &offset
, depends_on
));
4150 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4151 cost
.cost
+= add_cost (TYPE_MODE (ctype
), data
->speed
);
4157 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4158 /* Clear depends on. */
4159 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4160 bitmap_clear (*depends_on
);
4163 /* If we are after the increment, the value of the candidate is higher by
4165 if (stmt_is_after_inc
)
4166 offset
-= ratio
* cstepi
;
4168 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4169 (symbol/var1/const parts may be omitted). If we are looking for an
4170 address, find the cost of addressing this. */
4172 return add_costs (cost
,
4173 get_address_cost (symbol_present
, var_present
,
4174 offset
, ratio
, cstepi
,
4175 TYPE_MODE (TREE_TYPE (utype
)),
4176 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4177 speed
, stmt_is_after_inc
,
4180 /* Otherwise estimate the costs for computing the expression. */
4181 if (!symbol_present
&& !var_present
&& !offset
)
4184 cost
.cost
+= multiply_by_cost (ratio
, TYPE_MODE (ctype
), speed
);
4188 /* Symbol + offset should be compile-time computable so consider that they
4189 are added once to the variable, if present. */
4190 if (var_present
&& (symbol_present
|| offset
))
4191 cost
.cost
+= adjust_setup_cost (data
,
4192 add_cost (TYPE_MODE (ctype
), speed
));
4194 /* Having offset does not affect runtime cost in case it is added to
4195 symbol, but it increases complexity. */
4199 cost
.cost
+= add_cost (TYPE_MODE (ctype
), speed
);
4201 aratio
= ratio
> 0 ? ratio
: -ratio
;
4203 cost
.cost
+= multiply_by_cost (aratio
, TYPE_MODE (ctype
), speed
);
4208 *can_autoinc
= false;
4211 /* Just get the expression, expand it and measure the cost. */
4212 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4215 return infinite_cost
;
4218 comp
= build_simple_mem_ref (comp
);
4220 return new_cost (computation_cost (comp
, speed
), 0);
4224 /* Determines the cost of the computation by that USE is expressed
4225 from induction variable CAND. If ADDRESS_P is true, we just need
4226 to create an address from it, otherwise we want to get it into
4227 register. A set of invariants we depend on is stored in
4228 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4229 autoinc addressing is likely. */
4232 get_computation_cost (struct ivopts_data
*data
,
4233 struct iv_use
*use
, struct iv_cand
*cand
,
4234 bool address_p
, bitmap
*depends_on
,
4235 bool *can_autoinc
, int *inv_expr_id
)
4237 return get_computation_cost_at (data
,
4238 use
, cand
, address_p
, depends_on
, use
->stmt
,
4239 can_autoinc
, inv_expr_id
);
4242 /* Determines cost of basing replacement of USE on CAND in a generic
4246 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4247 struct iv_use
*use
, struct iv_cand
*cand
)
4251 int inv_expr_id
= -1;
4253 /* The simple case first -- if we need to express value of the preserved
4254 original biv, the cost is 0. This also prevents us from counting the
4255 cost of increment twice -- once at this use and once in the cost of
4257 if (cand
->pos
== IP_ORIGINAL
4258 && cand
->incremented_at
== use
->stmt
)
4260 set_use_iv_cost (data
, use
, cand
, zero_cost
, NULL
, NULL_TREE
, -1);
4264 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4265 NULL
, &inv_expr_id
);
4267 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
,
4270 return !infinite_cost_p (cost
);
4273 /* Determines cost of basing replacement of USE on CAND in an address. */
4276 determine_use_iv_cost_address (struct ivopts_data
*data
,
4277 struct iv_use
*use
, struct iv_cand
*cand
)
4281 int inv_expr_id
= -1;
4282 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4283 &can_autoinc
, &inv_expr_id
);
4285 if (cand
->ainc_use
== use
)
4288 cost
.cost
-= cand
->cost_step
;
4289 /* If we generated the candidate solely for exploiting autoincrement
4290 opportunities, and it turns out it can't be used, set the cost to
4291 infinity to make sure we ignore it. */
4292 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4293 cost
= infinite_cost
;
4295 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
,
4298 return !infinite_cost_p (cost
);
4301 /* Computes value of candidate CAND at position AT in iteration NITER, and
4302 stores it to VAL. */
4305 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4308 aff_tree step
, delta
, nit
;
4309 struct iv
*iv
= cand
->iv
;
4310 tree type
= TREE_TYPE (iv
->base
);
4311 tree steptype
= type
;
4312 if (POINTER_TYPE_P (type
))
4313 steptype
= sizetype
;
4315 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4316 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4317 aff_combination_convert (&nit
, steptype
);
4318 aff_combination_mult (&nit
, &step
, &delta
);
4319 if (stmt_after_increment (loop
, cand
, at
))
4320 aff_combination_add (&delta
, &step
);
4322 tree_to_aff_combination (iv
->base
, type
, val
);
4323 aff_combination_add (val
, &delta
);
4326 /* Returns period of induction variable iv. */
4329 iv_period (struct iv
*iv
)
4331 tree step
= iv
->step
, period
, type
;
4334 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4336 type
= unsigned_type_for (TREE_TYPE (step
));
4337 /* Period of the iv is lcm (step, type_range)/step -1,
4338 i.e., N*type_range/step - 1. Since type range is power
4339 of two, N == (step >> num_of_ending_zeros_binary (step),
4340 so the final result is
4342 (type_range >> num_of_ending_zeros_binary (step)) - 1
4345 pow2div
= num_ending_zeros (step
);
4347 period
= build_low_bits_mask (type
,
4348 (TYPE_PRECISION (type
)
4349 - tree_low_cst (pow2div
, 1)));
4354 /* Returns the comparison operator used when eliminating the iv USE. */
4356 static enum tree_code
4357 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4359 struct loop
*loop
= data
->current_loop
;
4363 ex_bb
= gimple_bb (use
->stmt
);
4364 exit
= EDGE_SUCC (ex_bb
, 0);
4365 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4366 exit
= EDGE_SUCC (ex_bb
, 1);
4368 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4371 /* Check whether it is possible to express the condition in USE by comparison
4372 of candidate CAND. If so, store the value compared with to BOUND. */
4375 may_eliminate_iv (struct ivopts_data
*data
,
4376 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
)
4381 struct loop
*loop
= data
->current_loop
;
4383 struct tree_niter_desc
*desc
= NULL
;
4385 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4388 /* For now works only for exits that dominate the loop latch.
4389 TODO: extend to other conditions inside loop body. */
4390 ex_bb
= gimple_bb (use
->stmt
);
4391 if (use
->stmt
!= last_stmt (ex_bb
)
4392 || gimple_code (use
->stmt
) != GIMPLE_COND
4393 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4396 exit
= EDGE_SUCC (ex_bb
, 0);
4397 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4398 exit
= EDGE_SUCC (ex_bb
, 1);
4399 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4402 nit
= niter_for_exit (data
, exit
, &desc
);
4406 /* Determine whether we can use the variable to test the exit condition.
4407 This is the case iff the period of the induction variable is greater
4408 than the number of iterations for which the exit condition is true. */
4409 period
= iv_period (cand
->iv
);
4411 /* If the number of iterations is constant, compare against it directly. */
4412 if (TREE_CODE (nit
) == INTEGER_CST
)
4414 /* See cand_value_at. */
4415 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4417 if (!tree_int_cst_lt (nit
, period
))
4422 if (tree_int_cst_lt (period
, nit
))
4427 /* If not, and if this is the only possible exit of the loop, see whether
4428 we can get a conservative estimate on the number of iterations of the
4429 entire loop and compare against that instead. */
4432 double_int period_value
, max_niter
;
4434 max_niter
= desc
->max
;
4435 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4436 max_niter
= double_int_add (max_niter
, double_int_one
);
4437 period_value
= tree_to_double_int (period
);
4438 if (double_int_ucmp (max_niter
, period_value
) > 0)
4440 /* See if we can take advantage of infered loop bound information. */
4441 if (loop_only_exit_p (loop
, exit
))
4443 if (!estimated_loop_iterations (loop
, true, &max_niter
))
4445 /* The loop bound is already adjusted by adding 1. */
4446 if (double_int_ucmp (max_niter
, period_value
) > 0)
4454 cand_value_at (loop
, cand
, use
->stmt
, nit
, &bnd
);
4456 *bound
= aff_combination_to_tree (&bnd
);
4457 /* It is unlikely that computing the number of iterations using division
4458 would be more profitable than keeping the original induction variable. */
4459 if (expression_expensive_p (*bound
))
4464 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4465 be copied, if is is used in the loop body and DATA->body_includes_call. */
4468 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4470 tree sbound
= bound
;
4471 STRIP_NOPS (sbound
);
4473 if (TREE_CODE (sbound
) == SSA_NAME
4474 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4475 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound
))
4476 && data
->body_includes_call
)
4477 return COSTS_N_INSNS (1);
4482 /* Determines cost of basing replacement of USE on CAND in a condition. */
4485 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4486 struct iv_use
*use
, struct iv_cand
*cand
)
4488 tree bound
= NULL_TREE
;
4490 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4491 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4493 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4494 tree
*control_var
, *bound_cst
;
4496 /* Only consider real candidates. */
4499 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
, -1);
4503 /* Try iv elimination. */
4504 if (may_eliminate_iv (data
, use
, cand
, &bound
))
4506 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4507 if (elim_cost
.cost
== 0)
4508 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4509 else if (TREE_CODE (bound
) == INTEGER_CST
)
4511 /* If we replace a loop condition 'i < n' with 'p < base + n',
4512 depends_on_elim will have 'base' and 'n' set, which implies
4513 that both 'base' and 'n' will be live during the loop. More likely,
4514 'base + n' will be loop invariant, resulting in only one live value
4515 during the loop. So in that case we clear depends_on_elim and set
4516 elim_inv_expr_id instead. */
4517 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4519 elim_inv_expr_id
= get_expr_id (data
, bound
);
4520 bitmap_clear (depends_on_elim
);
4522 /* The bound is a loop invariant, so it will be only computed
4524 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4527 elim_cost
= infinite_cost
;
4529 /* Try expressing the original giv. If it is compared with an invariant,
4530 note that we cannot get rid of it. */
4531 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4535 /* When the condition is a comparison of the candidate IV against
4536 zero, prefer this IV.
4538 TODO: The constant that we're substracting from the cost should
4539 be target-dependent. This information should be added to the
4540 target costs for each backend. */
4541 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4542 && integer_zerop (*bound_cst
)
4543 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4544 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4545 elim_cost
.cost
-= 1;
4547 express_cost
= get_computation_cost (data
, use
, cand
, false,
4548 &depends_on_express
, NULL
,
4549 &express_inv_expr_id
);
4550 fd_ivopts_data
= data
;
4551 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4553 /* Count the cost of the original bound as well. */
4554 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4555 if (bound_cost
.cost
== 0)
4556 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4557 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4558 bound_cost
.cost
= 0;
4559 express_cost
.cost
+= bound_cost
.cost
;
4561 /* Choose the better approach, preferring the eliminated IV. */
4562 if (compare_costs (elim_cost
, express_cost
) <= 0)
4565 depends_on
= depends_on_elim
;
4566 depends_on_elim
= NULL
;
4567 inv_expr_id
= elim_inv_expr_id
;
4571 cost
= express_cost
;
4572 depends_on
= depends_on_express
;
4573 depends_on_express
= NULL
;
4575 inv_expr_id
= express_inv_expr_id
;
4578 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, inv_expr_id
);
4580 if (depends_on_elim
)
4581 BITMAP_FREE (depends_on_elim
);
4582 if (depends_on_express
)
4583 BITMAP_FREE (depends_on_express
);
4585 return !infinite_cost_p (cost
);
4588 /* Determines cost of basing replacement of USE on CAND. Returns false
4589 if USE cannot be based on CAND. */
4592 determine_use_iv_cost (struct ivopts_data
*data
,
4593 struct iv_use
*use
, struct iv_cand
*cand
)
4597 case USE_NONLINEAR_EXPR
:
4598 return determine_use_iv_cost_generic (data
, use
, cand
);
4601 return determine_use_iv_cost_address (data
, use
, cand
);
4604 return determine_use_iv_cost_condition (data
, use
, cand
);
4611 /* Return true if get_computation_cost indicates that autoincrement is
4612 a possibility for the pair of USE and CAND, false otherwise. */
4615 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4616 struct iv_cand
*cand
)
4622 if (use
->type
!= USE_ADDRESS
)
4625 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4626 &can_autoinc
, NULL
);
4628 BITMAP_FREE (depends_on
);
4630 return !infinite_cost_p (cost
) && can_autoinc
;
4633 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4634 use that allows autoincrement, and set their AINC_USE if possible. */
4637 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4641 for (i
= 0; i
< n_iv_cands (data
); i
++)
4643 struct iv_cand
*cand
= iv_cand (data
, i
);
4644 struct iv_use
*closest
= NULL
;
4645 if (cand
->pos
!= IP_ORIGINAL
)
4647 for (j
= 0; j
< n_iv_uses (data
); j
++)
4649 struct iv_use
*use
= iv_use (data
, j
);
4650 unsigned uid
= gimple_uid (use
->stmt
);
4651 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4652 || uid
> gimple_uid (cand
->incremented_at
))
4654 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4657 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4659 cand
->ainc_use
= closest
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4683 /* Determines costs of basing the use of the iv on an iv candidate. */
4686 determine_use_iv_costs (struct ivopts_data
*data
)
4690 struct iv_cand
*cand
;
4691 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4693 alloc_use_cost_map (data
);
4695 for (i
= 0; i
< n_iv_uses (data
); i
++)
4697 use
= iv_use (data
, i
);
4699 if (data
->consider_all_candidates
)
4701 for (j
= 0; j
< n_iv_cands (data
); j
++)
4703 cand
= iv_cand (data
, j
);
4704 determine_use_iv_cost (data
, use
, cand
);
4711 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4713 cand
= iv_cand (data
, j
);
4714 if (!determine_use_iv_cost (data
, use
, cand
))
4715 bitmap_set_bit (to_clear
, j
);
4718 /* Remove the candidates for that the cost is infinite from
4719 the list of related candidates. */
4720 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4721 bitmap_clear (to_clear
);
4725 BITMAP_FREE (to_clear
);
4727 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4729 fprintf (dump_file
, "Use-candidate costs:\n");
4731 for (i
= 0; i
< n_iv_uses (data
); i
++)
4733 use
= iv_use (data
, i
);
4735 fprintf (dump_file
, "Use %d:\n", i
);
4736 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4737 for (j
= 0; j
< use
->n_map_members
; j
++)
4739 if (!use
->cost_map
[j
].cand
4740 || infinite_cost_p (use
->cost_map
[j
].cost
))
4743 fprintf (dump_file
, " %d\t%d\t%d\t",
4744 use
->cost_map
[j
].cand
->id
,
4745 use
->cost_map
[j
].cost
.cost
,
4746 use
->cost_map
[j
].cost
.complexity
);
4747 if (use
->cost_map
[j
].depends_on
)
4748 bitmap_print (dump_file
,
4749 use
->cost_map
[j
].depends_on
, "","");
4750 if (use
->cost_map
[j
].inv_expr_id
!= -1)
4751 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
4752 fprintf (dump_file
, "\n");
4755 fprintf (dump_file
, "\n");
4757 fprintf (dump_file
, "\n");
4761 /* Determines cost of the candidate CAND. */
4764 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4766 comp_cost cost_base
;
4767 unsigned cost
, cost_step
;
4776 /* There are two costs associated with the candidate -- its increment
4777 and its initialization. The second is almost negligible for any loop
4778 that rolls enough, so we take it just very little into account. */
4780 base
= cand
->iv
->base
;
4781 cost_base
= force_var_cost (data
, base
, NULL
);
4782 /* It will be exceptional that the iv register happens to be initialized with
4783 the proper value at no cost. In general, there will at least be a regcopy
4785 if (cost_base
.cost
== 0)
4786 cost_base
.cost
= COSTS_N_INSNS (1);
4787 cost_step
= add_cost (TYPE_MODE (TREE_TYPE (base
)), data
->speed
);
4789 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
4791 /* Prefer the original ivs unless we may gain something by replacing it.
4792 The reason is to make debugging simpler; so this is not relevant for
4793 artificial ivs created by other optimization passes. */
4794 if (cand
->pos
!= IP_ORIGINAL
4795 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4798 /* Prefer not to insert statements into latch unless there are some
4799 already (so that we do not create unnecessary jumps). */
4800 if (cand
->pos
== IP_END
4801 && empty_block_p (ip_end_pos (data
->current_loop
)))
4805 cand
->cost_step
= cost_step
;
4808 /* Determines costs of computation of the candidates. */
4811 determine_iv_costs (struct ivopts_data
*data
)
4815 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4817 fprintf (dump_file
, "Candidate costs:\n");
4818 fprintf (dump_file
, " cand\tcost\n");
4821 for (i
= 0; i
< n_iv_cands (data
); i
++)
4823 struct iv_cand
*cand
= iv_cand (data
, i
);
4825 determine_iv_cost (data
, cand
);
4827 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4828 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
4831 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4832 fprintf (dump_file
, "\n");
4835 /* Calculates cost for having SIZE induction variables. */
4838 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
4840 /* We add size to the cost, so that we prefer eliminating ivs
4842 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
4843 data
->body_includes_call
);
4846 /* For each size of the induction variable set determine the penalty. */
4849 determine_set_costs (struct ivopts_data
*data
)
4853 gimple_stmt_iterator psi
;
4855 struct loop
*loop
= data
->current_loop
;
4858 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4860 fprintf (dump_file
, "Global costs:\n");
4861 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
4862 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
4863 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
4864 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
4868 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
4870 phi
= gsi_stmt (psi
);
4871 op
= PHI_RESULT (phi
);
4873 if (!is_gimple_reg (op
))
4876 if (get_iv (data
, op
))
4882 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
4884 struct version_info
*info
= ver_info (data
, j
);
4886 if (info
->inv_id
&& info
->has_nonlin_use
)
4890 data
->regs_used
= n
;
4891 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4892 fprintf (dump_file
, " regs_used %d\n", n
);
4894 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4896 fprintf (dump_file
, " cost for size:\n");
4897 fprintf (dump_file
, " ivs\tcost\n");
4898 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
4899 fprintf (dump_file
, " %d\t%d\n", j
,
4900 ivopts_global_cost_for_size (data
, j
));
4901 fprintf (dump_file
, "\n");
4905 /* Returns true if A is a cheaper cost pair than B. */
4908 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
4918 cmp
= compare_costs (a
->cost
, b
->cost
);
4925 /* In case the costs are the same, prefer the cheaper candidate. */
4926 if (a
->cand
->cost
< b
->cand
->cost
)
4933 /* Returns candidate by that USE is expressed in IVS. */
4935 static struct cost_pair
*
4936 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
4938 return ivs
->cand_for_use
[use
->id
];
4941 /* Computes the cost field of IVS structure. */
4944 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
4946 comp_cost cost
= ivs
->cand_use_cost
;
4948 cost
.cost
+= ivs
->cand_cost
;
4950 cost
.cost
+= ivopts_global_cost_for_size (data
,
4951 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
4956 /* Remove invariants in set INVS to set IVS. */
4959 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
4967 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
4969 ivs
->n_invariant_uses
[iid
]--;
4970 if (ivs
->n_invariant_uses
[iid
] == 0)
4975 /* Set USE not to be expressed by any candidate in IVS. */
4978 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
4981 unsigned uid
= use
->id
, cid
;
4982 struct cost_pair
*cp
;
4984 cp
= ivs
->cand_for_use
[uid
];
4990 ivs
->cand_for_use
[uid
] = NULL
;
4991 ivs
->n_cand_uses
[cid
]--;
4993 if (ivs
->n_cand_uses
[cid
] == 0)
4995 bitmap_clear_bit (ivs
->cands
, cid
);
4996 /* Do not count the pseudocandidates. */
5000 ivs
->cand_cost
-= cp
->cand
->cost
;
5002 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5005 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5007 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5009 if (cp
->inv_expr_id
!= -1)
5011 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5012 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5013 ivs
->num_used_inv_expr
--;
5015 iv_ca_recount_cost (data
, ivs
);
5018 /* Add invariants in set INVS to set IVS. */
5021 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5029 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5031 ivs
->n_invariant_uses
[iid
]++;
5032 if (ivs
->n_invariant_uses
[iid
] == 1)
5037 /* Set cost pair for USE in set IVS to CP. */
5040 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5041 struct iv_use
*use
, struct cost_pair
*cp
)
5043 unsigned uid
= use
->id
, cid
;
5045 if (ivs
->cand_for_use
[uid
] == cp
)
5048 if (ivs
->cand_for_use
[uid
])
5049 iv_ca_set_no_cp (data
, ivs
, use
);
5056 ivs
->cand_for_use
[uid
] = cp
;
5057 ivs
->n_cand_uses
[cid
]++;
5058 if (ivs
->n_cand_uses
[cid
] == 1)
5060 bitmap_set_bit (ivs
->cands
, cid
);
5061 /* Do not count the pseudocandidates. */
5065 ivs
->cand_cost
+= cp
->cand
->cost
;
5067 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5070 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5071 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5073 if (cp
->inv_expr_id
!= -1)
5075 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5076 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5077 ivs
->num_used_inv_expr
++;
5079 iv_ca_recount_cost (data
, ivs
);
5083 /* Extend set IVS by expressing USE by some of the candidates in it
5084 if possible. All important candidates will be considered
5085 if IMPORTANT_CANDIDATES is true. */
5088 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5089 struct iv_use
*use
, bool important_candidates
)
5091 struct cost_pair
*best_cp
= NULL
, *cp
;
5096 gcc_assert (ivs
->upto
>= use
->id
);
5098 if (ivs
->upto
== use
->id
)
5104 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5105 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5107 struct iv_cand
*cand
= iv_cand (data
, i
);
5109 cp
= get_use_iv_cost (data
, use
, cand
);
5111 if (cheaper_cost_pair (cp
, best_cp
))
5115 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5118 /* Get cost for assignment IVS. */
5121 iv_ca_cost (struct iv_ca
*ivs
)
5123 /* This was a conditional expression but it triggered a bug in
5126 return infinite_cost
;
5131 /* Returns true if all dependences of CP are among invariants in IVS. */
5134 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5139 if (!cp
->depends_on
)
5142 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5144 if (ivs
->n_invariant_uses
[i
] == 0)
5151 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5152 it before NEXT_CHANGE. */
5154 static struct iv_ca_delta
*
5155 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5156 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5158 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5161 change
->old_cp
= old_cp
;
5162 change
->new_cp
= new_cp
;
5163 change
->next_change
= next_change
;
5168 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5171 static struct iv_ca_delta
*
5172 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5174 struct iv_ca_delta
*last
;
5182 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5184 last
->next_change
= l2
;
5189 /* Reverse the list of changes DELTA, forming the inverse to it. */
5191 static struct iv_ca_delta
*
5192 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5194 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5195 struct cost_pair
*tmp
;
5197 for (act
= delta
; act
; act
= next
)
5199 next
= act
->next_change
;
5200 act
->next_change
= prev
;
5204 act
->old_cp
= act
->new_cp
;
5211 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5212 reverted instead. */
5215 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5216 struct iv_ca_delta
*delta
, bool forward
)
5218 struct cost_pair
*from
, *to
;
5219 struct iv_ca_delta
*act
;
5222 delta
= iv_ca_delta_reverse (delta
);
5224 for (act
= delta
; act
; act
= act
->next_change
)
5228 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5229 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5233 iv_ca_delta_reverse (delta
);
5236 /* Returns true if CAND is used in IVS. */
5239 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5241 return ivs
->n_cand_uses
[cand
->id
] > 0;
5244 /* Returns number of induction variable candidates in the set IVS. */
5247 iv_ca_n_cands (struct iv_ca
*ivs
)
5249 return ivs
->n_cands
;
5252 /* Free the list of changes DELTA. */
5255 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5257 struct iv_ca_delta
*act
, *next
;
5259 for (act
= *delta
; act
; act
= next
)
5261 next
= act
->next_change
;
5268 /* Allocates new iv candidates assignment. */
5270 static struct iv_ca
*
5271 iv_ca_new (struct ivopts_data
*data
)
5273 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5277 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5278 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5279 nw
->cands
= BITMAP_ALLOC (NULL
);
5282 nw
->cand_use_cost
= zero_cost
;
5284 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5285 nw
->cost
= zero_cost
;
5286 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5287 nw
->num_used_inv_expr
= 0;
5292 /* Free memory occupied by the set IVS. */
5295 iv_ca_free (struct iv_ca
**ivs
)
5297 free ((*ivs
)->cand_for_use
);
5298 free ((*ivs
)->n_cand_uses
);
5299 BITMAP_FREE ((*ivs
)->cands
);
5300 free ((*ivs
)->n_invariant_uses
);
5301 free ((*ivs
)->used_inv_expr
);
5306 /* Dumps IVS to FILE. */
5309 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5311 const char *pref
= " invariants ";
5313 comp_cost cost
= iv_ca_cost (ivs
);
5315 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5316 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5317 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5318 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5320 for (i
= 0; i
< ivs
->upto
; i
++)
5322 struct iv_use
*use
= iv_use (data
, i
);
5323 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5325 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5326 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5328 fprintf (file
, " use:%d --> ??\n", use
->id
);
5331 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5332 if (ivs
->n_invariant_uses
[i
])
5334 fprintf (file
, "%s%d", pref
, i
);
5337 fprintf (file
, "\n\n");
5340 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5341 new set, and store differences in DELTA. Number of induction variables
5342 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5343 the function will try to find a solution with mimimal iv candidates. */
5346 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5347 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5348 unsigned *n_ivs
, bool min_ncand
)
5353 struct cost_pair
*old_cp
, *new_cp
;
5356 for (i
= 0; i
< ivs
->upto
; i
++)
5358 use
= iv_use (data
, i
);
5359 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5362 && old_cp
->cand
== cand
)
5365 new_cp
= get_use_iv_cost (data
, use
, cand
);
5369 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5372 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5375 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5378 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5379 cost
= iv_ca_cost (ivs
);
5381 *n_ivs
= iv_ca_n_cands (ivs
);
5382 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5387 /* Try narrowing set IVS by removing CAND. Return the cost of
5388 the new set and store the differences in DELTA. */
5391 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5392 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5396 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5398 struct iv_cand
*cnd
;
5402 for (i
= 0; i
< n_iv_uses (data
); i
++)
5404 use
= iv_use (data
, i
);
5406 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5407 if (old_cp
->cand
!= cand
)
5412 if (data
->consider_all_candidates
)
5414 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5419 cnd
= iv_cand (data
, ci
);
5421 cp
= get_use_iv_cost (data
, use
, cnd
);
5425 if (!iv_ca_has_deps (ivs
, cp
))
5428 if (!cheaper_cost_pair (cp
, new_cp
))
5436 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5441 cnd
= iv_cand (data
, ci
);
5443 cp
= get_use_iv_cost (data
, use
, cnd
);
5446 if (!iv_ca_has_deps (ivs
, cp
))
5449 if (!cheaper_cost_pair (cp
, new_cp
))
5458 iv_ca_delta_free (delta
);
5459 return infinite_cost
;
5462 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5465 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5466 cost
= iv_ca_cost (ivs
);
5467 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5472 /* Try optimizing the set of candidates IVS by removing candidates different
5473 from to EXCEPT_CAND from it. Return cost of the new set, and store
5474 differences in DELTA. */
5477 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5478 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5481 struct iv_ca_delta
*act_delta
, *best_delta
;
5483 comp_cost best_cost
, acost
;
5484 struct iv_cand
*cand
;
5487 best_cost
= iv_ca_cost (ivs
);
5489 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5491 cand
= iv_cand (data
, i
);
5493 if (cand
== except_cand
)
5496 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5498 if (compare_costs (acost
, best_cost
) < 0)
5501 iv_ca_delta_free (&best_delta
);
5502 best_delta
= act_delta
;
5505 iv_ca_delta_free (&act_delta
);
5514 /* Recurse to possibly remove other unnecessary ivs. */
5515 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5516 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5517 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5518 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5522 /* Tries to extend the sets IVS in the best possible way in order
5523 to express the USE. If ORIGINALP is true, prefer candidates from
5524 the original set of IVs, otherwise favor important candidates not
5525 based on any memory object. */
5528 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5529 struct iv_use
*use
, bool originalp
)
5531 comp_cost best_cost
, act_cost
;
5534 struct iv_cand
*cand
;
5535 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5536 struct cost_pair
*cp
;
5538 iv_ca_add_use (data
, ivs
, use
, false);
5539 best_cost
= iv_ca_cost (ivs
);
5541 cp
= iv_ca_cand_for_use (ivs
, use
);
5546 iv_ca_add_use (data
, ivs
, use
, true);
5547 best_cost
= iv_ca_cost (ivs
);
5548 cp
= iv_ca_cand_for_use (ivs
, use
);
5552 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5553 iv_ca_set_no_cp (data
, ivs
, use
);
5556 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5557 first try important candidates not based on any memory object. Only if
5558 this fails, try the specific ones. Rationale -- in loops with many
5559 variables the best choice often is to use just one generic biv. If we
5560 added here many ivs specific to the uses, the optimization algorithm later
5561 would be likely to get stuck in a local minimum, thus causing us to create
5562 too many ivs. The approach from few ivs to more seems more likely to be
5563 successful -- starting from few ivs, replacing an expensive use by a
5564 specific iv should always be a win. */
5565 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5567 cand
= iv_cand (data
, i
);
5569 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5572 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5575 if (iv_ca_cand_used_p (ivs
, cand
))
5578 cp
= get_use_iv_cost (data
, use
, cand
);
5582 iv_ca_set_cp (data
, ivs
, use
, cp
);
5583 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5585 iv_ca_set_no_cp (data
, ivs
, use
);
5586 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5588 if (compare_costs (act_cost
, best_cost
) < 0)
5590 best_cost
= act_cost
;
5592 iv_ca_delta_free (&best_delta
);
5593 best_delta
= act_delta
;
5596 iv_ca_delta_free (&act_delta
);
5599 if (infinite_cost_p (best_cost
))
5601 for (i
= 0; i
< use
->n_map_members
; i
++)
5603 cp
= use
->cost_map
+ i
;
5608 /* Already tried this. */
5609 if (cand
->important
)
5611 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5613 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5617 if (iv_ca_cand_used_p (ivs
, cand
))
5621 iv_ca_set_cp (data
, ivs
, use
, cp
);
5622 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5623 iv_ca_set_no_cp (data
, ivs
, use
);
5624 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5627 if (compare_costs (act_cost
, best_cost
) < 0)
5629 best_cost
= act_cost
;
5632 iv_ca_delta_free (&best_delta
);
5633 best_delta
= act_delta
;
5636 iv_ca_delta_free (&act_delta
);
5640 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5641 iv_ca_delta_free (&best_delta
);
5643 return !infinite_cost_p (best_cost
);
5646 /* Finds an initial assignment of candidates to uses. */
5648 static struct iv_ca
*
5649 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5651 struct iv_ca
*ivs
= iv_ca_new (data
);
5654 for (i
= 0; i
< n_iv_uses (data
); i
++)
5655 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5664 /* Tries to improve set of induction variables IVS. */
5667 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5670 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5671 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5672 struct iv_cand
*cand
;
5674 /* Try extending the set of induction variables by one. */
5675 for (i
= 0; i
< n_iv_cands (data
); i
++)
5677 cand
= iv_cand (data
, i
);
5679 if (iv_ca_cand_used_p (ivs
, cand
))
5682 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5686 /* If we successfully added the candidate and the set is small enough,
5687 try optimizing it by removing other candidates. */
5688 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5690 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5691 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5692 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5693 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5696 if (compare_costs (acost
, best_cost
) < 0)
5699 iv_ca_delta_free (&best_delta
);
5700 best_delta
= act_delta
;
5703 iv_ca_delta_free (&act_delta
);
5708 /* Try removing the candidates from the set instead. */
5709 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5711 /* Nothing more we can do. */
5716 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5717 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5718 iv_ca_delta_free (&best_delta
);
5722 /* Attempts to find the optimal set of induction variables. We do simple
5723 greedy heuristic -- we try to replace at most one candidate in the selected
5724 solution and remove the unused ivs while this improves the cost. */
5726 static struct iv_ca
*
5727 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
5731 /* Get the initial solution. */
5732 set
= get_initial_solution (data
, originalp
);
5735 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5736 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5740 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5742 fprintf (dump_file
, "Initial set of candidates:\n");
5743 iv_ca_dump (data
, dump_file
, set
);
5746 while (try_improve_iv_set (data
, set
))
5748 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5750 fprintf (dump_file
, "Improved to:\n");
5751 iv_ca_dump (data
, dump_file
, set
);
5758 static struct iv_ca
*
5759 find_optimal_iv_set (struct ivopts_data
*data
)
5762 struct iv_ca
*set
, *origset
;
5764 comp_cost cost
, origcost
;
5766 /* Determine the cost based on a strategy that starts with original IVs,
5767 and try again using a strategy that prefers candidates not based
5769 origset
= find_optimal_iv_set_1 (data
, true);
5770 set
= find_optimal_iv_set_1 (data
, false);
5772 if (!origset
&& !set
)
5775 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
5776 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
5778 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5780 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
5781 origcost
.cost
, origcost
.complexity
);
5782 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
5783 cost
.cost
, cost
.complexity
);
5786 /* Choose the one with the best cost. */
5787 if (compare_costs (origcost
, cost
) <= 0)
5794 iv_ca_free (&origset
);
5796 for (i
= 0; i
< n_iv_uses (data
); i
++)
5798 use
= iv_use (data
, i
);
5799 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5805 /* Creates a new induction variable corresponding to CAND. */
5808 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5810 gimple_stmt_iterator incr_pos
;
5820 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
5824 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
5832 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
5836 /* Mark that the iv is preserved. */
5837 name_info (data
, cand
->var_before
)->preserve_biv
= true;
5838 name_info (data
, cand
->var_after
)->preserve_biv
= true;
5840 /* Rewrite the increment so that it uses var_before directly. */
5841 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
5845 gimple_add_tmp_var (cand
->var_before
);
5846 add_referenced_var (cand
->var_before
);
5848 base
= unshare_expr (cand
->iv
->base
);
5850 create_iv (base
, unshare_expr (cand
->iv
->step
),
5851 cand
->var_before
, data
->current_loop
,
5852 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
5855 /* Creates new induction variables described in SET. */
5858 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
5861 struct iv_cand
*cand
;
5864 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5866 cand
= iv_cand (data
, i
);
5867 create_new_iv (data
, cand
);
5870 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5872 fprintf (dump_file
, "\nSelected IV set: \n");
5873 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
5875 cand
= iv_cand (data
, i
);
5876 dump_cand (dump_file
, cand
);
5878 fprintf (dump_file
, "\n");
5882 /* Rewrites USE (definition of iv used in a nonlinear expression)
5883 using candidate CAND. */
5886 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
5887 struct iv_use
*use
, struct iv_cand
*cand
)
5892 gimple_stmt_iterator bsi
;
5894 /* An important special case -- if we are asked to express value of
5895 the original iv by itself, just exit; there is no need to
5896 introduce a new computation (that might also need casting the
5897 variable to unsigned and back). */
5898 if (cand
->pos
== IP_ORIGINAL
5899 && cand
->incremented_at
== use
->stmt
)
5901 tree step
, ctype
, utype
;
5902 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
5904 gcc_assert (is_gimple_assign (use
->stmt
));
5905 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
5907 step
= cand
->iv
->step
;
5908 ctype
= TREE_TYPE (step
);
5909 utype
= TREE_TYPE (cand
->var_after
);
5910 if (TREE_CODE (step
) == NEGATE_EXPR
)
5912 incr_code
= MINUS_EXPR
;
5913 step
= TREE_OPERAND (step
, 0);
5916 /* Check whether we may leave the computation unchanged.
5917 This is the case only if it does not rely on other
5918 computations in the loop -- otherwise, the computation
5919 we rely upon may be removed in remove_unused_ivs,
5920 thus leading to ICE. */
5921 old_code
= gimple_assign_rhs_code (use
->stmt
);
5922 if (old_code
== PLUS_EXPR
5923 || old_code
== MINUS_EXPR
5924 || old_code
== POINTER_PLUS_EXPR
)
5926 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
5927 op
= gimple_assign_rhs2 (use
->stmt
);
5928 else if (old_code
!= MINUS_EXPR
5929 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
5930 op
= gimple_assign_rhs1 (use
->stmt
);
5938 && (TREE_CODE (op
) == INTEGER_CST
5939 || operand_equal_p (op
, step
, 0)))
5942 /* Otherwise, add the necessary computations to express
5944 op
= fold_convert (ctype
, cand
->var_before
);
5945 comp
= fold_convert (utype
,
5946 build2 (incr_code
, ctype
, op
,
5947 unshare_expr (step
)));
5951 comp
= get_computation (data
->current_loop
, use
, cand
);
5952 gcc_assert (comp
!= NULL_TREE
);
5955 switch (gimple_code (use
->stmt
))
5958 tgt
= PHI_RESULT (use
->stmt
);
5960 /* If we should keep the biv, do not replace it. */
5961 if (name_info (data
, tgt
)->preserve_biv
)
5964 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
5968 tgt
= gimple_assign_lhs (use
->stmt
);
5969 bsi
= gsi_for_stmt (use
->stmt
);
5976 if (!valid_gimple_rhs_p (comp
)
5977 || (gimple_code (use
->stmt
) != GIMPLE_PHI
5978 /* We can't allow re-allocating the stmt as it might be pointed
5980 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
5981 >= gimple_num_ops (gsi_stmt (bsi
)))))
5983 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
5984 true, GSI_SAME_STMT
);
5985 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
5987 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
5988 /* As this isn't a plain copy we have to reset alignment
5990 if (SSA_NAME_PTR_INFO (comp
))
5992 SSA_NAME_PTR_INFO (comp
)->align
= 1;
5993 SSA_NAME_PTR_INFO (comp
)->misalign
= 0;
5998 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6000 ass
= gimple_build_assign (tgt
, comp
);
6001 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6003 bsi
= gsi_for_stmt (use
->stmt
);
6004 remove_phi_node (&bsi
, false);
6008 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6009 use
->stmt
= gsi_stmt (bsi
);
6013 /* Copies the reference information from OLD_REF to NEW_REF. */
6016 copy_ref_info (tree new_ref
, tree old_ref
)
6018 tree new_ptr_base
= NULL_TREE
;
6020 TREE_SIDE_EFFECTS (new_ref
) = TREE_SIDE_EFFECTS (old_ref
);
6021 TREE_THIS_VOLATILE (new_ref
) = TREE_THIS_VOLATILE (old_ref
);
6023 new_ptr_base
= TREE_OPERAND (new_ref
, 0);
6025 /* We can transfer points-to information from an old pointer
6026 or decl base to the new one. */
6028 && TREE_CODE (new_ptr_base
) == SSA_NAME
6029 && !SSA_NAME_PTR_INFO (new_ptr_base
))
6031 tree base
= get_base_address (old_ref
);
6034 else if ((TREE_CODE (base
) == MEM_REF
6035 || TREE_CODE (base
) == TARGET_MEM_REF
)
6036 && TREE_CODE (TREE_OPERAND (base
, 0)) == SSA_NAME
6037 && SSA_NAME_PTR_INFO (TREE_OPERAND (base
, 0)))
6039 struct ptr_info_def
*new_pi
;
6040 duplicate_ssa_name_ptr_info
6041 (new_ptr_base
, SSA_NAME_PTR_INFO (TREE_OPERAND (base
, 0)));
6042 new_pi
= SSA_NAME_PTR_INFO (new_ptr_base
);
6043 /* We have to be careful about transfering alignment information. */
6044 if (TREE_CODE (old_ref
) == MEM_REF
6045 && !(TREE_CODE (new_ref
) == TARGET_MEM_REF
6046 && (TMR_INDEX2 (new_ref
)
6047 || (TMR_STEP (new_ref
)
6048 && (TREE_INT_CST_LOW (TMR_STEP (new_ref
))
6049 < new_pi
->align
)))))
6051 new_pi
->misalign
+= double_int_sub (mem_ref_offset (old_ref
),
6052 mem_ref_offset (new_ref
)).low
;
6053 new_pi
->misalign
&= (new_pi
->align
- 1);
6058 new_pi
->misalign
= 0;
6061 else if (TREE_CODE (base
) == VAR_DECL
6062 || TREE_CODE (base
) == PARM_DECL
6063 || TREE_CODE (base
) == RESULT_DECL
)
6065 struct ptr_info_def
*pi
= get_ptr_info (new_ptr_base
);
6066 pt_solution_set_var (&pi
->pt
, base
);
6071 /* Performs a peephole optimization to reorder the iv update statement with
6072 a mem ref to enable instruction combining in later phases. The mem ref uses
6073 the iv value before the update, so the reordering transformation requires
6074 adjustment of the offset. CAND is the selected IV_CAND.
6078 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6086 directly propagating t over to (1) will introduce overlapping live range
6087 thus increase register pressure. This peephole transform it into:
6091 t = MEM_REF (base, iv2, 8, 8);
6098 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6101 gimple iv_update
, stmt
;
6103 gimple_stmt_iterator gsi
, gsi_iv
;
6105 if (cand
->pos
!= IP_NORMAL
)
6108 var_after
= cand
->var_after
;
6109 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6111 bb
= gimple_bb (iv_update
);
6112 gsi
= gsi_last_nondebug_bb (bb
);
6113 stmt
= gsi_stmt (gsi
);
6115 /* Only handle conditional statement for now. */
6116 if (gimple_code (stmt
) != GIMPLE_COND
)
6119 gsi_prev_nondebug (&gsi
);
6120 stmt
= gsi_stmt (gsi
);
6121 if (stmt
!= iv_update
)
6124 gsi_prev_nondebug (&gsi
);
6125 if (gsi_end_p (gsi
))
6128 stmt
= gsi_stmt (gsi
);
6129 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6132 if (stmt
!= use
->stmt
)
6135 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6138 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6140 fprintf (dump_file
, "Reordering \n");
6141 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6142 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6143 fprintf (dump_file
, "\n");
6146 gsi
= gsi_for_stmt (use
->stmt
);
6147 gsi_iv
= gsi_for_stmt (iv_update
);
6148 gsi_move_before (&gsi_iv
, &gsi
);
6150 cand
->pos
= IP_BEFORE_USE
;
6151 cand
->incremented_at
= use
->stmt
;
6154 /* Rewrites USE (address that is an iv) using candidate CAND. */
6157 rewrite_use_address (struct ivopts_data
*data
,
6158 struct iv_use
*use
, struct iv_cand
*cand
)
6161 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6162 tree base_hint
= NULL_TREE
;
6166 adjust_iv_update_pos (cand
, use
);
6167 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6169 unshare_aff_combination (&aff
);
6171 /* To avoid undefined overflow problems, all IV candidates use unsigned
6172 integer types. The drawback is that this makes it impossible for
6173 create_mem_ref to distinguish an IV that is based on a memory object
6174 from one that represents simply an offset.
6176 To work around this problem, we pass a hint to create_mem_ref that
6177 indicates which variable (if any) in aff is an IV based on a memory
6178 object. Note that we only consider the candidate. If this is not
6179 based on an object, the base of the reference is in some subexpression
6180 of the use -- but these will use pointer types, so they are recognized
6181 by the create_mem_ref heuristics anyway. */
6182 if (cand
->iv
->base_object
)
6183 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6185 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6186 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6187 reference_alias_ptr_type (*use
->op_p
),
6188 iv
, base_hint
, data
->speed
);
6189 copy_ref_info (ref
, *use
->op_p
);
6193 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6197 rewrite_use_compare (struct ivopts_data
*data
,
6198 struct iv_use
*use
, struct iv_cand
*cand
)
6200 tree comp
, *var_p
, op
, bound
;
6201 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6202 enum tree_code compare
;
6203 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6209 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6210 tree var_type
= TREE_TYPE (var
);
6213 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6215 fprintf (dump_file
, "Replacing exit test: ");
6216 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6218 compare
= iv_elimination_compare (data
, use
);
6219 bound
= unshare_expr (fold_convert (var_type
, bound
));
6220 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6222 gsi_insert_seq_on_edge_immediate (
6223 loop_preheader_edge (data
->current_loop
),
6226 gimple_cond_set_lhs (use
->stmt
, var
);
6227 gimple_cond_set_code (use
->stmt
, compare
);
6228 gimple_cond_set_rhs (use
->stmt
, op
);
6232 /* The induction variable elimination failed; just express the original
6234 comp
= get_computation (data
->current_loop
, use
, cand
);
6235 gcc_assert (comp
!= NULL_TREE
);
6237 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6240 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6241 true, GSI_SAME_STMT
);
6244 /* Rewrites USE using candidate CAND. */
6247 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6251 case USE_NONLINEAR_EXPR
:
6252 rewrite_use_nonlinear_expr (data
, use
, cand
);
6256 rewrite_use_address (data
, use
, cand
);
6260 rewrite_use_compare (data
, use
, cand
);
6267 update_stmt (use
->stmt
);
6270 /* Rewrite the uses using the selected induction variables. */
6273 rewrite_uses (struct ivopts_data
*data
)
6276 struct iv_cand
*cand
;
6279 for (i
= 0; i
< n_iv_uses (data
); i
++)
6281 use
= iv_use (data
, i
);
6282 cand
= use
->selected
;
6285 rewrite_use (data
, use
, cand
);
6289 /* Removes the ivs that are not used after rewriting. */
6292 remove_unused_ivs (struct ivopts_data
*data
)
6296 bitmap toremove
= BITMAP_ALLOC (NULL
);
6298 /* Figure out an order in which to release SSA DEFs so that we don't
6299 release something that we'd have to propagate into a debug stmt
6301 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6303 struct version_info
*info
;
6305 info
= ver_info (data
, j
);
6307 && !integer_zerop (info
->iv
->step
)
6309 && !info
->iv
->have_use_for
6310 && !info
->preserve_biv
)
6311 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6314 release_defs_bitset (toremove
);
6316 BITMAP_FREE (toremove
);
6319 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6320 for pointer_map_traverse. */
6323 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6324 void *data ATTRIBUTE_UNUSED
)
6326 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6332 /* Frees data allocated by the optimization of a single loop. */
6335 free_loop_data (struct ivopts_data
*data
)
6343 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6344 pointer_map_destroy (data
->niters
);
6345 data
->niters
= NULL
;
6348 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6350 struct version_info
*info
;
6352 info
= ver_info (data
, i
);
6355 info
->has_nonlin_use
= false;
6356 info
->preserve_biv
= false;
6359 bitmap_clear (data
->relevant
);
6360 bitmap_clear (data
->important_candidates
);
6362 for (i
= 0; i
< n_iv_uses (data
); i
++)
6364 struct iv_use
*use
= iv_use (data
, i
);
6367 BITMAP_FREE (use
->related_cands
);
6368 for (j
= 0; j
< use
->n_map_members
; j
++)
6369 if (use
->cost_map
[j
].depends_on
)
6370 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6371 free (use
->cost_map
);
6374 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
6376 for (i
= 0; i
< n_iv_cands (data
); i
++)
6378 struct iv_cand
*cand
= iv_cand (data
, i
);
6381 if (cand
->depends_on
)
6382 BITMAP_FREE (cand
->depends_on
);
6385 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
6387 if (data
->version_info_size
< num_ssa_names
)
6389 data
->version_info_size
= 2 * num_ssa_names
;
6390 free (data
->version_info
);
6391 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6394 data
->max_inv_id
= 0;
6396 FOR_EACH_VEC_ELT (tree
, decl_rtl_to_reset
, i
, obj
)
6397 SET_DECL_RTL (obj
, NULL_RTX
);
6399 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
6401 htab_empty (data
->inv_expr_tab
);
6402 data
->inv_expr_id
= 0;
6405 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6409 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6411 free_loop_data (data
);
6412 free (data
->version_info
);
6413 BITMAP_FREE (data
->relevant
);
6414 BITMAP_FREE (data
->important_candidates
);
6416 VEC_free (tree
, heap
, decl_rtl_to_reset
);
6417 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
6418 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
6419 htab_delete (data
->inv_expr_tab
);
6422 /* Returns true if the loop body BODY includes any function calls. */
6425 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6427 gimple_stmt_iterator gsi
;
6430 for (i
= 0; i
< num_nodes
; i
++)
6431 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6433 gimple stmt
= gsi_stmt (gsi
);
6434 if (is_gimple_call (stmt
)
6435 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6441 /* Optimizes the LOOP. Returns true if anything changed. */
6444 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6446 bool changed
= false;
6447 struct iv_ca
*iv_ca
;
6451 gcc_assert (!data
->niters
);
6452 data
->current_loop
= loop
;
6453 data
->speed
= optimize_loop_for_speed_p (loop
);
6455 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6457 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6459 exit
= single_dom_exit (loop
);
6462 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6463 exit
->src
->index
, exit
->dest
->index
);
6464 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6465 fprintf (dump_file
, "\n");
6468 fprintf (dump_file
, "\n");
6471 body
= get_loop_body (loop
);
6472 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6473 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6476 /* For each ssa name determines whether it behaves as an induction variable
6478 if (!find_induction_variables (data
))
6481 /* Finds interesting uses (item 1). */
6482 find_interesting_uses (data
);
6483 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6486 /* Finds candidates for the induction variables (item 2). */
6487 find_iv_candidates (data
);
6489 /* Calculates the costs (item 3, part 1). */
6490 determine_iv_costs (data
);
6491 determine_use_iv_costs (data
);
6492 determine_set_costs (data
);
6494 /* Find the optimal set of induction variables (item 3, part 2). */
6495 iv_ca
= find_optimal_iv_set (data
);
6500 /* Create the new induction variables (item 4, part 1). */
6501 create_new_ivs (data
, iv_ca
);
6502 iv_ca_free (&iv_ca
);
6504 /* Rewrite the uses (item 4, part 2). */
6505 rewrite_uses (data
);
6507 /* Remove the ivs that are unused after rewriting. */
6508 remove_unused_ivs (data
);
6510 /* We have changed the structure of induction variables; it might happen
6511 that definitions in the scev database refer to some of them that were
6516 free_loop_data (data
);
6521 /* Main entry point. Optimizes induction variables in loops. */
6524 tree_ssa_iv_optimize (void)
6527 struct ivopts_data data
;
6530 tree_ssa_iv_optimize_init (&data
);
6532 /* Optimize the loops starting with the innermost ones. */
6533 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6535 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6536 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6538 tree_ssa_iv_optimize_loop (&data
, loop
);
6541 tree_ssa_iv_optimize_finalize (&data
);