1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
3 Free Software Foundation, Inc.
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the
9 Free Software Foundation; either version 3, or (at your option) any
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
67 #include "coretypes.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "tree-flow.h"
75 #include "tree-pass.h"
77 #include "insn-config.h"
78 #include "pointer-set.h"
80 #include "tree-chrec.h"
81 #include "tree-scalar-evolution.h"
84 #include "langhooks.h"
85 #include "tree-affine.h"
87 #include "tree-inline.h"
88 #include "tree-ssa-propagate.h"
91 /* FIXME: Expressions are expanded to RTL in this pass to determine the
92 cost of different addressing modes. This should be moved to a TBD
93 interface between the GIMPLE and RTL worlds. */
97 /* The infinite cost. */
98 #define INFTY 10000000
100 #define AVG_LOOP_NITER(LOOP) 5
102 /* Returns the expected number of loop iterations for LOOP.
103 The average trip count is computed from profile data if it
106 static inline HOST_WIDE_INT
107 avg_loop_niter (struct loop
*loop
)
109 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
111 return AVG_LOOP_NITER (loop
);
116 /* Representation of the induction variable. */
119 tree base
; /* Initial value of the iv. */
120 tree base_object
; /* A memory object to that the induction variable points. */
121 tree step
; /* Step of the iv (constant only). */
122 tree ssa_name
; /* The ssa name with the value. */
123 bool biv_p
; /* Is it a biv? */
124 bool have_use_for
; /* Do we already have a use for it? */
125 unsigned use_id
; /* The identifier in the use if it is the case. */
128 /* Per-ssa version information (induction variable descriptions, etc.). */
131 tree name
; /* The ssa name. */
132 struct iv
*iv
; /* Induction variable description. */
133 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
134 an expression that is not an induction variable. */
135 bool preserve_biv
; /* For the original biv, whether to preserve it. */
136 unsigned inv_id
; /* Id of an invariant. */
142 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
143 USE_ADDRESS
, /* Use in an address. */
144 USE_COMPARE
/* Use is a compare. */
147 /* Cost of a computation. */
150 int cost
; /* The runtime cost. */
151 unsigned complexity
; /* The estimate of the complexity of the code for
152 the computation (in no concrete units --
153 complexity field should be larger for more
154 complex expressions and addressing modes). */
157 static const comp_cost no_cost
= {0, 0};
158 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
160 /* The candidate - cost pair. */
163 struct iv_cand
*cand
; /* The candidate. */
164 comp_cost cost
; /* The cost. */
165 bitmap depends_on
; /* The list of invariants that have to be
167 tree value
; /* For final value elimination, the expression for
168 the final value of the iv. For iv elimination,
169 the new bound to compare with. */
170 enum tree_code comp
; /* For iv elimination, the comparison. */
171 int inv_expr_id
; /* Loop invariant expression id. */
177 unsigned id
; /* The id of the use. */
178 enum use_type type
; /* Type of the use. */
179 struct iv
*iv
; /* The induction variable it is based on. */
180 gimple stmt
; /* Statement in that it occurs. */
181 tree
*op_p
; /* The place where it occurs. */
182 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
185 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
186 struct cost_pair
*cost_map
;
187 /* The costs wrto the iv candidates. */
189 struct iv_cand
*selected
;
190 /* The selected candidate. */
193 /* The position where the iv is computed. */
196 IP_NORMAL
, /* At the end, just before the exit condition. */
197 IP_END
, /* At the end of the latch block. */
198 IP_BEFORE_USE
, /* Immediately before a specific use. */
199 IP_AFTER_USE
, /* Immediately after a specific use. */
200 IP_ORIGINAL
/* The original biv. */
203 /* The induction variable candidate. */
206 unsigned id
; /* The number of the candidate. */
207 bool important
; /* Whether this is an "important" candidate, i.e. such
208 that it should be considered by all uses. */
209 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
210 gimple incremented_at
;/* For original biv, the statement where it is
212 tree var_before
; /* The variable used for it before increment. */
213 tree var_after
; /* The variable used for it after increment. */
214 struct iv
*iv
; /* The value of the candidate. NULL for
215 "pseudocandidate" used to indicate the possibility
216 to replace the final value of an iv by direct
217 computation of the value. */
218 unsigned cost
; /* Cost of the candidate. */
219 unsigned cost_step
; /* Cost of the candidate's increment operation. */
220 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
221 where it is incremented. */
222 bitmap depends_on
; /* The list of invariants that are used in step of the
226 /* Loop invariant expression hashtable entry. */
227 struct iv_inv_expr_ent
234 /* The data used by the induction variable optimizations. */
236 typedef struct iv_use
*iv_use_p
;
238 DEF_VEC_ALLOC_P(iv_use_p
,heap
);
240 typedef struct iv_cand
*iv_cand_p
;
241 DEF_VEC_P(iv_cand_p
);
242 DEF_VEC_ALLOC_P(iv_cand_p
,heap
);
246 /* The currently optimized loop. */
247 struct loop
*current_loop
;
249 /* Numbers of iterations for all exits of the current loop. */
250 struct pointer_map_t
*niters
;
252 /* Number of registers used in it. */
255 /* The size of version_info array allocated. */
256 unsigned version_info_size
;
258 /* The array of information for the ssa names. */
259 struct version_info
*version_info
;
261 /* The hashtable of loop invariant expressions created
265 /* Loop invariant expression id. */
268 /* The bitmap of indices in version_info whose value was changed. */
271 /* The uses of induction variables. */
272 VEC(iv_use_p
,heap
) *iv_uses
;
274 /* The candidates. */
275 VEC(iv_cand_p
,heap
) *iv_candidates
;
277 /* A bitmap of important candidates. */
278 bitmap important_candidates
;
280 /* The maximum invariant id. */
283 /* Whether to consider just related and important candidates when replacing a
285 bool consider_all_candidates
;
287 /* Are we optimizing for speed? */
290 /* Whether the loop body includes any function calls. */
291 bool body_includes_call
;
293 /* Whether the loop body can only be exited via single exit. */
294 bool loop_single_exit_p
;
297 /* An assignment of iv candidates to uses. */
301 /* The number of uses covered by the assignment. */
304 /* Number of uses that cannot be expressed by the candidates in the set. */
307 /* Candidate assigned to a use, together with the related costs. */
308 struct cost_pair
**cand_for_use
;
310 /* Number of times each candidate is used. */
311 unsigned *n_cand_uses
;
313 /* The candidates used. */
316 /* The number of candidates in the set. */
319 /* Total number of registers needed. */
322 /* Total cost of expressing uses. */
323 comp_cost cand_use_cost
;
325 /* Total cost of candidates. */
328 /* Number of times each invariant is used. */
329 unsigned *n_invariant_uses
;
331 /* The array holding the number of uses of each loop
332 invariant expressions created by ivopt. */
333 unsigned *used_inv_expr
;
335 /* The number of created loop invariants. */
336 unsigned num_used_inv_expr
;
338 /* Total cost of the assignment. */
342 /* Difference of two iv candidate assignments. */
349 /* An old assignment (for rollback purposes). */
350 struct cost_pair
*old_cp
;
352 /* A new assignment. */
353 struct cost_pair
*new_cp
;
355 /* Next change in the list. */
356 struct iv_ca_delta
*next_change
;
359 /* Bound on number of candidates below that all candidates are considered. */
361 #define CONSIDER_ALL_CANDIDATES_BOUND \
362 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
364 /* If there are more iv occurrences, we just give up (it is quite unlikely that
365 optimizing such a loop would help, and it would take ages). */
367 #define MAX_CONSIDERED_USES \
368 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
370 /* If there are at most this number of ivs in the set, try removing unnecessary
371 ivs from the set always. */
373 #define ALWAYS_PRUNE_CAND_SET_BOUND \
374 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
376 /* The list of trees for that the decl_rtl field must be reset is stored
379 static VEC(tree
,heap
) *decl_rtl_to_reset
;
381 static comp_cost
force_expr_to_var_cost (tree
, bool);
383 /* Number of uses recorded in DATA. */
385 static inline unsigned
386 n_iv_uses (struct ivopts_data
*data
)
388 return VEC_length (iv_use_p
, data
->iv_uses
);
391 /* Ith use recorded in DATA. */
393 static inline struct iv_use
*
394 iv_use (struct ivopts_data
*data
, unsigned i
)
396 return VEC_index (iv_use_p
, data
->iv_uses
, i
);
399 /* Number of candidates recorded in DATA. */
401 static inline unsigned
402 n_iv_cands (struct ivopts_data
*data
)
404 return VEC_length (iv_cand_p
, data
->iv_candidates
);
407 /* Ith candidate recorded in DATA. */
409 static inline struct iv_cand
*
410 iv_cand (struct ivopts_data
*data
, unsigned i
)
412 return VEC_index (iv_cand_p
, data
->iv_candidates
, i
);
415 /* The single loop exit if it dominates the latch, NULL otherwise. */
418 single_dom_exit (struct loop
*loop
)
420 edge exit
= single_exit (loop
);
425 if (!just_once_each_iteration_p (loop
, exit
->src
))
431 /* Dumps information about the induction variable IV to FILE. */
433 extern void dump_iv (FILE *, struct iv
*);
435 dump_iv (FILE *file
, struct iv
*iv
)
439 fprintf (file
, "ssa name ");
440 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
441 fprintf (file
, "\n");
444 fprintf (file
, " type ");
445 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
446 fprintf (file
, "\n");
450 fprintf (file
, " base ");
451 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
452 fprintf (file
, "\n");
454 fprintf (file
, " step ");
455 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
456 fprintf (file
, "\n");
460 fprintf (file
, " invariant ");
461 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
462 fprintf (file
, "\n");
467 fprintf (file
, " base object ");
468 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
469 fprintf (file
, "\n");
473 fprintf (file
, " is a biv\n");
476 /* Dumps information about the USE to FILE. */
478 extern void dump_use (FILE *, struct iv_use
*);
480 dump_use (FILE *file
, struct iv_use
*use
)
482 fprintf (file
, "use %d\n", use
->id
);
486 case USE_NONLINEAR_EXPR
:
487 fprintf (file
, " generic\n");
491 fprintf (file
, " address\n");
495 fprintf (file
, " compare\n");
502 fprintf (file
, " in statement ");
503 print_gimple_stmt (file
, use
->stmt
, 0, 0);
504 fprintf (file
, "\n");
506 fprintf (file
, " at position ");
508 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
509 fprintf (file
, "\n");
511 dump_iv (file
, use
->iv
);
513 if (use
->related_cands
)
515 fprintf (file
, " related candidates ");
516 dump_bitmap (file
, use
->related_cands
);
520 /* Dumps information about the uses to FILE. */
522 extern void dump_uses (FILE *, struct ivopts_data
*);
524 dump_uses (FILE *file
, struct ivopts_data
*data
)
529 for (i
= 0; i
< n_iv_uses (data
); i
++)
531 use
= iv_use (data
, i
);
533 dump_use (file
, use
);
534 fprintf (file
, "\n");
538 /* Dumps information about induction variable candidate CAND to FILE. */
540 extern void dump_cand (FILE *, struct iv_cand
*);
542 dump_cand (FILE *file
, struct iv_cand
*cand
)
544 struct iv
*iv
= cand
->iv
;
546 fprintf (file
, "candidate %d%s\n",
547 cand
->id
, cand
->important
? " (important)" : "");
549 if (cand
->depends_on
)
551 fprintf (file
, " depends on ");
552 dump_bitmap (file
, cand
->depends_on
);
557 fprintf (file
, " final value replacement\n");
561 if (cand
->var_before
)
563 fprintf (file
, " var_before ");
564 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
565 fprintf (file
, "\n");
569 fprintf (file
, " var_after ");
570 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
571 fprintf (file
, "\n");
577 fprintf (file
, " incremented before exit test\n");
581 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
585 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
589 fprintf (file
, " incremented at end\n");
593 fprintf (file
, " original biv\n");
600 /* Returns the info for ssa version VER. */
602 static inline struct version_info
*
603 ver_info (struct ivopts_data
*data
, unsigned ver
)
605 return data
->version_info
+ ver
;
608 /* Returns the info for ssa name NAME. */
610 static inline struct version_info
*
611 name_info (struct ivopts_data
*data
, tree name
)
613 return ver_info (data
, SSA_NAME_VERSION (name
));
616 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
620 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
622 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
626 if (sbb
== loop
->latch
)
632 return stmt
== last_stmt (bb
);
635 /* Returns true if STMT if after the place where the original induction
636 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
637 if the positions are identical. */
640 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
642 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
643 basic_block stmt_bb
= gimple_bb (stmt
);
645 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
648 if (stmt_bb
!= cand_bb
)
652 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
654 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
657 /* Returns true if STMT if after the place where the induction variable
658 CAND is incremented in LOOP. */
661 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
669 return stmt_after_ip_normal_pos (loop
, stmt
);
673 return stmt_after_inc_pos (cand
, stmt
, false);
676 return stmt_after_inc_pos (cand
, stmt
, true);
683 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
686 abnormal_ssa_name_p (tree exp
)
691 if (TREE_CODE (exp
) != SSA_NAME
)
694 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
697 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
698 abnormal phi node. Callback for for_each_index. */
701 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
702 void *data ATTRIBUTE_UNUSED
)
704 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
706 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
708 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
712 return !abnormal_ssa_name_p (*index
);
715 /* Returns true if EXPR contains a ssa name that occurs in an
716 abnormal phi node. */
719 contains_abnormal_ssa_name_p (tree expr
)
722 enum tree_code_class codeclass
;
727 code
= TREE_CODE (expr
);
728 codeclass
= TREE_CODE_CLASS (code
);
730 if (code
== SSA_NAME
)
731 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
733 if (code
== INTEGER_CST
734 || is_gimple_min_invariant (expr
))
737 if (code
== ADDR_EXPR
)
738 return !for_each_index (&TREE_OPERAND (expr
, 0),
739 idx_contains_abnormal_ssa_name_p
,
742 if (code
== COND_EXPR
)
743 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
744 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
745 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
751 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
756 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
768 /* Returns the structure describing number of iterations determined from
769 EXIT of DATA->current_loop, or NULL if something goes wrong. */
771 static struct tree_niter_desc
*
772 niter_for_exit (struct ivopts_data
*data
, edge exit
)
774 struct tree_niter_desc
*desc
;
779 data
->niters
= pointer_map_create ();
783 slot
= pointer_map_contains (data
->niters
, exit
);
787 /* Try to determine number of iterations. We cannot safely work with ssa
788 names that appear in phi nodes on abnormal edges, so that we do not
789 create overlapping life ranges for them (PR 27283). */
790 desc
= XNEW (struct tree_niter_desc
);
791 if (!number_of_iterations_exit (data
->current_loop
,
793 || contains_abnormal_ssa_name_p (desc
->niter
))
798 slot
= pointer_map_insert (data
->niters
, exit
);
802 desc
= (struct tree_niter_desc
*) *slot
;
807 /* Returns the structure describing number of iterations determined from
808 single dominating exit of DATA->current_loop, or NULL if something
811 static struct tree_niter_desc
*
812 niter_for_single_dom_exit (struct ivopts_data
*data
)
814 edge exit
= single_dom_exit (data
->current_loop
);
819 return niter_for_exit (data
, exit
);
822 /* Hash table equality function for expressions. */
825 htab_inv_expr_eq (const void *ent1
, const void *ent2
)
827 const struct iv_inv_expr_ent
*expr1
=
828 (const struct iv_inv_expr_ent
*)ent1
;
829 const struct iv_inv_expr_ent
*expr2
=
830 (const struct iv_inv_expr_ent
*)ent2
;
832 return expr1
->hash
== expr2
->hash
833 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
836 /* Hash function for loop invariant expressions. */
839 htab_inv_expr_hash (const void *ent
)
841 const struct iv_inv_expr_ent
*expr
=
842 (const struct iv_inv_expr_ent
*)ent
;
846 /* Initializes data structures used by the iv optimization pass, stored
850 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
852 data
->version_info_size
= 2 * num_ssa_names
;
853 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
854 data
->relevant
= BITMAP_ALLOC (NULL
);
855 data
->important_candidates
= BITMAP_ALLOC (NULL
);
856 data
->max_inv_id
= 0;
858 data
->iv_uses
= VEC_alloc (iv_use_p
, heap
, 20);
859 data
->iv_candidates
= VEC_alloc (iv_cand_p
, heap
, 20);
860 data
->inv_expr_tab
= htab_create (10, htab_inv_expr_hash
,
861 htab_inv_expr_eq
, free
);
862 data
->inv_expr_id
= 0;
863 decl_rtl_to_reset
= VEC_alloc (tree
, heap
, 20);
866 /* Returns a memory object to that EXPR points. In case we are able to
867 determine that it does not point to any such object, NULL is returned. */
870 determine_base_object (tree expr
)
872 enum tree_code code
= TREE_CODE (expr
);
875 /* If this is a pointer casted to any type, we need to determine
876 the base object for the pointer; so handle conversions before
877 throwing away non-pointer expressions. */
878 if (CONVERT_EXPR_P (expr
))
879 return determine_base_object (TREE_OPERAND (expr
, 0));
881 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
890 obj
= TREE_OPERAND (expr
, 0);
891 base
= get_base_address (obj
);
896 if (TREE_CODE (base
) == MEM_REF
)
897 return determine_base_object (TREE_OPERAND (base
, 0));
899 return fold_convert (ptr_type_node
,
900 build_fold_addr_expr (base
));
902 case POINTER_PLUS_EXPR
:
903 return determine_base_object (TREE_OPERAND (expr
, 0));
907 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
911 return fold_convert (ptr_type_node
, expr
);
915 /* Allocates an induction variable with given initial value BASE and step STEP
919 alloc_iv (tree base
, tree step
)
921 struct iv
*iv
= XCNEW (struct iv
);
922 gcc_assert (step
!= NULL_TREE
);
925 iv
->base_object
= determine_base_object (base
);
928 iv
->have_use_for
= false;
930 iv
->ssa_name
= NULL_TREE
;
935 /* Sets STEP and BASE for induction variable IV. */
938 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
940 struct version_info
*info
= name_info (data
, iv
);
942 gcc_assert (!info
->iv
);
944 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
945 info
->iv
= alloc_iv (base
, step
);
946 info
->iv
->ssa_name
= iv
;
949 /* Finds induction variable declaration for VAR. */
952 get_iv (struct ivopts_data
*data
, tree var
)
955 tree type
= TREE_TYPE (var
);
957 if (!POINTER_TYPE_P (type
)
958 && !INTEGRAL_TYPE_P (type
))
961 if (!name_info (data
, var
)->iv
)
963 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
966 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
967 set_iv (data
, var
, var
, build_int_cst (type
, 0));
970 return name_info (data
, var
)->iv
;
973 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
974 not define a simple affine biv with nonzero step. */
977 determine_biv_step (gimple phi
)
979 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
980 tree name
= PHI_RESULT (phi
);
983 if (virtual_operand_p (name
))
986 if (!simple_iv (loop
, loop
, name
, &iv
, true))
989 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
992 /* Finds basic ivs. */
995 find_bivs (struct ivopts_data
*data
)
998 tree step
, type
, base
;
1000 struct loop
*loop
= data
->current_loop
;
1001 gimple_stmt_iterator psi
;
1003 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1005 phi
= gsi_stmt (psi
);
1007 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1010 step
= determine_biv_step (phi
);
1014 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1015 base
= expand_simple_operations (base
);
1016 if (contains_abnormal_ssa_name_p (base
)
1017 || contains_abnormal_ssa_name_p (step
))
1020 type
= TREE_TYPE (PHI_RESULT (phi
));
1021 base
= fold_convert (type
, base
);
1024 if (POINTER_TYPE_P (type
))
1025 step
= convert_to_ptrofftype (step
);
1027 step
= fold_convert (type
, step
);
1030 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1037 /* Marks basic ivs. */
1040 mark_bivs (struct ivopts_data
*data
)
1044 struct iv
*iv
, *incr_iv
;
1045 struct loop
*loop
= data
->current_loop
;
1046 basic_block incr_bb
;
1047 gimple_stmt_iterator psi
;
1049 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1051 phi
= gsi_stmt (psi
);
1053 iv
= get_iv (data
, PHI_RESULT (phi
));
1057 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1058 incr_iv
= get_iv (data
, var
);
1062 /* If the increment is in the subloop, ignore it. */
1063 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1064 if (incr_bb
->loop_father
!= data
->current_loop
1065 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1069 incr_iv
->biv_p
= true;
1073 /* Checks whether STMT defines a linear induction variable and stores its
1074 parameters to IV. */
1077 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1080 struct loop
*loop
= data
->current_loop
;
1082 iv
->base
= NULL_TREE
;
1083 iv
->step
= NULL_TREE
;
1085 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1088 lhs
= gimple_assign_lhs (stmt
);
1089 if (TREE_CODE (lhs
) != SSA_NAME
)
1092 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1094 iv
->base
= expand_simple_operations (iv
->base
);
1096 if (contains_abnormal_ssa_name_p (iv
->base
)
1097 || contains_abnormal_ssa_name_p (iv
->step
))
1100 /* If STMT could throw, then do not consider STMT as defining a GIV.
1101 While this will suppress optimizations, we can not safely delete this
1102 GIV and associated statements, even if it appears it is not used. */
1103 if (stmt_could_throw_p (stmt
))
1109 /* Finds general ivs in statement STMT. */
1112 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1116 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1119 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1122 /* Finds general ivs in basic block BB. */
1125 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1127 gimple_stmt_iterator bsi
;
1129 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1130 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1133 /* Finds general ivs. */
1136 find_givs (struct ivopts_data
*data
)
1138 struct loop
*loop
= data
->current_loop
;
1139 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1142 for (i
= 0; i
< loop
->num_nodes
; i
++)
1143 find_givs_in_bb (data
, body
[i
]);
1147 /* For each ssa name defined in LOOP determines whether it is an induction
1148 variable and if so, its initial value and step. */
1151 find_induction_variables (struct ivopts_data
*data
)
1156 if (!find_bivs (data
))
1162 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1164 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1168 fprintf (dump_file
, " number of iterations ");
1169 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1170 if (!integer_zerop (niter
->may_be_zero
))
1172 fprintf (dump_file
, "; zero if ");
1173 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1175 fprintf (dump_file
, "\n\n");
1178 fprintf (dump_file
, "Induction variables:\n\n");
1180 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1182 if (ver_info (data
, i
)->iv
)
1183 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1190 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1192 static struct iv_use
*
1193 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1194 gimple stmt
, enum use_type use_type
)
1196 struct iv_use
*use
= XCNEW (struct iv_use
);
1198 use
->id
= n_iv_uses (data
);
1199 use
->type
= use_type
;
1203 use
->related_cands
= BITMAP_ALLOC (NULL
);
1205 /* To avoid showing ssa name in the dumps, if it was not reset by the
1207 iv
->ssa_name
= NULL_TREE
;
1209 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1210 dump_use (dump_file
, use
);
1212 VEC_safe_push (iv_use_p
, heap
, data
->iv_uses
, use
);
1217 /* Checks whether OP is a loop-level invariant and if so, records it.
1218 NONLINEAR_USE is true if the invariant is used in a way we do not
1219 handle specially. */
1222 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1225 struct version_info
*info
;
1227 if (TREE_CODE (op
) != SSA_NAME
1228 || virtual_operand_p (op
))
1231 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1233 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1236 info
= name_info (data
, op
);
1238 info
->has_nonlin_use
|= nonlinear_use
;
1240 info
->inv_id
= ++data
->max_inv_id
;
1241 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1244 /* Checks whether the use OP is interesting and if so, records it. */
1246 static struct iv_use
*
1247 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1254 if (TREE_CODE (op
) != SSA_NAME
)
1257 iv
= get_iv (data
, op
);
1261 if (iv
->have_use_for
)
1263 use
= iv_use (data
, iv
->use_id
);
1265 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1269 if (integer_zerop (iv
->step
))
1271 record_invariant (data
, op
, true);
1274 iv
->have_use_for
= true;
1276 civ
= XNEW (struct iv
);
1279 stmt
= SSA_NAME_DEF_STMT (op
);
1280 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1281 || is_gimple_assign (stmt
));
1283 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1284 iv
->use_id
= use
->id
;
1289 /* Given a condition in statement STMT, checks whether it is a compare
1290 of an induction variable and an invariant. If this is the case,
1291 CONTROL_VAR is set to location of the iv, BOUND to the location of
1292 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1293 induction variable descriptions, and true is returned. If this is not
1294 the case, CONTROL_VAR and BOUND are set to the arguments of the
1295 condition and false is returned. */
1298 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1299 tree
**control_var
, tree
**bound
,
1300 struct iv
**iv_var
, struct iv
**iv_bound
)
1302 /* The objects returned when COND has constant operands. */
1303 static struct iv const_iv
;
1305 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1306 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1309 if (gimple_code (stmt
) == GIMPLE_COND
)
1311 op0
= gimple_cond_lhs_ptr (stmt
);
1312 op1
= gimple_cond_rhs_ptr (stmt
);
1316 op0
= gimple_assign_rhs1_ptr (stmt
);
1317 op1
= gimple_assign_rhs2_ptr (stmt
);
1320 zero
= integer_zero_node
;
1321 const_iv
.step
= integer_zero_node
;
1323 if (TREE_CODE (*op0
) == SSA_NAME
)
1324 iv0
= get_iv (data
, *op0
);
1325 if (TREE_CODE (*op1
) == SSA_NAME
)
1326 iv1
= get_iv (data
, *op1
);
1328 /* Exactly one of the compared values must be an iv, and the other one must
1333 if (integer_zerop (iv0
->step
))
1335 /* Control variable may be on the other side. */
1336 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1337 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1339 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1343 *control_var
= op0
;;
1354 /* Checks whether the condition in STMT is interesting and if so,
1358 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1360 tree
*var_p
, *bound_p
;
1361 struct iv
*var_iv
, *civ
;
1363 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1365 find_interesting_uses_op (data
, *var_p
);
1366 find_interesting_uses_op (data
, *bound_p
);
1370 civ
= XNEW (struct iv
);
1372 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1375 /* Returns true if expression EXPR is obviously invariant in LOOP,
1376 i.e. if all its operands are defined outside of the LOOP. LOOP
1377 should not be the function body. */
1380 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1385 gcc_assert (loop_depth (loop
) > 0);
1387 if (is_gimple_min_invariant (expr
))
1390 if (TREE_CODE (expr
) == SSA_NAME
)
1392 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1394 && flow_bb_inside_loop_p (loop
, def_bb
))
1403 len
= TREE_OPERAND_LENGTH (expr
);
1404 for (i
= 0; i
< len
; i
++)
1405 if (TREE_OPERAND (expr
, i
)
1406 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1412 /* Returns true if statement STMT is obviously invariant in LOOP,
1413 i.e. if all its operands on the RHS are defined outside of the LOOP.
1414 LOOP should not be the function body. */
1417 stmt_invariant_in_loop_p (struct loop
*loop
, gimple stmt
)
1422 gcc_assert (loop_depth (loop
) > 0);
1424 lhs
= gimple_get_lhs (stmt
);
1425 for (i
= 0; i
< gimple_num_ops (stmt
); i
++)
1427 tree op
= gimple_op (stmt
, i
);
1428 if (op
!= lhs
&& !expr_invariant_in_loop_p (loop
, op
))
1435 /* Cumulates the steps of indices into DATA and replaces their values with the
1436 initial ones. Returns false when the value of the index cannot be determined.
1437 Callback for for_each_index. */
1439 struct ifs_ivopts_data
1441 struct ivopts_data
*ivopts_data
;
1447 idx_find_step (tree base
, tree
*idx
, void *data
)
1449 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1451 tree step
, iv_base
, iv_step
, lbound
, off
;
1452 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1454 /* If base is a component ref, require that the offset of the reference
1456 if (TREE_CODE (base
) == COMPONENT_REF
)
1458 off
= component_ref_field_offset (base
);
1459 return expr_invariant_in_loop_p (loop
, off
);
1462 /* If base is array, first check whether we will be able to move the
1463 reference out of the loop (in order to take its address in strength
1464 reduction). In order for this to work we need both lower bound
1465 and step to be loop invariants. */
1466 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1468 /* Moreover, for a range, the size needs to be invariant as well. */
1469 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1470 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1473 step
= array_ref_element_size (base
);
1474 lbound
= array_ref_low_bound (base
);
1476 if (!expr_invariant_in_loop_p (loop
, step
)
1477 || !expr_invariant_in_loop_p (loop
, lbound
))
1481 if (TREE_CODE (*idx
) != SSA_NAME
)
1484 iv
= get_iv (dta
->ivopts_data
, *idx
);
1488 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1489 *&x[0], which is not folded and does not trigger the
1490 ARRAY_REF path below. */
1493 if (integer_zerop (iv
->step
))
1496 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1498 step
= array_ref_element_size (base
);
1500 /* We only handle addresses whose step is an integer constant. */
1501 if (TREE_CODE (step
) != INTEGER_CST
)
1505 /* The step for pointer arithmetics already is 1 byte. */
1506 step
= size_one_node
;
1510 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1511 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1514 /* The index might wrap. */
1518 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1519 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1524 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1525 object is passed to it in DATA. */
1528 idx_record_use (tree base
, tree
*idx
,
1531 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1532 find_interesting_uses_op (data
, *idx
);
1533 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1535 find_interesting_uses_op (data
, array_ref_element_size (base
));
1536 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1541 /* If we can prove that TOP = cst * BOT for some constant cst,
1542 store cst to MUL and return true. Otherwise return false.
1543 The returned value is always sign-extended, regardless of the
1544 signedness of TOP and BOT. */
1547 constant_multiple_of (tree top
, tree bot
, double_int
*mul
)
1550 enum tree_code code
;
1551 double_int res
, p0
, p1
;
1552 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1557 if (operand_equal_p (top
, bot
, 0))
1559 *mul
= double_int_one
;
1563 code
= TREE_CODE (top
);
1567 mby
= TREE_OPERAND (top
, 1);
1568 if (TREE_CODE (mby
) != INTEGER_CST
)
1571 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1574 *mul
= (res
* tree_to_double_int (mby
)).sext (precision
);
1579 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1580 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1583 if (code
== MINUS_EXPR
)
1585 *mul
= (p0
+ p1
).sext (precision
);
1589 if (TREE_CODE (bot
) != INTEGER_CST
)
1592 p0
= tree_to_double_int (top
).sext (precision
);
1593 p1
= tree_to_double_int (bot
).sext (precision
);
1596 *mul
= p0
.sdivmod (p1
, FLOOR_DIV_EXPR
, &res
).sext (precision
);
1597 return res
.is_zero ();
1604 /* Returns true if memory reference REF with step STEP may be unaligned. */
1607 may_be_unaligned_p (tree ref
, tree step
)
1611 HOST_WIDE_INT bitsize
;
1612 HOST_WIDE_INT bitpos
;
1614 enum machine_mode mode
;
1615 int unsignedp
, volatilep
;
1616 unsigned base_align
;
1618 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1619 thus they are not misaligned. */
1620 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1623 /* The test below is basically copy of what expr.c:normal_inner_ref
1624 does to check whether the object must be loaded by parts when
1625 STRICT_ALIGNMENT is true. */
1626 base
= get_inner_reference (ref
, &bitsize
, &bitpos
, &toffset
, &mode
,
1627 &unsignedp
, &volatilep
, true);
1628 base_type
= TREE_TYPE (base
);
1629 base_align
= get_object_alignment (base
);
1630 base_align
= MAX (base_align
, TYPE_ALIGN (base_type
));
1632 if (mode
!= BLKmode
)
1634 unsigned mode_align
= GET_MODE_ALIGNMENT (mode
);
1636 if (base_align
< mode_align
1637 || (bitpos
% mode_align
) != 0
1638 || (bitpos
% BITS_PER_UNIT
) != 0)
1642 && (highest_pow2_factor (toffset
) * BITS_PER_UNIT
) < mode_align
)
1645 if ((highest_pow2_factor (step
) * BITS_PER_UNIT
) < mode_align
)
1652 /* Return true if EXPR may be non-addressable. */
1655 may_be_nonaddressable_p (tree expr
)
1657 switch (TREE_CODE (expr
))
1659 case TARGET_MEM_REF
:
1660 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1661 target, thus they are always addressable. */
1665 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1666 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1668 case VIEW_CONVERT_EXPR
:
1669 /* This kind of view-conversions may wrap non-addressable objects
1670 and make them look addressable. After some processing the
1671 non-addressability may be uncovered again, causing ADDR_EXPRs
1672 of inappropriate objects to be built. */
1673 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1674 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1677 /* ... fall through ... */
1680 case ARRAY_RANGE_REF
:
1681 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1693 /* Finds addresses in *OP_P inside STMT. */
1696 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1698 tree base
= *op_p
, step
= size_zero_node
;
1700 struct ifs_ivopts_data ifs_ivopts_data
;
1702 /* Do not play with volatile memory references. A bit too conservative,
1703 perhaps, but safe. */
1704 if (gimple_has_volatile_ops (stmt
))
1707 /* Ignore bitfields for now. Not really something terribly complicated
1709 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1712 base
= unshare_expr (base
);
1714 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1716 tree type
= build_pointer_type (TREE_TYPE (base
));
1720 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1722 civ
= get_iv (data
, TMR_BASE (base
));
1726 TMR_BASE (base
) = civ
->base
;
1729 if (TMR_INDEX2 (base
)
1730 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1732 civ
= get_iv (data
, TMR_INDEX2 (base
));
1736 TMR_INDEX2 (base
) = civ
->base
;
1739 if (TMR_INDEX (base
)
1740 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1742 civ
= get_iv (data
, TMR_INDEX (base
));
1746 TMR_INDEX (base
) = civ
->base
;
1751 if (TMR_STEP (base
))
1752 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1754 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1758 if (integer_zerop (step
))
1760 base
= tree_mem_ref_addr (type
, base
);
1764 ifs_ivopts_data
.ivopts_data
= data
;
1765 ifs_ivopts_data
.stmt
= stmt
;
1766 ifs_ivopts_data
.step
= size_zero_node
;
1767 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1768 || integer_zerop (ifs_ivopts_data
.step
))
1770 step
= ifs_ivopts_data
.step
;
1772 /* Check that the base expression is addressable. This needs
1773 to be done after substituting bases of IVs into it. */
1774 if (may_be_nonaddressable_p (base
))
1777 /* Moreover, on strict alignment platforms, check that it is
1778 sufficiently aligned. */
1779 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1782 base
= build_fold_addr_expr (base
);
1784 /* Substituting bases of IVs into the base expression might
1785 have caused folding opportunities. */
1786 if (TREE_CODE (base
) == ADDR_EXPR
)
1788 tree
*ref
= &TREE_OPERAND (base
, 0);
1789 while (handled_component_p (*ref
))
1790 ref
= &TREE_OPERAND (*ref
, 0);
1791 if (TREE_CODE (*ref
) == MEM_REF
)
1793 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1794 TREE_OPERAND (*ref
, 0),
1795 TREE_OPERAND (*ref
, 1));
1802 civ
= alloc_iv (base
, step
);
1803 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1807 for_each_index (op_p
, idx_record_use
, data
);
1810 /* Finds and records invariants used in STMT. */
1813 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1816 use_operand_p use_p
;
1819 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1821 op
= USE_FROM_PTR (use_p
);
1822 record_invariant (data
, op
, false);
1826 /* Finds interesting uses of induction variables in the statement STMT. */
1829 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1832 tree op
, *lhs
, *rhs
;
1834 use_operand_p use_p
;
1835 enum tree_code code
;
1837 find_invariants_stmt (data
, stmt
);
1839 if (gimple_code (stmt
) == GIMPLE_COND
)
1841 find_interesting_uses_cond (data
, stmt
);
1845 if (is_gimple_assign (stmt
))
1847 lhs
= gimple_assign_lhs_ptr (stmt
);
1848 rhs
= gimple_assign_rhs1_ptr (stmt
);
1850 if (TREE_CODE (*lhs
) == SSA_NAME
)
1852 /* If the statement defines an induction variable, the uses are not
1853 interesting by themselves. */
1855 iv
= get_iv (data
, *lhs
);
1857 if (iv
&& !integer_zerop (iv
->step
))
1861 code
= gimple_assign_rhs_code (stmt
);
1862 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1863 && (REFERENCE_CLASS_P (*rhs
)
1864 || is_gimple_val (*rhs
)))
1866 if (REFERENCE_CLASS_P (*rhs
))
1867 find_interesting_uses_address (data
, stmt
, rhs
);
1869 find_interesting_uses_op (data
, *rhs
);
1871 if (REFERENCE_CLASS_P (*lhs
))
1872 find_interesting_uses_address (data
, stmt
, lhs
);
1875 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1877 find_interesting_uses_cond (data
, stmt
);
1881 /* TODO -- we should also handle address uses of type
1883 memory = call (whatever);
1890 if (gimple_code (stmt
) == GIMPLE_PHI
1891 && gimple_bb (stmt
) == data
->current_loop
->header
)
1893 iv
= get_iv (data
, PHI_RESULT (stmt
));
1895 if (iv
&& !integer_zerop (iv
->step
))
1899 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1901 op
= USE_FROM_PTR (use_p
);
1903 if (TREE_CODE (op
) != SSA_NAME
)
1906 iv
= get_iv (data
, op
);
1910 find_interesting_uses_op (data
, op
);
1914 /* Finds interesting uses of induction variables outside of loops
1915 on loop exit edge EXIT. */
1918 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
1921 gimple_stmt_iterator psi
;
1924 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
1926 phi
= gsi_stmt (psi
);
1927 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
1928 if (!virtual_operand_p (def
))
1929 find_interesting_uses_op (data
, def
);
1933 /* Finds uses of the induction variables that are interesting. */
1936 find_interesting_uses (struct ivopts_data
*data
)
1939 gimple_stmt_iterator bsi
;
1940 basic_block
*body
= get_loop_body (data
->current_loop
);
1942 struct version_info
*info
;
1945 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1946 fprintf (dump_file
, "Uses:\n\n");
1948 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
1953 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
1954 if (e
->dest
!= EXIT_BLOCK_PTR
1955 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
1956 find_interesting_uses_outside (data
, e
);
1958 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1959 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1960 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1961 if (!is_gimple_debug (gsi_stmt (bsi
)))
1962 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
1965 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1969 fprintf (dump_file
, "\n");
1971 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1973 info
= ver_info (data
, i
);
1976 fprintf (dump_file
, " ");
1977 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
1978 fprintf (dump_file
, " is invariant (%d)%s\n",
1979 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
1983 fprintf (dump_file
, "\n");
1989 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1990 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1991 we are at the top-level of the processed address. */
1994 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
1995 unsigned HOST_WIDE_INT
*offset
)
1997 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
1998 enum tree_code code
;
1999 tree type
, orig_type
= TREE_TYPE (expr
);
2000 unsigned HOST_WIDE_INT off0
, off1
, st
;
2001 tree orig_expr
= expr
;
2005 type
= TREE_TYPE (expr
);
2006 code
= TREE_CODE (expr
);
2012 if (!cst_and_fits_in_hwi (expr
)
2013 || integer_zerop (expr
))
2016 *offset
= int_cst_value (expr
);
2017 return build_int_cst (orig_type
, 0);
2019 case POINTER_PLUS_EXPR
:
2022 op0
= TREE_OPERAND (expr
, 0);
2023 op1
= TREE_OPERAND (expr
, 1);
2025 op0
= strip_offset_1 (op0
, false, false, &off0
);
2026 op1
= strip_offset_1 (op1
, false, false, &off1
);
2028 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2029 if (op0
== TREE_OPERAND (expr
, 0)
2030 && op1
== TREE_OPERAND (expr
, 1))
2033 if (integer_zerop (op1
))
2035 else if (integer_zerop (op0
))
2037 if (code
== MINUS_EXPR
)
2038 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2043 expr
= fold_build2 (code
, type
, op0
, op1
);
2045 return fold_convert (orig_type
, expr
);
2048 op1
= TREE_OPERAND (expr
, 1);
2049 if (!cst_and_fits_in_hwi (op1
))
2052 op0
= TREE_OPERAND (expr
, 0);
2053 op0
= strip_offset_1 (op0
, false, false, &off0
);
2054 if (op0
== TREE_OPERAND (expr
, 0))
2057 *offset
= off0
* int_cst_value (op1
);
2058 if (integer_zerop (op0
))
2061 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2063 return fold_convert (orig_type
, expr
);
2066 case ARRAY_RANGE_REF
:
2070 step
= array_ref_element_size (expr
);
2071 if (!cst_and_fits_in_hwi (step
))
2074 st
= int_cst_value (step
);
2075 op1
= TREE_OPERAND (expr
, 1);
2076 op1
= strip_offset_1 (op1
, false, false, &off1
);
2077 *offset
= off1
* st
;
2080 && integer_zerop (op1
))
2082 /* Strip the component reference completely. */
2083 op0
= TREE_OPERAND (expr
, 0);
2084 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2094 tmp
= component_ref_field_offset (expr
);
2096 && cst_and_fits_in_hwi (tmp
))
2098 /* Strip the component reference completely. */
2099 op0
= TREE_OPERAND (expr
, 0);
2100 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2101 *offset
= off0
+ int_cst_value (tmp
);
2107 op0
= TREE_OPERAND (expr
, 0);
2108 op0
= strip_offset_1 (op0
, true, true, &off0
);
2111 if (op0
== TREE_OPERAND (expr
, 0))
2114 expr
= build_fold_addr_expr (op0
);
2115 return fold_convert (orig_type
, expr
);
2118 /* ??? Offset operand? */
2119 inside_addr
= false;
2126 /* Default handling of expressions for that we want to recurse into
2127 the first operand. */
2128 op0
= TREE_OPERAND (expr
, 0);
2129 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2132 if (op0
== TREE_OPERAND (expr
, 0)
2133 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2136 expr
= copy_node (expr
);
2137 TREE_OPERAND (expr
, 0) = op0
;
2139 TREE_OPERAND (expr
, 1) = op1
;
2141 /* Inside address, we might strip the top level component references,
2142 thus changing type of the expression. Handling of ADDR_EXPR
2144 expr
= fold_convert (orig_type
, expr
);
2149 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2152 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2154 return strip_offset_1 (expr
, false, false, offset
);
2157 /* Returns variant of TYPE that can be used as base for different uses.
2158 We return unsigned type with the same precision, which avoids problems
2162 generic_type_for (tree type
)
2164 if (POINTER_TYPE_P (type
))
2165 return unsigned_type_for (type
);
2167 if (TYPE_UNSIGNED (type
))
2170 return unsigned_type_for (type
);
2173 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2174 the bitmap to that we should store it. */
2176 static struct ivopts_data
*fd_ivopts_data
;
2178 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2180 bitmap
*depends_on
= (bitmap
*) data
;
2181 struct version_info
*info
;
2183 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2185 info
= name_info (fd_ivopts_data
, *expr_p
);
2187 if (!info
->inv_id
|| info
->has_nonlin_use
)
2191 *depends_on
= BITMAP_ALLOC (NULL
);
2192 bitmap_set_bit (*depends_on
, info
->inv_id
);
2197 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2198 position to POS. If USE is not NULL, the candidate is set as related to
2199 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2200 replacement of the final value of the iv by a direct computation. */
2202 static struct iv_cand
*
2203 add_candidate_1 (struct ivopts_data
*data
,
2204 tree base
, tree step
, bool important
, enum iv_position pos
,
2205 struct iv_use
*use
, gimple incremented_at
)
2208 struct iv_cand
*cand
= NULL
;
2209 tree type
, orig_type
;
2211 /* For non-original variables, make sure their values are computed in a type
2212 that does not invoke undefined behavior on overflows (since in general,
2213 we cannot prove that these induction variables are non-wrapping). */
2214 if (pos
!= IP_ORIGINAL
)
2216 orig_type
= TREE_TYPE (base
);
2217 type
= generic_type_for (orig_type
);
2218 if (type
!= orig_type
)
2220 base
= fold_convert (type
, base
);
2221 step
= fold_convert (type
, step
);
2225 for (i
= 0; i
< n_iv_cands (data
); i
++)
2227 cand
= iv_cand (data
, i
);
2229 if (cand
->pos
!= pos
)
2232 if (cand
->incremented_at
!= incremented_at
2233 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2234 && cand
->ainc_use
!= use
))
2248 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2249 && operand_equal_p (step
, cand
->iv
->step
, 0)
2250 && (TYPE_PRECISION (TREE_TYPE (base
))
2251 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2255 if (i
== n_iv_cands (data
))
2257 cand
= XCNEW (struct iv_cand
);
2263 cand
->iv
= alloc_iv (base
, step
);
2266 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2268 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2269 cand
->var_after
= cand
->var_before
;
2271 cand
->important
= important
;
2272 cand
->incremented_at
= incremented_at
;
2273 VEC_safe_push (iv_cand_p
, heap
, data
->iv_candidates
, cand
);
2276 && TREE_CODE (step
) != INTEGER_CST
)
2278 fd_ivopts_data
= data
;
2279 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2282 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2283 cand
->ainc_use
= use
;
2285 cand
->ainc_use
= NULL
;
2287 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2288 dump_cand (dump_file
, cand
);
2291 if (important
&& !cand
->important
)
2293 cand
->important
= true;
2294 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2295 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2300 bitmap_set_bit (use
->related_cands
, i
);
2301 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2302 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2309 /* Returns true if incrementing the induction variable at the end of the LOOP
2312 The purpose is to avoid splitting latch edge with a biv increment, thus
2313 creating a jump, possibly confusing other optimization passes and leaving
2314 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2315 is not available (so we do not have a better alternative), or if the latch
2316 edge is already nonempty. */
2319 allow_ip_end_pos_p (struct loop
*loop
)
2321 if (!ip_normal_pos (loop
))
2324 if (!empty_block_p (ip_end_pos (loop
)))
2330 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2331 Important field is set to IMPORTANT. */
2334 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2335 bool important
, struct iv_use
*use
)
2337 basic_block use_bb
= gimple_bb (use
->stmt
);
2338 enum machine_mode mem_mode
;
2339 unsigned HOST_WIDE_INT cstepi
;
2341 /* If we insert the increment in any position other than the standard
2342 ones, we must ensure that it is incremented once per iteration.
2343 It must not be in an inner nested loop, or one side of an if
2345 if (use_bb
->loop_father
!= data
->current_loop
2346 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2347 || stmt_could_throw_p (use
->stmt
)
2348 || !cst_and_fits_in_hwi (step
))
2351 cstepi
= int_cst_value (step
);
2353 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2354 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2355 || USE_STORE_PRE_INCREMENT (mem_mode
))
2356 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2357 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2358 || USE_STORE_PRE_DECREMENT (mem_mode
))
2359 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2361 enum tree_code code
= MINUS_EXPR
;
2363 tree new_step
= step
;
2365 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2367 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2368 code
= POINTER_PLUS_EXPR
;
2371 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2372 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2373 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2376 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2377 || USE_STORE_POST_INCREMENT (mem_mode
))
2378 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2379 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2380 || USE_STORE_POST_DECREMENT (mem_mode
))
2381 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2383 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2388 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2389 position to POS. If USE is not NULL, the candidate is set as related to
2390 it. The candidate computation is scheduled on all available positions. */
2393 add_candidate (struct ivopts_data
*data
,
2394 tree base
, tree step
, bool important
, struct iv_use
*use
)
2396 if (ip_normal_pos (data
->current_loop
))
2397 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2398 if (ip_end_pos (data
->current_loop
)
2399 && allow_ip_end_pos_p (data
->current_loop
))
2400 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2402 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2403 add_autoinc_candidates (data
, base
, step
, important
, use
);
2406 /* Adds standard iv candidates. */
2409 add_standard_iv_candidates (struct ivopts_data
*data
)
2411 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2413 /* The same for a double-integer type if it is still fast enough. */
2415 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2416 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2417 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2418 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2420 /* The same for a double-integer type if it is still fast enough. */
2422 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2423 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2424 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2425 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2429 /* Adds candidates bases on the old induction variable IV. */
2432 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2436 struct iv_cand
*cand
;
2438 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2440 /* The same, but with initial value zero. */
2441 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2442 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2444 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2445 iv
->step
, true, NULL
);
2447 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2448 if (gimple_code (phi
) == GIMPLE_PHI
)
2450 /* Additionally record the possibility of leaving the original iv
2452 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2453 cand
= add_candidate_1 (data
,
2454 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2455 SSA_NAME_DEF_STMT (def
));
2456 cand
->var_before
= iv
->ssa_name
;
2457 cand
->var_after
= def
;
2461 /* Adds candidates based on the old induction variables. */
2464 add_old_ivs_candidates (struct ivopts_data
*data
)
2470 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2472 iv
= ver_info (data
, i
)->iv
;
2473 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2474 add_old_iv_candidates (data
, iv
);
2478 /* Adds candidates based on the value of the induction variable IV and USE. */
2481 add_iv_value_candidates (struct ivopts_data
*data
,
2482 struct iv
*iv
, struct iv_use
*use
)
2484 unsigned HOST_WIDE_INT offset
;
2488 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2490 /* The same, but with initial value zero. Make such variable important,
2491 since it is generic enough so that possibly many uses may be based
2493 basetype
= TREE_TYPE (iv
->base
);
2494 if (POINTER_TYPE_P (basetype
))
2495 basetype
= sizetype
;
2496 add_candidate (data
, build_int_cst (basetype
, 0),
2497 iv
->step
, true, use
);
2499 /* Third, try removing the constant offset. Make sure to even
2500 add a candidate for &a[0] vs. (T *)&a. */
2501 base
= strip_offset (iv
->base
, &offset
);
2503 || base
!= iv
->base
)
2504 add_candidate (data
, base
, iv
->step
, false, use
);
2507 /* Adds candidates based on the uses. */
2510 add_derived_ivs_candidates (struct ivopts_data
*data
)
2514 for (i
= 0; i
< n_iv_uses (data
); i
++)
2516 struct iv_use
*use
= iv_use (data
, i
);
2523 case USE_NONLINEAR_EXPR
:
2526 /* Just add the ivs based on the value of the iv used here. */
2527 add_iv_value_candidates (data
, use
->iv
, use
);
2536 /* Record important candidates and add them to related_cands bitmaps
2540 record_important_candidates (struct ivopts_data
*data
)
2545 for (i
= 0; i
< n_iv_cands (data
); i
++)
2547 struct iv_cand
*cand
= iv_cand (data
, i
);
2549 if (cand
->important
)
2550 bitmap_set_bit (data
->important_candidates
, i
);
2553 data
->consider_all_candidates
= (n_iv_cands (data
)
2554 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2556 if (data
->consider_all_candidates
)
2558 /* We will not need "related_cands" bitmaps in this case,
2559 so release them to decrease peak memory consumption. */
2560 for (i
= 0; i
< n_iv_uses (data
); i
++)
2562 use
= iv_use (data
, i
);
2563 BITMAP_FREE (use
->related_cands
);
2568 /* Add important candidates to the related_cands bitmaps. */
2569 for (i
= 0; i
< n_iv_uses (data
); i
++)
2570 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2571 data
->important_candidates
);
2575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2576 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2577 we allocate a simple list to every use. */
2580 alloc_use_cost_map (struct ivopts_data
*data
)
2582 unsigned i
, size
, s
, j
;
2584 for (i
= 0; i
< n_iv_uses (data
); i
++)
2586 struct iv_use
*use
= iv_use (data
, i
);
2589 if (data
->consider_all_candidates
)
2590 size
= n_iv_cands (data
);
2594 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
2599 /* Round up to the power of two, so that moduling by it is fast. */
2600 for (size
= 1; size
< s
; size
<<= 1)
2604 use
->n_map_members
= size
;
2605 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2609 /* Returns description of computation cost of expression whose runtime
2610 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2613 new_cost (unsigned runtime
, unsigned complexity
)
2617 cost
.cost
= runtime
;
2618 cost
.complexity
= complexity
;
2623 /* Adds costs COST1 and COST2. */
2626 add_costs (comp_cost cost1
, comp_cost cost2
)
2628 cost1
.cost
+= cost2
.cost
;
2629 cost1
.complexity
+= cost2
.complexity
;
2633 /* Subtracts costs COST1 and COST2. */
2636 sub_costs (comp_cost cost1
, comp_cost cost2
)
2638 cost1
.cost
-= cost2
.cost
;
2639 cost1
.complexity
-= cost2
.complexity
;
2644 /* Returns a negative number if COST1 < COST2, a positive number if
2645 COST1 > COST2, and 0 if COST1 = COST2. */
2648 compare_costs (comp_cost cost1
, comp_cost cost2
)
2650 if (cost1
.cost
== cost2
.cost
)
2651 return cost1
.complexity
- cost2
.complexity
;
2653 return cost1
.cost
- cost2
.cost
;
2656 /* Returns true if COST is infinite. */
2659 infinite_cost_p (comp_cost cost
)
2661 return cost
.cost
== INFTY
;
2664 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2665 on invariants DEPENDS_ON and that the value used in expressing it
2666 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2669 set_use_iv_cost (struct ivopts_data
*data
,
2670 struct iv_use
*use
, struct iv_cand
*cand
,
2671 comp_cost cost
, bitmap depends_on
, tree value
,
2672 enum tree_code comp
, int inv_expr_id
)
2676 if (infinite_cost_p (cost
))
2678 BITMAP_FREE (depends_on
);
2682 if (data
->consider_all_candidates
)
2684 use
->cost_map
[cand
->id
].cand
= cand
;
2685 use
->cost_map
[cand
->id
].cost
= cost
;
2686 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2687 use
->cost_map
[cand
->id
].value
= value
;
2688 use
->cost_map
[cand
->id
].comp
= comp
;
2689 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2693 /* n_map_members is a power of two, so this computes modulo. */
2694 s
= cand
->id
& (use
->n_map_members
- 1);
2695 for (i
= s
; i
< use
->n_map_members
; i
++)
2696 if (!use
->cost_map
[i
].cand
)
2698 for (i
= 0; i
< s
; i
++)
2699 if (!use
->cost_map
[i
].cand
)
2705 use
->cost_map
[i
].cand
= cand
;
2706 use
->cost_map
[i
].cost
= cost
;
2707 use
->cost_map
[i
].depends_on
= depends_on
;
2708 use
->cost_map
[i
].value
= value
;
2709 use
->cost_map
[i
].comp
= comp
;
2710 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2713 /* Gets cost of (USE, CANDIDATE) pair. */
2715 static struct cost_pair
*
2716 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2717 struct iv_cand
*cand
)
2720 struct cost_pair
*ret
;
2725 if (data
->consider_all_candidates
)
2727 ret
= use
->cost_map
+ cand
->id
;
2734 /* n_map_members is a power of two, so this computes modulo. */
2735 s
= cand
->id
& (use
->n_map_members
- 1);
2736 for (i
= s
; i
< use
->n_map_members
; i
++)
2737 if (use
->cost_map
[i
].cand
== cand
)
2738 return use
->cost_map
+ i
;
2740 for (i
= 0; i
< s
; i
++)
2741 if (use
->cost_map
[i
].cand
== cand
)
2742 return use
->cost_map
+ i
;
2747 /* Returns estimate on cost of computing SEQ. */
2750 seq_cost (rtx seq
, bool speed
)
2755 for (; seq
; seq
= NEXT_INSN (seq
))
2757 set
= single_set (seq
);
2759 cost
+= set_src_cost (SET_SRC (set
), speed
);
2767 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2769 produce_memory_decl_rtl (tree obj
, int *regno
)
2771 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2772 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2776 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2778 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2779 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2780 SET_SYMBOL_REF_DECL (x
, obj
);
2781 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2782 set_mem_addr_space (x
, as
);
2783 targetm
.encode_section_info (obj
, x
, true);
2787 x
= gen_raw_REG (address_mode
, (*regno
)++);
2788 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2789 set_mem_addr_space (x
, as
);
2795 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2796 walk_tree. DATA contains the actual fake register number. */
2799 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2801 tree obj
= NULL_TREE
;
2803 int *regno
= (int *) data
;
2805 switch (TREE_CODE (*expr_p
))
2808 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2809 handled_component_p (*expr_p
);
2810 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2813 if (DECL_P (obj
) && !DECL_RTL_SET_P (obj
))
2814 x
= produce_memory_decl_rtl (obj
, regno
);
2819 obj
= SSA_NAME_VAR (*expr_p
);
2820 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2823 if (!DECL_RTL_SET_P (obj
))
2824 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2833 if (DECL_RTL_SET_P (obj
))
2836 if (DECL_MODE (obj
) == BLKmode
)
2837 x
= produce_memory_decl_rtl (obj
, regno
);
2839 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2849 VEC_safe_push (tree
, heap
, decl_rtl_to_reset
, obj
);
2850 SET_DECL_RTL (obj
, x
);
2856 /* Determines cost of the computation of EXPR. */
2859 computation_cost (tree expr
, bool speed
)
2862 tree type
= TREE_TYPE (expr
);
2864 /* Avoid using hard regs in ways which may be unsupported. */
2865 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2866 struct cgraph_node
*node
= cgraph_get_node (current_function_decl
);
2867 enum node_frequency real_frequency
= node
->frequency
;
2869 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2870 crtl
->maybe_hot_insn_p
= speed
;
2871 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2873 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2876 default_rtl_profile ();
2877 node
->frequency
= real_frequency
;
2879 cost
= seq_cost (seq
, speed
);
2881 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2882 TYPE_ADDR_SPACE (type
), speed
);
2883 else if (!REG_P (rslt
))
2884 cost
+= set_src_cost (rslt
, speed
);
2889 /* Returns variable containing the value of candidate CAND at statement AT. */
2892 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
2894 if (stmt_after_increment (loop
, cand
, stmt
))
2895 return cand
->var_after
;
2897 return cand
->var_before
;
2900 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2901 same precision that is at least as wide as the precision of TYPE, stores
2902 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2906 determine_common_wider_type (tree
*a
, tree
*b
)
2908 tree wider_type
= NULL
;
2910 tree atype
= TREE_TYPE (*a
);
2912 if (CONVERT_EXPR_P (*a
))
2914 suba
= TREE_OPERAND (*a
, 0);
2915 wider_type
= TREE_TYPE (suba
);
2916 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
2922 if (CONVERT_EXPR_P (*b
))
2924 subb
= TREE_OPERAND (*b
, 0);
2925 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
2936 /* Determines the expression by that USE is expressed from induction variable
2937 CAND at statement AT in LOOP. The expression is stored in a decomposed
2938 form into AFF. Returns false if USE cannot be expressed using CAND. */
2941 get_computation_aff (struct loop
*loop
,
2942 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
2943 struct affine_tree_combination
*aff
)
2945 tree ubase
= use
->iv
->base
;
2946 tree ustep
= use
->iv
->step
;
2947 tree cbase
= cand
->iv
->base
;
2948 tree cstep
= cand
->iv
->step
, cstep_common
;
2949 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
2950 tree common_type
, var
;
2952 aff_tree cbase_aff
, var_aff
;
2955 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
2957 /* We do not have a precision to express the values of use. */
2961 var
= var_at_stmt (loop
, cand
, at
);
2962 uutype
= unsigned_type_for (utype
);
2964 /* If the conversion is not noop, perform it. */
2965 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
2967 cstep
= fold_convert (uutype
, cstep
);
2968 cbase
= fold_convert (uutype
, cbase
);
2969 var
= fold_convert (uutype
, var
);
2972 if (!constant_multiple_of (ustep
, cstep
, &rat
))
2975 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2976 type, we achieve better folding by computing their difference in this
2977 wider type, and cast the result to UUTYPE. We do not need to worry about
2978 overflows, as all the arithmetics will in the end be performed in UUTYPE
2980 common_type
= determine_common_wider_type (&ubase
, &cbase
);
2982 /* use = ubase - ratio * cbase + ratio * var. */
2983 tree_to_aff_combination (ubase
, common_type
, aff
);
2984 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
2985 tree_to_aff_combination (var
, uutype
, &var_aff
);
2987 /* We need to shift the value if we are after the increment. */
2988 if (stmt_after_increment (loop
, cand
, at
))
2992 if (common_type
!= uutype
)
2993 cstep_common
= fold_convert (common_type
, cstep
);
2995 cstep_common
= cstep
;
2997 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
2998 aff_combination_add (&cbase_aff
, &cstep_aff
);
3001 aff_combination_scale (&cbase_aff
, -rat
);
3002 aff_combination_add (aff
, &cbase_aff
);
3003 if (common_type
!= uutype
)
3004 aff_combination_convert (aff
, uutype
);
3006 aff_combination_scale (&var_aff
, rat
);
3007 aff_combination_add (aff
, &var_aff
);
3012 /* Determines the expression by that USE is expressed from induction variable
3013 CAND at statement AT in LOOP. The computation is unshared. */
3016 get_computation_at (struct loop
*loop
,
3017 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3020 tree type
= TREE_TYPE (use
->iv
->base
);
3022 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3024 unshare_aff_combination (&aff
);
3025 return fold_convert (type
, aff_combination_to_tree (&aff
));
3028 /* Determines the expression by that USE is expressed from induction variable
3029 CAND in LOOP. The computation is unshared. */
3032 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3034 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3037 /* Adjust the cost COST for being in loop setup rather than loop body.
3038 If we're optimizing for space, the loop setup overhead is constant;
3039 if we're optimizing for speed, amortize it over the per-iteration cost. */
3041 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3045 else if (optimize_loop_for_speed_p (data
->current_loop
))
3046 return cost
/ avg_loop_niter (data
->current_loop
);
3051 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3052 validity for a memory reference accessing memory of mode MODE in
3053 address space AS. */
3055 DEF_VEC_P (sbitmap
);
3056 DEF_VEC_ALLOC_P (sbitmap
, heap
);
3059 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, enum machine_mode mode
,
3062 #define MAX_RATIO 128
3063 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3064 static VEC (sbitmap
, heap
) *valid_mult_list
;
3067 if (data_index
>= VEC_length (sbitmap
, valid_mult_list
))
3068 VEC_safe_grow_cleared (sbitmap
, heap
, valid_mult_list
, data_index
+ 1);
3070 valid_mult
= VEC_index (sbitmap
, valid_mult_list
, data_index
);
3073 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3074 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3078 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3079 sbitmap_zero (valid_mult
);
3080 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3081 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3083 XEXP (addr
, 1) = gen_int_mode (i
, address_mode
);
3084 if (memory_address_addr_space_p (mode
, addr
, as
))
3085 SET_BIT (valid_mult
, i
+ MAX_RATIO
);
3088 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3090 fprintf (dump_file
, " allowed multipliers:");
3091 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3092 if (TEST_BIT (valid_mult
, i
+ MAX_RATIO
))
3093 fprintf (dump_file
, " %d", (int) i
);
3094 fprintf (dump_file
, "\n");
3095 fprintf (dump_file
, "\n");
3098 VEC_replace (sbitmap
, valid_mult_list
, data_index
, valid_mult
);
3101 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3104 return TEST_BIT (valid_mult
, ratio
+ MAX_RATIO
);
3107 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3108 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3109 variable is omitted. Compute the cost for a memory reference that accesses
3110 a memory location of mode MEM_MODE in address space AS.
3112 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3113 size of MEM_MODE / RATIO) is available. To make this determination, we
3114 look at the size of the increment to be made, which is given in CSTEP.
3115 CSTEP may be zero if the step is unknown.
3116 STMT_AFTER_INC is true iff the statement we're looking at is after the
3117 increment of the original biv.
3119 TODO -- there must be some better way. This all is quite crude. */
3121 typedef struct address_cost_data_s
3123 HOST_WIDE_INT min_offset
, max_offset
;
3124 unsigned costs
[2][2][2][2];
3125 } *address_cost_data
;
3127 DEF_VEC_P (address_cost_data
);
3128 DEF_VEC_ALLOC_P (address_cost_data
, heap
);
3131 get_address_cost (bool symbol_present
, bool var_present
,
3132 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3133 HOST_WIDE_INT cstep
, enum machine_mode mem_mode
,
3134 addr_space_t as
, bool speed
,
3135 bool stmt_after_inc
, bool *may_autoinc
)
3137 enum machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3138 static VEC(address_cost_data
, heap
) *address_cost_data_list
;
3139 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3140 address_cost_data data
;
3141 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3142 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3143 unsigned cost
, acost
, complexity
;
3144 bool offset_p
, ratio_p
, autoinc
;
3145 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3146 unsigned HOST_WIDE_INT mask
;
3149 if (data_index
>= VEC_length (address_cost_data
, address_cost_data_list
))
3150 VEC_safe_grow_cleared (address_cost_data
, heap
, address_cost_data_list
,
3153 data
= VEC_index (address_cost_data
, address_cost_data_list
, data_index
);
3157 HOST_WIDE_INT rat
, off
= 0;
3158 int old_cse_not_expected
, width
;
3159 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3160 rtx seq
, addr
, base
;
3163 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3165 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3167 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3168 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3169 width
= HOST_BITS_PER_WIDE_INT
- 1;
3170 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3172 for (i
= width
; i
>= 0; i
--)
3174 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3175 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3176 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3179 data
->min_offset
= (i
== -1? 0 : off
);
3181 for (i
= width
; i
>= 0; i
--)
3183 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3184 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3185 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3190 data
->max_offset
= off
;
3192 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3194 fprintf (dump_file
, "get_address_cost:\n");
3195 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3196 GET_MODE_NAME (mem_mode
),
3198 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3199 GET_MODE_NAME (mem_mode
),
3204 for (i
= 2; i
<= MAX_RATIO
; i
++)
3205 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3211 /* Compute the cost of various addressing modes. */
3213 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3214 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3216 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3217 || USE_STORE_PRE_DECREMENT (mem_mode
))
3219 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3220 has_predec
[mem_mode
]
3221 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3223 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3224 || USE_STORE_POST_DECREMENT (mem_mode
))
3226 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3227 has_postdec
[mem_mode
]
3228 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3230 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3231 || USE_STORE_PRE_DECREMENT (mem_mode
))
3233 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3234 has_preinc
[mem_mode
]
3235 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3237 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3238 || USE_STORE_POST_INCREMENT (mem_mode
))
3240 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3241 has_postinc
[mem_mode
]
3242 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3244 for (i
= 0; i
< 16; i
++)
3247 var_p
= (i
>> 1) & 1;
3248 off_p
= (i
>> 2) & 1;
3249 rat_p
= (i
>> 3) & 1;
3253 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3254 gen_int_mode (rat
, address_mode
));
3257 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3261 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3262 /* ??? We can run into trouble with some backends by presenting
3263 it with symbols which haven't been properly passed through
3264 targetm.encode_section_info. By setting the local bit, we
3265 enhance the probability of things working. */
3266 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3269 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3271 (PLUS
, address_mode
, base
,
3272 gen_int_mode (off
, address_mode
)));
3275 base
= gen_int_mode (off
, address_mode
);
3280 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3283 /* To avoid splitting addressing modes, pretend that no cse will
3285 old_cse_not_expected
= cse_not_expected
;
3286 cse_not_expected
= true;
3287 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3288 cse_not_expected
= old_cse_not_expected
;
3292 acost
= seq_cost (seq
, speed
);
3293 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3297 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3300 /* On some targets, it is quite expensive to load symbol to a register,
3301 which makes addresses that contain symbols look much more expensive.
3302 However, the symbol will have to be loaded in any case before the
3303 loop (and quite likely we have it in register already), so it does not
3304 make much sense to penalize them too heavily. So make some final
3305 tweaks for the SYMBOL_PRESENT modes:
3307 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3308 var is cheaper, use this mode with small penalty.
3309 If VAR_PRESENT is true, try whether the mode with
3310 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3311 if this is the case, use it. */
3312 add_c
= add_cost (speed
, address_mode
);
3313 for (i
= 0; i
< 8; i
++)
3316 off_p
= (i
>> 1) & 1;
3317 rat_p
= (i
>> 2) & 1;
3319 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3323 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3324 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3327 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3329 fprintf (dump_file
, "Address costs:\n");
3331 for (i
= 0; i
< 16; i
++)
3334 var_p
= (i
>> 1) & 1;
3335 off_p
= (i
>> 2) & 1;
3336 rat_p
= (i
>> 3) & 1;
3338 fprintf (dump_file
, " ");
3340 fprintf (dump_file
, "sym + ");
3342 fprintf (dump_file
, "var + ");
3344 fprintf (dump_file
, "cst + ");
3346 fprintf (dump_file
, "rat * ");
3348 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3349 fprintf (dump_file
, "index costs %d\n", acost
);
3351 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3352 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3353 fprintf (dump_file
, " May include autoinc/dec\n");
3354 fprintf (dump_file
, "\n");
3357 VEC_replace (address_cost_data
, address_cost_data_list
,
3361 bits
= GET_MODE_BITSIZE (address_mode
);
3362 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3364 if ((offset
>> (bits
- 1) & 1))
3369 msize
= GET_MODE_SIZE (mem_mode
);
3370 autoinc_offset
= offset
;
3372 autoinc_offset
+= ratio
* cstep
;
3373 if (symbol_present
|| var_present
|| ratio
!= 1)
3375 else if ((has_postinc
[mem_mode
] && autoinc_offset
== 0
3377 || (has_postdec
[mem_mode
] && autoinc_offset
== 0
3379 || (has_preinc
[mem_mode
] && autoinc_offset
== msize
3381 || (has_predec
[mem_mode
] && autoinc_offset
== -msize
3382 && msize
== -cstep
))
3386 offset_p
= (s_offset
!= 0
3387 && data
->min_offset
<= s_offset
3388 && s_offset
<= data
->max_offset
);
3389 ratio_p
= (ratio
!= 1
3390 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3392 if (ratio
!= 1 && !ratio_p
)
3393 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3395 if (s_offset
&& !offset_p
&& !symbol_present
)
3396 cost
+= add_cost (speed
, address_mode
);
3399 *may_autoinc
= autoinc
;
3400 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3401 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3402 return new_cost (cost
+ acost
, complexity
);
3405 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3406 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3407 calculating the operands of EXPR. Returns true if successful, and returns
3408 the cost in COST. */
3411 get_shiftadd_cost (tree expr
, enum machine_mode mode
, comp_cost cost0
,
3412 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3415 tree op1
= TREE_OPERAND (expr
, 1);
3416 tree cst
= TREE_OPERAND (mult
, 1);
3417 tree multop
= TREE_OPERAND (mult
, 0);
3418 int m
= exact_log2 (int_cst_value (cst
));
3419 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3422 if (!(m
>= 0 && m
< maxm
))
3425 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3426 ? shiftadd_cost (speed
, mode
, m
)
3428 ? shiftsub1_cost (speed
, mode
, m
)
3429 : shiftsub0_cost (speed
, mode
, m
)));
3430 res
= new_cost (sa_cost
, 0);
3431 res
= add_costs (res
, mult
== op1
? cost0
: cost1
);
3433 STRIP_NOPS (multop
);
3434 if (!is_gimple_val (multop
))
3435 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3441 /* Estimates cost of forcing expression EXPR into a variable. */
3444 force_expr_to_var_cost (tree expr
, bool speed
)
3446 static bool costs_initialized
= false;
3447 static unsigned integer_cost
[2];
3448 static unsigned symbol_cost
[2];
3449 static unsigned address_cost
[2];
3451 comp_cost cost0
, cost1
, cost
;
3452 enum machine_mode mode
;
3454 if (!costs_initialized
)
3456 tree type
= build_pointer_type (integer_type_node
);
3461 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3462 TREE_STATIC (var
) = 1;
3463 x
= produce_memory_decl_rtl (var
, NULL
);
3464 SET_DECL_RTL (var
, x
);
3466 addr
= build1 (ADDR_EXPR
, type
, var
);
3469 for (i
= 0; i
< 2; i
++)
3471 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3474 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3477 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3478 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3480 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3481 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3482 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3483 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3484 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3485 fprintf (dump_file
, "\n");
3489 costs_initialized
= true;
3494 if (SSA_VAR_P (expr
))
3497 if (is_gimple_min_invariant (expr
))
3499 if (TREE_CODE (expr
) == INTEGER_CST
)
3500 return new_cost (integer_cost
[speed
], 0);
3502 if (TREE_CODE (expr
) == ADDR_EXPR
)
3504 tree obj
= TREE_OPERAND (expr
, 0);
3506 if (TREE_CODE (obj
) == VAR_DECL
3507 || TREE_CODE (obj
) == PARM_DECL
3508 || TREE_CODE (obj
) == RESULT_DECL
)
3509 return new_cost (symbol_cost
[speed
], 0);
3512 return new_cost (address_cost
[speed
], 0);
3515 switch (TREE_CODE (expr
))
3517 case POINTER_PLUS_EXPR
:
3521 op0
= TREE_OPERAND (expr
, 0);
3522 op1
= TREE_OPERAND (expr
, 1);
3526 if (is_gimple_val (op0
))
3529 cost0
= force_expr_to_var_cost (op0
, speed
);
3531 if (is_gimple_val (op1
))
3534 cost1
= force_expr_to_var_cost (op1
, speed
);
3539 op0
= TREE_OPERAND (expr
, 0);
3543 if (is_gimple_val (op0
))
3546 cost0
= force_expr_to_var_cost (op0
, speed
);
3552 /* Just an arbitrary value, FIXME. */
3553 return new_cost (target_spill_cost
[speed
], 0);
3556 mode
= TYPE_MODE (TREE_TYPE (expr
));
3557 switch (TREE_CODE (expr
))
3559 case POINTER_PLUS_EXPR
:
3563 cost
= new_cost (add_cost (speed
, mode
), 0);
3564 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3566 tree mult
= NULL_TREE
;
3568 if (TREE_CODE (op1
) == MULT_EXPR
)
3570 else if (TREE_CODE (op0
) == MULT_EXPR
)
3573 if (mult
!= NULL_TREE
3574 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3575 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3582 if (cst_and_fits_in_hwi (op0
))
3583 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3585 else if (cst_and_fits_in_hwi (op1
))
3586 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3589 return new_cost (target_spill_cost
[speed
], 0);
3596 cost
= add_costs (cost
, cost0
);
3597 cost
= add_costs (cost
, cost1
);
3599 /* Bound the cost by target_spill_cost. The parts of complicated
3600 computations often are either loop invariant or at least can
3601 be shared between several iv uses, so letting this grow without
3602 limits would not give reasonable results. */
3603 if (cost
.cost
> (int) target_spill_cost
[speed
])
3604 cost
.cost
= target_spill_cost
[speed
];
3609 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3610 invariants the computation depends on. */
3613 force_var_cost (struct ivopts_data
*data
,
3614 tree expr
, bitmap
*depends_on
)
3618 fd_ivopts_data
= data
;
3619 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3622 return force_expr_to_var_cost (expr
, data
->speed
);
3625 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3626 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3627 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3628 invariants the computation depends on. */
3631 split_address_cost (struct ivopts_data
*data
,
3632 tree addr
, bool *symbol_present
, bool *var_present
,
3633 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3636 HOST_WIDE_INT bitsize
;
3637 HOST_WIDE_INT bitpos
;
3639 enum machine_mode mode
;
3640 int unsignedp
, volatilep
;
3642 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3643 &unsignedp
, &volatilep
, false);
3646 || bitpos
% BITS_PER_UNIT
!= 0
3647 || TREE_CODE (core
) != VAR_DECL
)
3649 *symbol_present
= false;
3650 *var_present
= true;
3651 fd_ivopts_data
= data
;
3652 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3653 return new_cost (target_spill_cost
[data
->speed
], 0);
3656 *offset
+= bitpos
/ BITS_PER_UNIT
;
3657 if (TREE_STATIC (core
)
3658 || DECL_EXTERNAL (core
))
3660 *symbol_present
= true;
3661 *var_present
= false;
3665 *symbol_present
= false;
3666 *var_present
= true;
3670 /* Estimates cost of expressing difference of addresses E1 - E2 as
3671 var + symbol + offset. The value of offset is added to OFFSET,
3672 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3673 part is missing. DEPENDS_ON is a set of the invariants the computation
3677 ptr_difference_cost (struct ivopts_data
*data
,
3678 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3679 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3681 HOST_WIDE_INT diff
= 0;
3682 aff_tree aff_e1
, aff_e2
;
3685 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3687 if (ptr_difference_const (e1
, e2
, &diff
))
3690 *symbol_present
= false;
3691 *var_present
= false;
3695 if (integer_zerop (e2
))
3696 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3697 symbol_present
, var_present
, offset
, depends_on
);
3699 *symbol_present
= false;
3700 *var_present
= true;
3702 type
= signed_type_for (TREE_TYPE (e1
));
3703 tree_to_aff_combination (e1
, type
, &aff_e1
);
3704 tree_to_aff_combination (e2
, type
, &aff_e2
);
3705 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3706 aff_combination_add (&aff_e1
, &aff_e2
);
3708 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3711 /* Estimates cost of expressing difference E1 - E2 as
3712 var + symbol + offset. The value of offset is added to OFFSET,
3713 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3714 part is missing. DEPENDS_ON is a set of the invariants the computation
3718 difference_cost (struct ivopts_data
*data
,
3719 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3720 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3722 enum machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3723 unsigned HOST_WIDE_INT off1
, off2
;
3724 aff_tree aff_e1
, aff_e2
;
3727 e1
= strip_offset (e1
, &off1
);
3728 e2
= strip_offset (e2
, &off2
);
3729 *offset
+= off1
- off2
;
3734 if (TREE_CODE (e1
) == ADDR_EXPR
)
3735 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3736 offset
, depends_on
);
3737 *symbol_present
= false;
3739 if (operand_equal_p (e1
, e2
, 0))
3741 *var_present
= false;
3745 *var_present
= true;
3747 if (integer_zerop (e2
))
3748 return force_var_cost (data
, e1
, depends_on
);
3750 if (integer_zerop (e1
))
3752 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3753 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3757 type
= signed_type_for (TREE_TYPE (e1
));
3758 tree_to_aff_combination (e1
, type
, &aff_e1
);
3759 tree_to_aff_combination (e2
, type
, &aff_e2
);
3760 aff_combination_scale (&aff_e2
, double_int_minus_one
);
3761 aff_combination_add (&aff_e1
, &aff_e2
);
3763 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3766 /* Returns true if AFF1 and AFF2 are identical. */
3769 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3773 if (aff1
->n
!= aff2
->n
)
3776 for (i
= 0; i
< aff1
->n
; i
++)
3778 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3781 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3787 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3790 get_expr_id (struct ivopts_data
*data
, tree expr
)
3792 struct iv_inv_expr_ent ent
;
3793 struct iv_inv_expr_ent
**slot
;
3796 ent
.hash
= iterative_hash_expr (expr
, 0);
3797 slot
= (struct iv_inv_expr_ent
**) htab_find_slot (data
->inv_expr_tab
,
3802 *slot
= XNEW (struct iv_inv_expr_ent
);
3803 (*slot
)->expr
= expr
;
3804 (*slot
)->hash
= ent
.hash
;
3805 (*slot
)->id
= data
->inv_expr_id
++;
3809 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3810 requires a new compiler generated temporary. Returns -1 otherwise.
3811 ADDRESS_P is a flag indicating if the expression is for address
3815 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
3816 tree cbase
, HOST_WIDE_INT ratio
,
3819 aff_tree ubase_aff
, cbase_aff
;
3827 if ((TREE_CODE (ubase
) == INTEGER_CST
)
3828 && (TREE_CODE (cbase
) == INTEGER_CST
))
3831 /* Strips the constant part. */
3832 if (TREE_CODE (ubase
) == PLUS_EXPR
3833 || TREE_CODE (ubase
) == MINUS_EXPR
3834 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
3836 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
3837 ubase
= TREE_OPERAND (ubase
, 0);
3840 /* Strips the constant part. */
3841 if (TREE_CODE (cbase
) == PLUS_EXPR
3842 || TREE_CODE (cbase
) == MINUS_EXPR
3843 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
3845 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
3846 cbase
= TREE_OPERAND (cbase
, 0);
3851 if (((TREE_CODE (ubase
) == SSA_NAME
)
3852 || (TREE_CODE (ubase
) == ADDR_EXPR
3853 && is_gimple_min_invariant (ubase
)))
3854 && (TREE_CODE (cbase
) == INTEGER_CST
))
3857 if (((TREE_CODE (cbase
) == SSA_NAME
)
3858 || (TREE_CODE (cbase
) == ADDR_EXPR
3859 && is_gimple_min_invariant (cbase
)))
3860 && (TREE_CODE (ubase
) == INTEGER_CST
))
3866 if(operand_equal_p (ubase
, cbase
, 0))
3869 if (TREE_CODE (ubase
) == ADDR_EXPR
3870 && TREE_CODE (cbase
) == ADDR_EXPR
)
3874 usym
= TREE_OPERAND (ubase
, 0);
3875 csym
= TREE_OPERAND (cbase
, 0);
3876 if (TREE_CODE (usym
) == ARRAY_REF
)
3878 tree ind
= TREE_OPERAND (usym
, 1);
3879 if (TREE_CODE (ind
) == INTEGER_CST
3880 && host_integerp (ind
, 0)
3881 && TREE_INT_CST_LOW (ind
) == 0)
3882 usym
= TREE_OPERAND (usym
, 0);
3884 if (TREE_CODE (csym
) == ARRAY_REF
)
3886 tree ind
= TREE_OPERAND (csym
, 1);
3887 if (TREE_CODE (ind
) == INTEGER_CST
3888 && host_integerp (ind
, 0)
3889 && TREE_INT_CST_LOW (ind
) == 0)
3890 csym
= TREE_OPERAND (csym
, 0);
3892 if (operand_equal_p (usym
, csym
, 0))
3895 /* Now do more complex comparison */
3896 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
3897 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
3898 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
3902 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
3903 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
3905 aff_combination_scale (&cbase_aff
, double_int::from_shwi (-1 * ratio
));
3906 aff_combination_add (&ubase_aff
, &cbase_aff
);
3907 expr
= aff_combination_to_tree (&ubase_aff
);
3908 return get_expr_id (data
, expr
);
3913 /* Determines the cost of the computation by that USE is expressed
3914 from induction variable CAND. If ADDRESS_P is true, we just need
3915 to create an address from it, otherwise we want to get it into
3916 register. A set of invariants we depend on is stored in
3917 DEPENDS_ON. AT is the statement at that the value is computed.
3918 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3919 addressing is likely. */
3922 get_computation_cost_at (struct ivopts_data
*data
,
3923 struct iv_use
*use
, struct iv_cand
*cand
,
3924 bool address_p
, bitmap
*depends_on
, gimple at
,
3928 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3930 tree utype
= TREE_TYPE (ubase
), ctype
;
3931 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
3932 HOST_WIDE_INT ratio
, aratio
;
3933 bool var_present
, symbol_present
, stmt_is_after_inc
;
3936 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
3940 /* Only consider real candidates. */
3942 return infinite_cost
;
3944 cbase
= cand
->iv
->base
;
3945 cstep
= cand
->iv
->step
;
3946 ctype
= TREE_TYPE (cbase
);
3948 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3950 /* We do not have a precision to express the values of use. */
3951 return infinite_cost
;
3955 || (use
->iv
->base_object
3956 && cand
->iv
->base_object
3957 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
3958 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
3960 /* Do not try to express address of an object with computation based
3961 on address of a different object. This may cause problems in rtl
3962 level alias analysis (that does not expect this to be happening,
3963 as this is illegal in C), and would be unlikely to be useful
3965 if (use
->iv
->base_object
3966 && cand
->iv
->base_object
3967 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
3968 return infinite_cost
;
3971 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3973 /* TODO -- add direct handling of this case. */
3977 /* CSTEPI is removed from the offset in case statement is after the
3978 increment. If the step is not constant, we use zero instead.
3979 This is a bit imprecise (there is the extra addition), but
3980 redundancy elimination is likely to transform the code so that
3981 it uses value of the variable before increment anyway,
3982 so it is not that much unrealistic. */
3983 if (cst_and_fits_in_hwi (cstep
))
3984 cstepi
= int_cst_value (cstep
);
3988 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3989 return infinite_cost
;
3991 if (rat
.fits_shwi ())
3992 ratio
= rat
.to_shwi ();
3994 return infinite_cost
;
3997 ctype
= TREE_TYPE (cbase
);
3999 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4001 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4002 or ratio == 1, it is better to handle this like
4004 ubase - ratio * cbase + ratio * var
4006 (also holds in the case ratio == -1, TODO. */
4008 if (cst_and_fits_in_hwi (cbase
))
4010 offset
= - ratio
* int_cst_value (cbase
);
4011 cost
= difference_cost (data
,
4012 ubase
, build_int_cst (utype
, 0),
4013 &symbol_present
, &var_present
, &offset
,
4015 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4017 else if (ratio
== 1)
4019 tree real_cbase
= cbase
;
4021 /* Check to see if any adjustment is needed. */
4022 if (cstepi
== 0 && stmt_is_after_inc
)
4024 aff_tree real_cbase_aff
;
4027 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4029 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4031 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4032 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4035 cost
= difference_cost (data
,
4037 &symbol_present
, &var_present
, &offset
,
4039 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4042 && !POINTER_TYPE_P (ctype
)
4043 && multiplier_allowed_in_address_p
4044 (ratio
, TYPE_MODE (TREE_TYPE (utype
)),
4045 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4048 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4049 cost
= difference_cost (data
,
4051 &symbol_present
, &var_present
, &offset
,
4053 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4057 cost
= force_var_cost (data
, cbase
, depends_on
);
4058 cost
= add_costs (cost
,
4059 difference_cost (data
,
4060 ubase
, build_int_cst (utype
, 0),
4061 &symbol_present
, &var_present
,
4062 &offset
, depends_on
));
4063 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4064 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4070 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4071 /* Clear depends on. */
4072 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4073 bitmap_clear (*depends_on
);
4076 /* If we are after the increment, the value of the candidate is higher by
4078 if (stmt_is_after_inc
)
4079 offset
-= ratio
* cstepi
;
4081 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4082 (symbol/var1/const parts may be omitted). If we are looking for an
4083 address, find the cost of addressing this. */
4085 return add_costs (cost
,
4086 get_address_cost (symbol_present
, var_present
,
4087 offset
, ratio
, cstepi
,
4088 TYPE_MODE (TREE_TYPE (utype
)),
4089 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4090 speed
, stmt_is_after_inc
,
4093 /* Otherwise estimate the costs for computing the expression. */
4094 if (!symbol_present
&& !var_present
&& !offset
)
4097 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4101 /* Symbol + offset should be compile-time computable so consider that they
4102 are added once to the variable, if present. */
4103 if (var_present
&& (symbol_present
|| offset
))
4104 cost
.cost
+= adjust_setup_cost (data
,
4105 add_cost (speed
, TYPE_MODE (ctype
)));
4107 /* Having offset does not affect runtime cost in case it is added to
4108 symbol, but it increases complexity. */
4112 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4114 aratio
= ratio
> 0 ? ratio
: -ratio
;
4116 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4121 *can_autoinc
= false;
4124 /* Just get the expression, expand it and measure the cost. */
4125 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4128 return infinite_cost
;
4131 comp
= build_simple_mem_ref (comp
);
4133 return new_cost (computation_cost (comp
, speed
), 0);
4137 /* Determines the cost of the computation by that USE is expressed
4138 from induction variable CAND. If ADDRESS_P is true, we just need
4139 to create an address from it, otherwise we want to get it into
4140 register. A set of invariants we depend on is stored in
4141 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4142 autoinc addressing is likely. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Thin wrapper: evaluates the cost at the use's own statement.  */
4145 get_computation_cost (struct ivopts_data
*data
,
4146 struct iv_use
*use
, struct iv_cand
*cand
,
4147 bool address_p
, bitmap
*depends_on
,
4148 bool *can_autoinc
, int *inv_expr_id
)
/* Delegate to get_computation_cost_at with AT = USE->stmt.  */
4150 return get_computation_cost_at (data
,
4151 use
, cand
, address_p
, depends_on
, use
->stmt
,
4152 can_autoinc
, inv_expr_id
);
4155 /* Determines cost of basing replacement of USE on CAND in a generic
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Records the cost of expressing a nonlinear USE via CAND; returns
   whether the cost is finite.  */
4159 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4160 struct iv_use
*use
, struct iv_cand
*cand
)
4164 int inv_expr_id
= -1;
4166 /* The simple case first -- if we need to express value of the preserved
4167 original biv, the cost is 0. This also prevents us from counting the
4168 cost of increment twice -- once at this use and once in the cost of
4170 if (cand
->pos
== IP_ORIGINAL
4171 && cand
->incremented_at
== use
->stmt
)
4173 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
/* General case: ask get_computation_cost and record the result.  */
4178 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4179 NULL
, &inv_expr_id
);
4181 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4184 return !infinite_cost_p (cost
);
4187 /* Determines cost of basing replacement of USE on CAND in an address. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Like the generic case but queries autoincrement suitability and
   adjusts the cost for autoinc candidates.  */
4190 determine_use_iv_cost_address (struct ivopts_data
*data
,
4191 struct iv_use
*use
, struct iv_cand
*cand
)
4195 int inv_expr_id
= -1;
4196 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4197 &can_autoinc
, &inv_expr_id
);
/* The increment is folded into the autoinc address, so its cost is not
   paid separately for the use that enabled it.  */
4199 if (cand
->ainc_use
== use
)
4202 cost
.cost
-= cand
->cost_step
;
4203 /* If we generated the candidate solely for exploiting autoincrement
4204 opportunities, and it turns out it can't be used, set the cost to
4205 infinity to make sure we ignore it. */
4206 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4207 cost
= infinite_cost
;
4209 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4212 return !infinite_cost_p (cost
);
4215 /* Computes value of candidate CAND at position AT in iteration NITER, and
4216 stores it to VAL. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Computes VAL = base + NITER * step (+ step if AT is after the
   increment) as an affine combination.  */
4219 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4222 aff_tree step
, delta
, nit
;
4223 struct iv
*iv
= cand
->iv
;
4224 tree type
= TREE_TYPE (iv
->base
);
4225 tree steptype
= type
;
/* Pointer IVs step in sizetype, not in the pointer type itself.  */
4226 if (POINTER_TYPE_P (type
))
4227 steptype
= sizetype
;
4229 tree_to_aff_combination (iv
->step
, steptype
, &step
);
4230 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4231 aff_combination_convert (&nit
, steptype
);
/* delta = niter * step; add one extra step when AT follows the
   increment of CAND.  */
4232 aff_combination_mult (&nit
, &step
, &delta
);
4233 if (stmt_after_increment (loop
, cand
, at
))
4234 aff_combination_add (&delta
, &step
);
4236 tree_to_aff_combination (iv
->base
, type
, val
);
4237 aff_combination_add (val
, &delta
);
4240 /* Returns period of induction variable iv. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Requires a constant step (asserted below).  */
4243 iv_period (struct iv
*iv
)
4245 tree step
= iv
->step
, period
, type
;
4248 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4250 type
= unsigned_type_for (TREE_TYPE (step
));
4251 /* Period of the iv is lcm (step, type_range)/step -1,
4252 i.e., N*type_range/step - 1. Since type range is power
4253 of two, N == (step >> num_of_ending_zeros_binary (step),
4254 so the final result is
4256 (type_range >> num_of_ending_zeros_binary (step)) - 1
4259 pow2div
= num_ending_zeros (step
);
4261 period
= build_low_bits_mask (type
,
4262 (TYPE_PRECISION (type
)
4263 - tree_low_cst (pow2div
, 1)));
4268 /* Returns the comparison operator used when eliminating the iv USE. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Picks EQ_EXPR/NE_EXPR based on which successor edge of the exit
   block leaves the loop and the edge's truth value.  */
4270 static enum tree_code
4271 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4273 struct loop
*loop
= data
->current_loop
;
4277 ex_bb
= gimple_bb (use
->stmt
);
/* Find the successor edge that leaves the loop.  */
4278 exit
= EDGE_SUCC (ex_bb
, 0);
4279 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4280 exit
= EDGE_SUCC (ex_bb
, 1);
4282 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Strips useless SSA type conversions from EXP as long as the
   conversion preserves the wrap/no-wrap property of the type.  */
4286 strip_wrap_conserving_type_conversions (tree exp
)
4288 while (tree_ssa_useless_type_conversion (exp
)
4289 && (nowrap_type_p (TREE_TYPE (exp
))
4290 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp
, 0)))))
4291 exp
= TREE_OPERAND (exp
, 0);
4295 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic, we
4296 check for an exact match. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Recurses through SSA definitions of E to match the shape of WHAT.  */
4299 expr_equal_p (tree e
, tree what
)
4302 enum tree_code code
;
/* Ignore wrap-conserving conversions on both sides before comparing.  */
4304 e
= strip_wrap_conserving_type_conversions (e
);
4305 what
= strip_wrap_conserving_type_conversions (what
);
4307 code
= TREE_CODE (what
);
4308 if (TREE_TYPE (e
) != TREE_TYPE (what
))
4311 if (operand_equal_p (e
, what
, 0))
4314 if (TREE_CODE (e
) != SSA_NAME
)
/* Follow E's defining statement and compare operand-by-operand.  */
4317 stmt
= SSA_NAME_DEF_STMT (e
);
4318 if (gimple_code (stmt
) != GIMPLE_ASSIGN
4319 || gimple_assign_rhs_code (stmt
) != code
)
4322 switch (get_gimple_rhs_class (code
))
4324 case GIMPLE_BINARY_RHS
:
4325 if (!expr_equal_p (gimple_assign_rhs2 (stmt
), TREE_OPERAND (what
, 1)))
4329 case GIMPLE_UNARY_RHS
:
4330 case GIMPLE_SINGLE_RHS
:
4331 return expr_equal_p (gimple_assign_rhs1 (stmt
), TREE_OPERAND (what
, 0));
4337 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4338 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4339 calculation is performed in non-wrapping type.
4341 TODO: More generally, we could test for the situation that
4342 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4343 This would require knowing the sign of OFFSET.
4345 Also, we only look for the first addition in the computation of BASE.
4346 More complex analysis would be better, but introducing it just for
4347 this optimization seems like an overkill. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.  */
4350 difference_cannot_overflow_p (tree base
, tree offset
)
4352 enum tree_code code
;
/* Only a non-wrapping type gives us the no-overflow guarantee.  */
4355 if (!nowrap_type_p (TREE_TYPE (base
)))
4358 base
= expand_simple_operations (base
);
/* Two ways to find the addition: through the SSA def statement, or
   directly in the tree expression.  */
4360 if (TREE_CODE (base
) == SSA_NAME
)
4362 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4364 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4367 code
= gimple_assign_rhs_code (stmt
);
4368 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4371 e1
= gimple_assign_rhs1 (stmt
);
4372 e2
= gimple_assign_rhs2 (stmt
);
4376 code
= TREE_CODE (base
);
4377 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4379 e1
= TREE_OPERAND (base
, 0);
4380 e2
= TREE_OPERAND (base
, 1);
4383 /* TODO: deeper inspection may be necessary to prove the equality. */
4387 return expr_equal_p (e1
, offset
) || expr_equal_p (e2
, offset
);
4388 case POINTER_PLUS_EXPR
:
/* For pointer addition only the second operand can be OFFSET.  */
4389 return expr_equal_p (e2
, offset
);
4396 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4397 comparison with CAND. NITER describes the number of iterations of
4398 the loops. If successful, the comparison in COMP_P is altered accordingly.
4400 We aim to handle the following situation:
4416 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4417 We aim to optimize this to
4425 while (p < p_0 - a + b);
4427 This preserves the correctness, since the pointer arithmetics does not
4428 overflow. More precisely:
4430 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4431 overflow in computing it or the values of p.
4432 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4433 overflow. To prove this, we use the fact that p_0 = base + a. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.  */
4436 iv_elimination_compare_lt (struct ivopts_data
*data
,
4437 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4438 struct tree_niter_desc
*niter
)
4440 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4441 struct affine_tree_combination nit
, tmpa
, tmpb
;
4442 enum tree_code comp
;
4445 /* We need to know that the candidate induction variable does not overflow.
4446 While more complex analysis may be used to prove this, for now just
4447 check that the variable appears in the original program and that it
4448 is computed in a type that guarantees no overflows. */
4449 cand_type
= TREE_TYPE (cand
->iv
->base
);
4450 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4453 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4454 the calculation of the BOUND could overflow, making the comparison
4456 if (!data
->loop_single_exit_p
)
4459 /* We need to be able to decide whether candidate is increasing or decreasing
4460 in order to choose the right comparison operator. */
4461 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4463 step
= int_cst_value (cand
->iv
->step
);
4465 /* Check that the number of iterations matches the expected pattern:
4466 a + 1 > b ? 0 : b - a - 1. */
4467 mbz
= niter
->may_be_zero
;
4468 if (TREE_CODE (mbz
) == GT_EXPR
)
4470 /* Handle a + 1 > b. */
4471 tree op0
= TREE_OPERAND (mbz
, 0);
4472 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4474 a
= TREE_OPERAND (op0
, 0);
4475 b
= TREE_OPERAND (mbz
, 1);
4480 else if (TREE_CODE (mbz
) == LT_EXPR
)
4482 tree op1
= TREE_OPERAND (mbz
, 1);
4484 /* Handle b < a + 1. */
4485 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4487 a
= TREE_OPERAND (op1
, 0);
4488 b
= TREE_OPERAND (mbz
, 0);
4496 /* Expected number of iterations is B - A - 1. Check that it matches
4497 the actual number, i.e., that B - A - NITER = 1. */
4498 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4499 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4500 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
/* Compute tmpb := b - a - niter via affine arithmetic and check it is
   the constant 1.  */
4501 aff_combination_scale (&nit
, double_int_minus_one
);
4502 aff_combination_scale (&tmpa
, double_int_minus_one
);
4503 aff_combination_add (&tmpb
, &tmpa
);
4504 aff_combination_add (&tmpb
, &nit
);
4505 if (tmpb
.n
!= 0 || tmpb
.offset
!= double_int_one
)
4508 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4510 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4512 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4513 if (!difference_cannot_overflow_p (cand
->iv
->base
, offset
))
4516 /* Determine the new comparison operator. */
4517 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4518 if (*comp_p
== NE_EXPR
)
4520 else if (*comp_p
== EQ_EXPR
)
4521 *comp_p
= invert_tree_comparison (comp
, false);
4528 /* Check whether it is possible to express the condition in USE by comparison
4529 of candidate CAND. If so, store the value compared with to BOUND, and the
4530 comparison operator to COMP. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.  */
4533 may_eliminate_iv (struct ivopts_data
*data
,
4534 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4535 enum tree_code
*comp
)
4540 struct loop
*loop
= data
->current_loop
;
4542 struct tree_niter_desc
*desc
= NULL
;
/* Only constant-step candidates are handled.  */
4544 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4547 /* For now works only for exits that dominate the loop latch.
4548 TODO: extend to other conditions inside loop body. */
4549 ex_bb
= gimple_bb (use
->stmt
);
4550 if (use
->stmt
!= last_stmt (ex_bb
)
4551 || gimple_code (use
->stmt
) != GIMPLE_COND
4552 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
/* Find the edge that actually exits the loop; both successors inside
   the loop means this is not an exit test.  */
4555 exit
= EDGE_SUCC (ex_bb
, 0);
4556 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4557 exit
= EDGE_SUCC (ex_bb
, 1);
4558 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4561 desc
= niter_for_exit (data
, exit
);
4565 /* Determine whether we can use the variable to test the exit condition.
4566 This is the case iff the period of the induction variable is greater
4567 than the number of iterations for which the exit condition is true. */
4568 period
= iv_period (cand
->iv
);
4570 /* If the number of iterations is constant, compare against it directly. */
4571 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4573 /* See cand_value_at. */
4574 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4576 if (!tree_int_cst_lt (desc
->niter
, period
))
4581 if (tree_int_cst_lt (period
, desc
->niter
))
4586 /* If not, and if this is the only possible exit of the loop, see whether
4587 we can get a conservative estimate on the number of iterations of the
4588 entire loop and compare against that instead. */
4591 double_int period_value
, max_niter
;
4593 max_niter
= desc
->max
;
4594 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4595 max_niter
+= double_int_one
;
4596 period_value
= tree_to_double_int (period
);
4597 if (max_niter
.ugt (period_value
))
4599 /* See if we can take advantage of inferred loop bound information. */
4600 if (data
->loop_single_exit_p
)
4602 if (!max_loop_iterations (loop
, &max_niter
))
4604 /* The loop bound is already adjusted by adding 1. */
4605 if (max_niter
.ugt (period_value
))
/* Build the bound value and choose the comparison operator.  */
4613 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4615 *bound
= aff_combination_to_tree (&bnd
);
4616 *comp
= iv_elimination_compare (data
, use
);
4618 /* It is unlikely that computing the number of iterations using division
4619 would be more profitable than keeping the original induction variable. */
4620 if (expression_expensive_p (*bound
))
4623 /* Sometimes, it is possible to handle the situation that the number of
4624 iterations may be zero unless additional assumptions by using <
4625 instead of != in the exit condition.
4627 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4628 base the exit condition on it. However, that is often too
4630 if (!integer_zerop (desc
->may_be_zero
))
4631 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4636 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4637 be copied, if it is used in the loop body and DATA->body_includes_call. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Charges one insn for a default-def PARM_DECL SSA name when the loop
   body contains a call.  */
4640 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4642 tree sbound
= bound
;
4643 STRIP_NOPS (sbound
);
4645 if (TREE_CODE (sbound
) == SSA_NAME
4646 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4647 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4648 && data
->body_includes_call
)
4649 return COSTS_N_INSNS (1);
4654 /* Determines cost of basing replacement of USE on CAND in a condition. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Compares two strategies -- eliminating the IV by rewriting the exit
   test against CAND, vs. expressing the original compared value -- and
   records the cheaper one.  */
4657 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4658 struct iv_use
*use
, struct iv_cand
*cand
)
4660 tree bound
= NULL_TREE
;
4662 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4663 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4665 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4666 tree
*control_var
, *bound_cst
;
4667 enum tree_code comp
= ERROR_MARK
;
4669 /* Only consider real candidates. */
4672 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4677 /* Try iv elimination. */
4678 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4680 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4681 if (elim_cost
.cost
== 0)
4682 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4683 else if (TREE_CODE (bound
) == INTEGER_CST
)
4685 /* If we replace a loop condition 'i < n' with 'p < base + n',
4686 depends_on_elim will have 'base' and 'n' set, which implies
4687 that both 'base' and 'n' will be live during the loop. More likely,
4688 'base + n' will be loop invariant, resulting in only one live value
4689 during the loop. So in that case we clear depends_on_elim and set
4690 elim_inv_expr_id instead. */
4691 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4693 elim_inv_expr_id
= get_expr_id (data
, bound
);
4694 bitmap_clear (depends_on_elim
);
4696 /* The bound is a loop invariant, so it will be only computed
4698 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
/* Elimination not possible: make that branch infinitely expensive.  */
4701 elim_cost
= infinite_cost
;
4703 /* Try expressing the original giv. If it is compared with an invariant,
4704 note that we cannot get rid of it. */
4705 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4709 /* When the condition is a comparison of the candidate IV against
4710 zero, prefer this IV.
4712 TODO: The constant that we're subtracting from the cost should
4713 be target-dependent. This information should be added to the
4714 target costs for each backend. */
4715 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4716 && integer_zerop (*bound_cst
)
4717 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4718 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4719 elim_cost
.cost
-= 1;
4721 express_cost
= get_computation_cost (data
, use
, cand
, false,
4722 &depends_on_express
, NULL
,
4723 &express_inv_expr_id
);
4724 fd_ivopts_data
= data
;
4725 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4727 /* Count the cost of the original bound as well. */
4728 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4729 if (bound_cost
.cost
== 0)
4730 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4731 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4732 bound_cost
.cost
= 0;
4733 express_cost
.cost
+= bound_cost
.cost
;
4735 /* Choose the better approach, preferring the eliminated IV. */
4736 if (compare_costs (elim_cost
, express_cost
) <= 0)
4739 depends_on
= depends_on_elim
;
4740 depends_on_elim
= NULL
;
4741 inv_expr_id
= elim_inv_expr_id
;
4745 cost
= express_cost
;
4746 depends_on
= depends_on_express
;
4747 depends_on_express
= NULL
;
4750 inv_expr_id
= express_inv_expr_id
;
4753 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
/* Free whichever dependence bitmap was not handed to set_use_iv_cost.  */
4755 if (depends_on_elim
)
4756 BITMAP_FREE (depends_on_elim
);
4757 if (depends_on_express
)
4758 BITMAP_FREE (depends_on_express
);
4760 return !infinite_cost_p (cost
);
4763 /* Determines cost of basing replacement of USE on CAND. Returns false
4764 if USE cannot be based on CAND. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Dispatches on the use kind to the specialized cost routines.  */
4767 determine_use_iv_cost (struct ivopts_data
*data
,
4768 struct iv_use
*use
, struct iv_cand
*cand
)
4772 case USE_NONLINEAR_EXPR
:
4773 return determine_use_iv_cost_generic (data
, use
, cand
);
4776 return determine_use_iv_cost_address (data
, use
, cand
);
4779 return determine_use_iv_cost_condition (data
, use
, cand
);
4786 /* Return true if get_computation_cost indicates that autoincrement is
4787 a possibility for the pair of USE and CAND, false otherwise. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.  */
4790 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4791 struct iv_cand
*cand
)
/* Only address uses can profit from autoincrement addressing.  */
4797 if (use
->type
!= USE_ADDRESS
)
4800 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4801 &can_autoinc
, NULL
);
4803 BITMAP_FREE (depends_on
);
4805 return !infinite_cost_p (cost
) && can_autoinc
;
4808 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4809 use that allows autoincrement, and set their AINC_USE if possible. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.  */
4812 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4816 for (i
= 0; i
< n_iv_cands (data
); i
++)
4818 struct iv_cand
*cand
= iv_cand (data
, i
);
4819 struct iv_use
*closest
= NULL
;
4820 if (cand
->pos
!= IP_ORIGINAL
)
/* Find the use nearest (by uid) before the candidate's increment in
   the same basic block.  */
4822 for (j
= 0; j
< n_iv_uses (data
); j
++)
4824 struct iv_use
*use
= iv_use (data
, j
);
4825 unsigned uid
= gimple_uid (use
->stmt
);
4826 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
)
4827 || uid
> gimple_uid (cand
->incremented_at
))
4829 if (closest
== NULL
|| uid
> gimple_uid (closest
->stmt
))
4832 if (closest
== NULL
|| !autoinc_possible_for_pair (data
, closest
, cand
))
4834 cand
->ainc_use
= closest
;
4838 /* Finds the candidates for the induction variables. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Driver: collects standard, original, and derived IV candidates, wires
   up autoincrement uses, then records the important subset.  */
4841 find_iv_candidates (struct ivopts_data
*data
)
4843 /* Add commonly used ivs. */
4844 add_standard_iv_candidates (data
);
4846 /* Add old induction variables. */
4847 add_old_ivs_candidates (data
);
4849 /* Add induction variables derived from uses. */
4850 add_derived_ivs_candidates (data
);
4852 set_autoinc_for_original_candidates (data
);
4854 /* Record the important candidates. */
4855 record_important_candidates (data
);
4858 /* Determines costs of basing the use of the iv on an iv candidate. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Fills the use/candidate cost map, prunes infinitely-expensive related
   candidates, and optionally dumps the resulting table.  */
4861 determine_use_iv_costs (struct ivopts_data
*data
)
4865 struct iv_cand
*cand
;
4866 bitmap to_clear
= BITMAP_ALLOC (NULL
);
4868 alloc_use_cost_map (data
);
4870 for (i
= 0; i
< n_iv_uses (data
); i
++)
4872 use
= iv_use (data
, i
);
/* Either every candidate is considered for this use, or only the ones
   recorded as related to it.  */
4874 if (data
->consider_all_candidates
)
4876 for (j
= 0; j
< n_iv_cands (data
); j
++)
4878 cand
= iv_cand (data
, j
);
4879 determine_use_iv_cost (data
, use
, cand
);
4886 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
4888 cand
= iv_cand (data
, j
);
4889 if (!determine_use_iv_cost (data
, use
, cand
))
4890 bitmap_set_bit (to_clear
, j
);
4893 /* Remove the candidates for that the cost is infinite from
4894 the list of related candidates. */
4895 bitmap_and_compl_into (use
->related_cands
, to_clear
);
4896 bitmap_clear (to_clear
);
4900 BITMAP_FREE (to_clear
);
/* Debug dump of the use/candidate cost table.  */
4902 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4904 fprintf (dump_file
, "Use-candidate costs:\n");
4906 for (i
= 0; i
< n_iv_uses (data
); i
++)
4908 use
= iv_use (data
, i
);
4910 fprintf (dump_file
, "Use %d:\n", i
);
4911 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
4912 for (j
= 0; j
< use
->n_map_members
; j
++)
4914 if (!use
->cost_map
[j
].cand
4915 || infinite_cost_p (use
->cost_map
[j
].cost
))
4918 fprintf (dump_file
, " %d\t%d\t%d\t",
4919 use
->cost_map
[j
].cand
->id
,
4920 use
->cost_map
[j
].cost
.cost
,
4921 use
->cost_map
[j
].cost
.complexity
);
4922 if (use
->cost_map
[j
].depends_on
)
4923 bitmap_print (dump_file
,
4924 use
->cost_map
[j
].depends_on
, "","");
4925 if (use
->cost_map
[j
].inv_expr_id
!= -1)
4926 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
4927 fprintf (dump_file
, "\n");
4930 fprintf (dump_file
, "\n");
4932 fprintf (dump_file
, "\n");
4936 /* Determines cost of the candidate CAND. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.  */
4939 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
4941 comp_cost cost_base
;
4942 unsigned cost
, cost_step
;
4951 /* There are two costs associated with the candidate -- its increment
4952 and its initialization. The second is almost negligible for any loop
4953 that rolls enough, so we take it just very little into account. */
4955 base
= cand
->iv
->base
;
4956 cost_base
= force_var_cost (data
, base
, NULL
);
4957 /* It will be exceptional that the iv register happens to be initialized with
4958 the proper value at no cost. In general, there will at least be a regcopy
4960 if (cost_base
.cost
== 0)
4961 cost_base
.cost
= COSTS_N_INSNS (1);
4962 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
/* Total = per-iteration step cost + amortized setup cost.  */
4964 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
4966 /* Prefer the original ivs unless we may gain something by replacing it.
4967 The reason is to make debugging simpler; so this is not relevant for
4968 artificial ivs created by other optimization passes. */
4969 if (cand
->pos
!= IP_ORIGINAL
4970 || !SSA_NAME_VAR (cand
->var_before
)
4971 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
4974 /* Prefer not to insert statements into latch unless there are some
4975 already (so that we do not create unnecessary jumps). */
4976 if (cand
->pos
== IP_END
4977 && empty_block_p (ip_end_pos (data
->current_loop
)))
4981 cand
->cost_step
= cost_step
;
4984 /* Determines costs of computation of the candidates. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Runs determine_iv_cost over all candidates, dumping results when
   detailed dumping is enabled.  */
4987 determine_iv_costs (struct ivopts_data
*data
)
4991 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4993 fprintf (dump_file
, "Candidate costs:\n");
4994 fprintf (dump_file
, " cand\tcost\n");
4997 for (i
= 0; i
< n_iv_cands (data
); i
++)
4999 struct iv_cand
*cand
= iv_cand (data
, i
);
5001 determine_iv_cost (data
, cand
);
5003 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5004 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5007 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5008 fprintf (dump_file
, "\n");
5011 /* Calculates cost for having SIZE induction variables. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Register-pressure estimate plus SIZE itself (biases toward fewer
   IVs).  */
5014 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5016 /* We add size to the cost, so that we prefer eliminating ivs
5018 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5019 data
->body_includes_call
);
5022 /* For each size of the induction variable set determine the penalty. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Counts registers already live in the loop (non-IV PHIs and nonlinear
   invariant uses) into DATA->regs_used and dumps the size/cost table.  */
5025 determine_set_costs (struct ivopts_data
*data
)
5029 gimple_stmt_iterator psi
;
5031 struct loop
*loop
= data
->current_loop
;
5034 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5036 fprintf (dump_file
, "Global costs:\n");
5037 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5038 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5039 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5040 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
/* Count header PHIs that are not virtual and not induction variables;
   each occupies a register across the loop.  */
5044 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5046 phi
= gsi_stmt (psi
);
5047 op
= PHI_RESULT (phi
);
5049 if (virtual_operand_p (op
))
5052 if (get_iv (data
, op
))
/* Also count invariants with nonlinear uses.  */
5058 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5060 struct version_info
*info
= ver_info (data
, j
);
5062 if (info
->inv_id
&& info
->has_nonlin_use
)
5066 data
->regs_used
= n
;
5067 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5068 fprintf (dump_file
, " regs_used %d\n", n
);
5070 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5072 fprintf (dump_file
, " cost for size:\n");
5073 fprintf (dump_file
, " ivs\tcost\n");
5074 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5075 fprintf (dump_file
, " %d\t%d\n", j
,
5076 ivopts_global_cost_for_size (data
, j
));
5077 fprintf (dump_file
, "\n");
5081 /* Returns true if A is a cheaper cost pair than B. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Compares use costs first, breaking ties on the candidate's own
   cost.  */
5084 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5094 cmp
= compare_costs (a
->cost
, b
->cost
);
5101 /* In case the costs are the same, prefer the cheaper candidate. */
5102 if (a
->cand
->cost
< b
->cand
->cost
)
5109 /* Returns candidate by that USE is expressed in IVS. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Simple accessor into the per-use cost-pair array of the IV set.  */
5111 static struct cost_pair
*
5112 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5114 return ivs
->cand_for_use
[use
->id
];
5117 /* Computes the cost field of IVS structure. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Total = per-use costs + candidate costs + global register-pressure
   cost for the registers and invariant expressions in use.  */
5120 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5122 comp_cost cost
= ivs
->cand_use_cost
;
5124 cost
.cost
+= ivs
->cand_cost
;
5126 cost
.cost
+= ivopts_global_cost_for_size (data
,
5127 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5132 /* Remove invariants in set INVS to set IVS. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Decrements the reference count of each invariant in INVS.  */
5135 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5143 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5145 ivs
->n_invariant_uses
[iid
]--;
5146 if (ivs
->n_invariant_uses
[iid
] == 0)
5151 /* Set USE not to be expressed by any candidate in IVS. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Undoes the bookkeeping performed by iv_ca_set_cp: clears the
   use->candidate assignment, drops candidate/invariant reference
   counts, subtracts the cost, and recounts the total.  */
5154 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5157 unsigned uid
= use
->id
, cid
;
5158 struct cost_pair
*cp
;
5160 cp
= ivs
->cand_for_use
[uid
];
5166 ivs
->cand_for_use
[uid
] = NULL
;
5167 ivs
->n_cand_uses
[cid
]--;
/* Last use of this candidate: remove it from the set entirely.  */
5169 if (ivs
->n_cand_uses
[cid
] == 0)
5171 bitmap_clear_bit (ivs
->cands
, cid
);
5172 /* Do not count the pseudocandidates. */
5176 ivs
->cand_cost
-= cp
->cand
->cost
;
5178 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5181 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5183 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5185 if (cp
->inv_expr_id
!= -1)
5187 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5188 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5189 ivs
->num_used_inv_expr
--;
5191 iv_ca_recount_cost (data
, ivs
);
5194 /* Add invariants in set INVS to set IVS. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Mirror of iv_ca_set_remove_invariants: increments reference counts.  */
5197 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5205 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5207 ivs
->n_invariant_uses
[iid
]++;
5208 if (ivs
->n_invariant_uses
[iid
] == 1)
5213 /* Set cost pair for USE in set IVS to CP. */
/* NOTE(review): line-mangled extraction; fragments kept byte-for-byte.
   Replaces any previous assignment of USE (via iv_ca_set_no_cp), then
   installs CP, updating candidate/invariant reference counts and the
   accumulated cost.  */
5216 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5217 struct iv_use
*use
, struct cost_pair
*cp
)
5219 unsigned uid
= use
->id
, cid
;
/* No-op if USE is already expressed by exactly this cost pair.  */
5221 if (ivs
->cand_for_use
[uid
] == cp
)
5224 if (ivs
->cand_for_use
[uid
])
5225 iv_ca_set_no_cp (data
, ivs
, use
);
5232 ivs
->cand_for_use
[uid
] = cp
;
5233 ivs
->n_cand_uses
[cid
]++;
/* First use of this candidate: add it to the set.  */
5234 if (ivs
->n_cand_uses
[cid
] == 1)
5236 bitmap_set_bit (ivs
->cands
, cid
);
5237 /* Do not count the pseudocandidates. */
5241 ivs
->cand_cost
+= cp
->cand
->cost
;
5243 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5246 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5247 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5249 if (cp
->inv_expr_id
!= -1)
5251 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5252 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5253 ivs
->num_used_inv_expr
++;
5255 iv_ca_recount_cost (data
, ivs
);
5259 /* Extend set IVS by expressing USE by some of the candidates in it
5260 if possible. All important candidates will be considered
5261 if IMPORTANT_CANDIDATES is true. */
5264 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5265 struct iv_use
*use
, bool important_candidates
)
5267 struct cost_pair
*best_cp
= NULL
, *cp
;
5272 gcc_assert (ivs
->upto
>= use
->id
);
5274 if (ivs
->upto
== use
->id
)
5280 cands
= (important_candidates
? data
->important_candidates
: ivs
->cands
);
5281 EXECUTE_IF_SET_IN_BITMAP (cands
, 0, i
, bi
)
5283 struct iv_cand
*cand
= iv_cand (data
, i
);
5285 cp
= get_use_iv_cost (data
, use
, cand
);
5287 if (cheaper_cost_pair (cp
, best_cp
))
5291 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5294 /* Get cost for assignment IVS. */
5297 iv_ca_cost (struct iv_ca
*ivs
)
5299 /* This was a conditional expression but it triggered a bug in
5302 return infinite_cost
;
5307 /* Returns true if all dependences of CP are among invariants in IVS. */
5310 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5315 if (!cp
->depends_on
)
5318 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5320 if (ivs
->n_invariant_uses
[i
] == 0)
5327 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5328 it before NEXT_CHANGE. */
5330 static struct iv_ca_delta
*
5331 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5332 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5334 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5337 change
->old_cp
= old_cp
;
5338 change
->new_cp
= new_cp
;
5339 change
->next_change
= next_change
;
5344 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5347 static struct iv_ca_delta
*
5348 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5350 struct iv_ca_delta
*last
;
5358 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5360 last
->next_change
= l2
;
5365 /* Reverse the list of changes DELTA, forming the inverse to it. */
5367 static struct iv_ca_delta
*
5368 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5370 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5371 struct cost_pair
*tmp
;
5373 for (act
= delta
; act
; act
= next
)
5375 next
= act
->next_change
;
5376 act
->next_change
= prev
;
5380 act
->old_cp
= act
->new_cp
;
5387 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5388 reverted instead. */
5391 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5392 struct iv_ca_delta
*delta
, bool forward
)
5394 struct cost_pair
*from
, *to
;
5395 struct iv_ca_delta
*act
;
5398 delta
= iv_ca_delta_reverse (delta
);
5400 for (act
= delta
; act
; act
= act
->next_change
)
5404 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5405 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5409 iv_ca_delta_reverse (delta
);
5412 /* Returns true if CAND is used in IVS. */
5415 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5417 return ivs
->n_cand_uses
[cand
->id
] > 0;
5420 /* Returns number of induction variable candidates in the set IVS. */
5423 iv_ca_n_cands (struct iv_ca
*ivs
)
5425 return ivs
->n_cands
;
5428 /* Free the list of changes DELTA. */
5431 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5433 struct iv_ca_delta
*act
, *next
;
5435 for (act
= *delta
; act
; act
= next
)
5437 next
= act
->next_change
;
5444 /* Allocates new iv candidates assignment. */
5446 static struct iv_ca
*
5447 iv_ca_new (struct ivopts_data
*data
)
5449 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5453 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5454 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5455 nw
->cands
= BITMAP_ALLOC (NULL
);
5458 nw
->cand_use_cost
= no_cost
;
5460 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5462 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5463 nw
->num_used_inv_expr
= 0;
5468 /* Free memory occupied by the set IVS. */
5471 iv_ca_free (struct iv_ca
**ivs
)
5473 free ((*ivs
)->cand_for_use
);
5474 free ((*ivs
)->n_cand_uses
);
5475 BITMAP_FREE ((*ivs
)->cands
);
5476 free ((*ivs
)->n_invariant_uses
);
5477 free ((*ivs
)->used_inv_expr
);
5482 /* Dumps IVS to FILE. */
5485 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5487 const char *pref
= " invariants ";
5489 comp_cost cost
= iv_ca_cost (ivs
);
5491 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5492 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5493 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5494 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5496 for (i
= 0; i
< ivs
->upto
; i
++)
5498 struct iv_use
*use
= iv_use (data
, i
);
5499 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5501 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5502 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5504 fprintf (file
, " use:%d --> ??\n", use
->id
);
5507 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5508 if (ivs
->n_invariant_uses
[i
])
5510 fprintf (file
, "%s%d", pref
, i
);
5513 fprintf (file
, "\n\n");
5516 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5517 new set, and store differences in DELTA. Number of induction variables
5518 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5519 the function will try to find a solution with mimimal iv candidates. */
5522 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5523 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5524 unsigned *n_ivs
, bool min_ncand
)
5529 struct cost_pair
*old_cp
, *new_cp
;
5532 for (i
= 0; i
< ivs
->upto
; i
++)
5534 use
= iv_use (data
, i
);
5535 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5538 && old_cp
->cand
== cand
)
5541 new_cp
= get_use_iv_cost (data
, use
, cand
);
5545 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5548 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5551 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5554 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5555 cost
= iv_ca_cost (ivs
);
5557 *n_ivs
= iv_ca_n_cands (ivs
);
5558 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5563 /* Try narrowing set IVS by removing CAND. Return the cost of
5564 the new set and store the differences in DELTA. */
5567 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5568 struct iv_cand
*cand
, struct iv_ca_delta
**delta
)
5572 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5574 struct iv_cand
*cnd
;
5578 for (i
= 0; i
< n_iv_uses (data
); i
++)
5580 use
= iv_use (data
, i
);
5582 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5583 if (old_cp
->cand
!= cand
)
5588 if (data
->consider_all_candidates
)
5590 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5595 cnd
= iv_cand (data
, ci
);
5597 cp
= get_use_iv_cost (data
, use
, cnd
);
5601 if (!iv_ca_has_deps (ivs
, cp
))
5604 if (!cheaper_cost_pair (cp
, new_cp
))
5612 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5617 cnd
= iv_cand (data
, ci
);
5619 cp
= get_use_iv_cost (data
, use
, cnd
);
5622 if (!iv_ca_has_deps (ivs
, cp
))
5625 if (!cheaper_cost_pair (cp
, new_cp
))
5634 iv_ca_delta_free (delta
);
5635 return infinite_cost
;
5638 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5641 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5642 cost
= iv_ca_cost (ivs
);
5643 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5648 /* Try optimizing the set of candidates IVS by removing candidates different
5649 from to EXCEPT_CAND from it. Return cost of the new set, and store
5650 differences in DELTA. */
5653 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5654 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5657 struct iv_ca_delta
*act_delta
, *best_delta
;
5659 comp_cost best_cost
, acost
;
5660 struct iv_cand
*cand
;
5663 best_cost
= iv_ca_cost (ivs
);
5665 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5667 cand
= iv_cand (data
, i
);
5669 if (cand
== except_cand
)
5672 acost
= iv_ca_narrow (data
, ivs
, cand
, &act_delta
);
5674 if (compare_costs (acost
, best_cost
) < 0)
5677 iv_ca_delta_free (&best_delta
);
5678 best_delta
= act_delta
;
5681 iv_ca_delta_free (&act_delta
);
5690 /* Recurse to possibly remove other unnecessary ivs. */
5691 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5692 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5693 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5694 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5698 /* Tries to extend the sets IVS in the best possible way in order
5699 to express the USE. If ORIGINALP is true, prefer candidates from
5700 the original set of IVs, otherwise favor important candidates not
5701 based on any memory object. */
5704 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5705 struct iv_use
*use
, bool originalp
)
5707 comp_cost best_cost
, act_cost
;
5710 struct iv_cand
*cand
;
5711 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5712 struct cost_pair
*cp
;
5714 iv_ca_add_use (data
, ivs
, use
, false);
5715 best_cost
= iv_ca_cost (ivs
);
5717 cp
= iv_ca_cand_for_use (ivs
, use
);
5722 iv_ca_add_use (data
, ivs
, use
, true);
5723 best_cost
= iv_ca_cost (ivs
);
5724 cp
= iv_ca_cand_for_use (ivs
, use
);
5728 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
5729 iv_ca_set_no_cp (data
, ivs
, use
);
5732 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5733 first try important candidates not based on any memory object. Only if
5734 this fails, try the specific ones. Rationale -- in loops with many
5735 variables the best choice often is to use just one generic biv. If we
5736 added here many ivs specific to the uses, the optimization algorithm later
5737 would be likely to get stuck in a local minimum, thus causing us to create
5738 too many ivs. The approach from few ivs to more seems more likely to be
5739 successful -- starting from few ivs, replacing an expensive use by a
5740 specific iv should always be a win. */
5741 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5743 cand
= iv_cand (data
, i
);
5745 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
5748 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
5751 if (iv_ca_cand_used_p (ivs
, cand
))
5754 cp
= get_use_iv_cost (data
, use
, cand
);
5758 iv_ca_set_cp (data
, ivs
, use
, cp
);
5759 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
5761 iv_ca_set_no_cp (data
, ivs
, use
);
5762 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
5764 if (compare_costs (act_cost
, best_cost
) < 0)
5766 best_cost
= act_cost
;
5768 iv_ca_delta_free (&best_delta
);
5769 best_delta
= act_delta
;
5772 iv_ca_delta_free (&act_delta
);
5775 if (infinite_cost_p (best_cost
))
5777 for (i
= 0; i
< use
->n_map_members
; i
++)
5779 cp
= use
->cost_map
+ i
;
5784 /* Already tried this. */
5785 if (cand
->important
)
5787 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
5789 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
5793 if (iv_ca_cand_used_p (ivs
, cand
))
5797 iv_ca_set_cp (data
, ivs
, use
, cp
);
5798 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
5799 iv_ca_set_no_cp (data
, ivs
, use
);
5800 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
5803 if (compare_costs (act_cost
, best_cost
) < 0)
5805 best_cost
= act_cost
;
5808 iv_ca_delta_free (&best_delta
);
5809 best_delta
= act_delta
;
5812 iv_ca_delta_free (&act_delta
);
5816 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5817 iv_ca_delta_free (&best_delta
);
5819 return !infinite_cost_p (best_cost
);
5822 /* Finds an initial assignment of candidates to uses. */
5824 static struct iv_ca
*
5825 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
5827 struct iv_ca
*ivs
= iv_ca_new (data
);
5830 for (i
= 0; i
< n_iv_uses (data
); i
++)
5831 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
5840 /* Tries to improve set of induction variables IVS. */
5843 try_improve_iv_set (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5846 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
5847 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
5848 struct iv_cand
*cand
;
5850 /* Try extending the set of induction variables by one. */
5851 for (i
= 0; i
< n_iv_cands (data
); i
++)
5853 cand
= iv_cand (data
, i
);
5855 if (iv_ca_cand_used_p (ivs
, cand
))
5858 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
5862 /* If we successfully added the candidate and the set is small enough,
5863 try optimizing it by removing other candidates. */
5864 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
5866 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5867 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
5868 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5869 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5872 if (compare_costs (acost
, best_cost
) < 0)
5875 iv_ca_delta_free (&best_delta
);
5876 best_delta
= act_delta
;
5879 iv_ca_delta_free (&act_delta
);
5884 /* Try removing the candidates from the set instead. */
5885 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
5887 /* Nothing more we can do. */
5892 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5893 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
5894 iv_ca_delta_free (&best_delta
);
5898 /* Attempts to find the optimal set of induction variables. We do simple
5899 greedy heuristic -- we try to replace at most one candidate in the selected
5900 solution and remove the unused ivs while this improves the cost. */
5902 static struct iv_ca
*
5903 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
5907 /* Get the initial solution. */
5908 set
= get_initial_solution (data
, originalp
);
5911 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5912 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
5916 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5918 fprintf (dump_file
, "Initial set of candidates:\n");
5919 iv_ca_dump (data
, dump_file
, set
);
5922 while (try_improve_iv_set (data
, set
))
5924 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5926 fprintf (dump_file
, "Improved to:\n");
5927 iv_ca_dump (data
, dump_file
, set
);
5934 static struct iv_ca
*
5935 find_optimal_iv_set (struct ivopts_data
*data
)
5938 struct iv_ca
*set
, *origset
;
5940 comp_cost cost
, origcost
;
5942 /* Determine the cost based on a strategy that starts with original IVs,
5943 and try again using a strategy that prefers candidates not based
5945 origset
= find_optimal_iv_set_1 (data
, true);
5946 set
= find_optimal_iv_set_1 (data
, false);
5948 if (!origset
&& !set
)
5951 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
5952 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
5954 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5956 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
5957 origcost
.cost
, origcost
.complexity
);
5958 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
5959 cost
.cost
, cost
.complexity
);
5962 /* Choose the one with the best cost. */
5963 if (compare_costs (origcost
, cost
) <= 0)
5970 iv_ca_free (&origset
);
5972 for (i
= 0; i
< n_iv_uses (data
); i
++)
5974 use
= iv_use (data
, i
);
5975 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
5981 /* Creates a new induction variable corresponding to CAND. */
5984 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
5986 gimple_stmt_iterator incr_pos
;
5996 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6000 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6008 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6012 /* Mark that the iv is preserved. */
6013 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6014 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6016 /* Rewrite the increment so that it uses var_before directly. */
6017 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6021 gimple_add_tmp_var (cand
->var_before
);
6023 base
= unshare_expr (cand
->iv
->base
);
6025 create_iv (base
, unshare_expr (cand
->iv
->step
),
6026 cand
->var_before
, data
->current_loop
,
6027 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6030 /* Creates new induction variables described in SET. */
6033 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6036 struct iv_cand
*cand
;
6039 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6041 cand
= iv_cand (data
, i
);
6042 create_new_iv (data
, cand
);
6045 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6047 fprintf (dump_file
, "\nSelected IV set: \n");
6048 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6050 cand
= iv_cand (data
, i
);
6051 dump_cand (dump_file
, cand
);
6053 fprintf (dump_file
, "\n");
6057 /* Rewrites USE (definition of iv used in a nonlinear expression)
6058 using candidate CAND. */
6061 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6062 struct iv_use
*use
, struct iv_cand
*cand
)
6067 gimple_stmt_iterator bsi
;
6069 /* An important special case -- if we are asked to express value of
6070 the original iv by itself, just exit; there is no need to
6071 introduce a new computation (that might also need casting the
6072 variable to unsigned and back). */
6073 if (cand
->pos
== IP_ORIGINAL
6074 && cand
->incremented_at
== use
->stmt
)
6076 tree step
, ctype
, utype
;
6077 enum tree_code incr_code
= PLUS_EXPR
, old_code
;
6079 gcc_assert (is_gimple_assign (use
->stmt
));
6080 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6082 step
= cand
->iv
->step
;
6083 ctype
= TREE_TYPE (step
);
6084 utype
= TREE_TYPE (cand
->var_after
);
6085 if (TREE_CODE (step
) == NEGATE_EXPR
)
6087 incr_code
= MINUS_EXPR
;
6088 step
= TREE_OPERAND (step
, 0);
6091 /* Check whether we may leave the computation unchanged.
6092 This is the case only if it does not rely on other
6093 computations in the loop -- otherwise, the computation
6094 we rely upon may be removed in remove_unused_ivs,
6095 thus leading to ICE. */
6096 old_code
= gimple_assign_rhs_code (use
->stmt
);
6097 if (old_code
== PLUS_EXPR
6098 || old_code
== MINUS_EXPR
6099 || old_code
== POINTER_PLUS_EXPR
)
6101 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6102 op
= gimple_assign_rhs2 (use
->stmt
);
6103 else if (old_code
!= MINUS_EXPR
6104 && gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6105 op
= gimple_assign_rhs1 (use
->stmt
);
6113 && (TREE_CODE (op
) == INTEGER_CST
6114 || operand_equal_p (op
, step
, 0)))
6117 /* Otherwise, add the necessary computations to express
6119 op
= fold_convert (ctype
, cand
->var_before
);
6120 comp
= fold_convert (utype
,
6121 build2 (incr_code
, ctype
, op
,
6122 unshare_expr (step
)));
6126 comp
= get_computation (data
->current_loop
, use
, cand
);
6127 gcc_assert (comp
!= NULL_TREE
);
6130 switch (gimple_code (use
->stmt
))
6133 tgt
= PHI_RESULT (use
->stmt
);
6135 /* If we should keep the biv, do not replace it. */
6136 if (name_info (data
, tgt
)->preserve_biv
)
6139 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6143 tgt
= gimple_assign_lhs (use
->stmt
);
6144 bsi
= gsi_for_stmt (use
->stmt
);
6151 if (!valid_gimple_rhs_p (comp
)
6152 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6153 /* We can't allow re-allocating the stmt as it might be pointed
6155 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6156 >= gimple_num_ops (gsi_stmt (bsi
)))))
6158 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6159 true, GSI_SAME_STMT
);
6160 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6162 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6163 /* As this isn't a plain copy we have to reset alignment
6165 if (SSA_NAME_PTR_INFO (comp
))
6166 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6170 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6172 ass
= gimple_build_assign (tgt
, comp
);
6173 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6175 bsi
= gsi_for_stmt (use
->stmt
);
6176 remove_phi_node (&bsi
, false);
6180 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6181 use
->stmt
= gsi_stmt (bsi
);
6185 /* Performs a peephole optimization to reorder the iv update statement with
6186 a mem ref to enable instruction combining in later phases. The mem ref uses
6187 the iv value before the update, so the reordering transformation requires
6188 adjustment of the offset. CAND is the selected IV_CAND.
6192 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6200 directly propagating t over to (1) will introduce overlapping live range
6201 thus increase register pressure. This peephole transform it into:
6205 t = MEM_REF (base, iv2, 8, 8);
6212 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6215 gimple iv_update
, stmt
;
6217 gimple_stmt_iterator gsi
, gsi_iv
;
6219 if (cand
->pos
!= IP_NORMAL
)
6222 var_after
= cand
->var_after
;
6223 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6225 bb
= gimple_bb (iv_update
);
6226 gsi
= gsi_last_nondebug_bb (bb
);
6227 stmt
= gsi_stmt (gsi
);
6229 /* Only handle conditional statement for now. */
6230 if (gimple_code (stmt
) != GIMPLE_COND
)
6233 gsi_prev_nondebug (&gsi
);
6234 stmt
= gsi_stmt (gsi
);
6235 if (stmt
!= iv_update
)
6238 gsi_prev_nondebug (&gsi
);
6239 if (gsi_end_p (gsi
))
6242 stmt
= gsi_stmt (gsi
);
6243 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6246 if (stmt
!= use
->stmt
)
6249 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6252 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6254 fprintf (dump_file
, "Reordering \n");
6255 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6256 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6257 fprintf (dump_file
, "\n");
6260 gsi
= gsi_for_stmt (use
->stmt
);
6261 gsi_iv
= gsi_for_stmt (iv_update
);
6262 gsi_move_before (&gsi_iv
, &gsi
);
6264 cand
->pos
= IP_BEFORE_USE
;
6265 cand
->incremented_at
= use
->stmt
;
6268 /* Rewrites USE (address that is an iv) using candidate CAND. */
6271 rewrite_use_address (struct ivopts_data
*data
,
6272 struct iv_use
*use
, struct iv_cand
*cand
)
6275 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6276 tree base_hint
= NULL_TREE
;
6280 adjust_iv_update_pos (cand
, use
);
6281 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6283 unshare_aff_combination (&aff
);
6285 /* To avoid undefined overflow problems, all IV candidates use unsigned
6286 integer types. The drawback is that this makes it impossible for
6287 create_mem_ref to distinguish an IV that is based on a memory object
6288 from one that represents simply an offset.
6290 To work around this problem, we pass a hint to create_mem_ref that
6291 indicates which variable (if any) in aff is an IV based on a memory
6292 object. Note that we only consider the candidate. If this is not
6293 based on an object, the base of the reference is in some subexpression
6294 of the use -- but these will use pointer types, so they are recognized
6295 by the create_mem_ref heuristics anyway. */
6296 if (cand
->iv
->base_object
)
6297 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6299 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6300 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6301 reference_alias_ptr_type (*use
->op_p
),
6302 iv
, base_hint
, data
->speed
);
6303 copy_ref_info (ref
, *use
->op_p
);
6307 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6311 rewrite_use_compare (struct ivopts_data
*data
,
6312 struct iv_use
*use
, struct iv_cand
*cand
)
6314 tree comp
, *var_p
, op
, bound
;
6315 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6316 enum tree_code compare
;
6317 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6323 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6324 tree var_type
= TREE_TYPE (var
);
6327 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6329 fprintf (dump_file
, "Replacing exit test: ");
6330 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6333 bound
= unshare_expr (fold_convert (var_type
, bound
));
6334 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6336 gsi_insert_seq_on_edge_immediate (
6337 loop_preheader_edge (data
->current_loop
),
6340 gimple_cond_set_lhs (use
->stmt
, var
);
6341 gimple_cond_set_code (use
->stmt
, compare
);
6342 gimple_cond_set_rhs (use
->stmt
, op
);
6346 /* The induction variable elimination failed; just express the original
6348 comp
= get_computation (data
->current_loop
, use
, cand
);
6349 gcc_assert (comp
!= NULL_TREE
);
6351 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6354 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6355 true, GSI_SAME_STMT
);
6358 /* Rewrites USE using candidate CAND. */
6361 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6365 case USE_NONLINEAR_EXPR
:
6366 rewrite_use_nonlinear_expr (data
, use
, cand
);
6370 rewrite_use_address (data
, use
, cand
);
6374 rewrite_use_compare (data
, use
, cand
);
6381 update_stmt (use
->stmt
);
6384 /* Rewrite the uses using the selected induction variables. */
6387 rewrite_uses (struct ivopts_data
*data
)
6390 struct iv_cand
*cand
;
6393 for (i
= 0; i
< n_iv_uses (data
); i
++)
6395 use
= iv_use (data
, i
);
6396 cand
= use
->selected
;
6399 rewrite_use (data
, use
, cand
);
6403 /* Removes the ivs that are not used after rewriting. */
6406 remove_unused_ivs (struct ivopts_data
*data
)
6410 bitmap toremove
= BITMAP_ALLOC (NULL
);
6412 /* Figure out an order in which to release SSA DEFs so that we don't
6413 release something that we'd have to propagate into a debug stmt
6415 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6417 struct version_info
*info
;
6419 info
= ver_info (data
, j
);
6421 && !integer_zerop (info
->iv
->step
)
6423 && !info
->iv
->have_use_for
6424 && !info
->preserve_biv
)
6425 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6428 release_defs_bitset (toremove
);
6430 BITMAP_FREE (toremove
);
6433 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6434 for pointer_map_traverse. */
6437 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED
, void **value
,
6438 void *data ATTRIBUTE_UNUSED
)
6440 struct tree_niter_desc
*const niter
= (struct tree_niter_desc
*) *value
;
6446 /* Frees data allocated by the optimization of a single loop. */
6449 free_loop_data (struct ivopts_data
*data
)
6457 pointer_map_traverse (data
->niters
, free_tree_niter_desc
, NULL
);
6458 pointer_map_destroy (data
->niters
);
6459 data
->niters
= NULL
;
6462 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6464 struct version_info
*info
;
6466 info
= ver_info (data
, i
);
6469 info
->has_nonlin_use
= false;
6470 info
->preserve_biv
= false;
6473 bitmap_clear (data
->relevant
);
6474 bitmap_clear (data
->important_candidates
);
6476 for (i
= 0; i
< n_iv_uses (data
); i
++)
6478 struct iv_use
*use
= iv_use (data
, i
);
6481 BITMAP_FREE (use
->related_cands
);
6482 for (j
= 0; j
< use
->n_map_members
; j
++)
6483 if (use
->cost_map
[j
].depends_on
)
6484 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6485 free (use
->cost_map
);
6488 VEC_truncate (iv_use_p
, data
->iv_uses
, 0);
6490 for (i
= 0; i
< n_iv_cands (data
); i
++)
6492 struct iv_cand
*cand
= iv_cand (data
, i
);
6495 if (cand
->depends_on
)
6496 BITMAP_FREE (cand
->depends_on
);
6499 VEC_truncate (iv_cand_p
, data
->iv_candidates
, 0);
6501 if (data
->version_info_size
< num_ssa_names
)
6503 data
->version_info_size
= 2 * num_ssa_names
;
6504 free (data
->version_info
);
6505 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6508 data
->max_inv_id
= 0;
6510 FOR_EACH_VEC_ELT (tree
, decl_rtl_to_reset
, i
, obj
)
6511 SET_DECL_RTL (obj
, NULL_RTX
);
6513 VEC_truncate (tree
, decl_rtl_to_reset
, 0);
6515 htab_empty (data
->inv_expr_tab
);
6516 data
->inv_expr_id
= 0;
6519 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6523 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6525 free_loop_data (data
);
6526 free (data
->version_info
);
6527 BITMAP_FREE (data
->relevant
);
6528 BITMAP_FREE (data
->important_candidates
);
6530 VEC_free (tree
, heap
, decl_rtl_to_reset
);
6531 VEC_free (iv_use_p
, heap
, data
->iv_uses
);
6532 VEC_free (iv_cand_p
, heap
, data
->iv_candidates
);
6533 htab_delete (data
->inv_expr_tab
);
6536 /* Returns true if the loop body BODY includes any function calls. */
6539 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6541 gimple_stmt_iterator gsi
;
6544 for (i
= 0; i
< num_nodes
; i
++)
6545 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6547 gimple stmt
= gsi_stmt (gsi
);
6548 if (is_gimple_call (stmt
)
6549 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6555 /* Optimizes the LOOP. Returns true if anything changed. */
6558 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6560 bool changed
= false;
6561 struct iv_ca
*iv_ca
;
6562 edge exit
= single_dom_exit (loop
);
6565 gcc_assert (!data
->niters
);
6566 data
->current_loop
= loop
;
6567 data
->speed
= optimize_loop_for_speed_p (loop
);
6569 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6571 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6575 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6576 exit
->src
->index
, exit
->dest
->index
);
6577 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6578 fprintf (dump_file
, "\n");
6581 fprintf (dump_file
, "\n");
6584 body
= get_loop_body (loop
);
6585 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6586 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6589 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6591 /* For each ssa name determines whether it behaves as an induction variable
6593 if (!find_induction_variables (data
))
6596 /* Finds interesting uses (item 1). */
6597 find_interesting_uses (data
);
6598 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6601 /* Finds candidates for the induction variables (item 2). */
6602 find_iv_candidates (data
);
6604 /* Calculates the costs (item 3, part 1). */
6605 determine_iv_costs (data
);
6606 determine_use_iv_costs (data
);
6607 determine_set_costs (data
);
6609 /* Find the optimal set of induction variables (item 3, part 2). */
6610 iv_ca
= find_optimal_iv_set (data
);
6615 /* Create the new induction variables (item 4, part 1). */
6616 create_new_ivs (data
, iv_ca
);
6617 iv_ca_free (&iv_ca
);
6619 /* Rewrite the uses (item 4, part 2). */
6620 rewrite_uses (data
);
6622 /* Remove the ivs that are unused after rewriting. */
6623 remove_unused_ivs (data
);
6625 /* We have changed the structure of induction variables; it might happen
6626 that definitions in the scev database refer to some of them that were
6631 free_loop_data (data
);
6636 /* Main entry point. Optimizes induction variables in loops. */
6639 tree_ssa_iv_optimize (void)
6642 struct ivopts_data data
;
6645 tree_ssa_iv_optimize_init (&data
);
6647 /* Optimize the loops starting with the innermost ones. */
6648 FOR_EACH_LOOP (li
, loop
, LI_FROM_INNERMOST
)
6650 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6651 flow_loop_dump (loop
, dump_file
, NULL
, 1);
6653 tree_ssa_iv_optimize_loop (&data
, loop
);
6656 tree_ssa_iv_optimize_finalize (&data
);