1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
71 #include "double-int.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
82 #include "hard-reg-set.h"
84 #include "dominance.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
93 #include "gimple-expr.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
115 #include "statistics.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
123 #include "emit-rtl.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
142 /* FIXME: Expressions are expanded to RTL in this pass to determine the
143 cost of different addressing modes. This should be moved to a TBD
144 interface between the GIMPLE and RTL worlds. */
147 /* The infinite cost. */
148 #define INFTY 10000000
150 #define AVG_LOOP_NITER(LOOP) 5
152 /* Returns the expected number of loop iterations for LOOP.
153 The average trip count is computed from profile data if it
156 static inline HOST_WIDE_INT
157 avg_loop_niter (struct loop
*loop
)
159 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
161 return AVG_LOOP_NITER (loop
);
166 /* Representation of the induction variable. */
169 tree base
; /* Initial value of the iv. */
170 tree base_object
; /* A memory object to that the induction variable points. */
171 tree step
; /* Step of the iv (constant only). */
172 tree ssa_name
; /* The ssa name with the value. */
173 bool biv_p
; /* Is it a biv? */
174 bool have_use_for
; /* Do we already have a use for it? */
175 unsigned use_id
; /* The identifier in the use if it is the case. */
178 /* Per-ssa version information (induction variable descriptions, etc.). */
181 tree name
; /* The ssa name. */
182 struct iv
*iv
; /* Induction variable description. */
183 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
184 an expression that is not an induction variable. */
185 bool preserve_biv
; /* For the original biv, whether to preserve it. */
186 unsigned inv_id
; /* Id of an invariant. */
192 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
193 USE_ADDRESS
, /* Use in an address. */
194 USE_COMPARE
/* Use is a compare. */
197 /* Cost of a computation. */
200 int cost
; /* The runtime cost. */
201 unsigned complexity
; /* The estimate of the complexity of the code for
202 the computation (in no concrete units --
203 complexity field should be larger for more
204 complex expressions and addressing modes). */
207 static const comp_cost no_cost
= {0, 0};
208 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
210 /* The candidate - cost pair. */
213 struct iv_cand
*cand
; /* The candidate. */
214 comp_cost cost
; /* The cost. */
215 bitmap depends_on
; /* The list of invariants that have to be
217 tree value
; /* For final value elimination, the expression for
218 the final value of the iv. For iv elimination,
219 the new bound to compare with. */
220 enum tree_code comp
; /* For iv elimination, the comparison. */
221 int inv_expr_id
; /* Loop invariant expression id. */
227 unsigned id
; /* The id of the use. */
228 enum use_type type
; /* Type of the use. */
229 struct iv
*iv
; /* The induction variable it is based on. */
230 gimple stmt
; /* Statement in that it occurs. */
231 tree
*op_p
; /* The place where it occurs. */
232 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
235 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
236 struct cost_pair
*cost_map
;
237 /* The costs wrto the iv candidates. */
239 struct iv_cand
*selected
;
240 /* The selected candidate. */
243 /* The position where the iv is computed. */
246 IP_NORMAL
, /* At the end, just before the exit condition. */
247 IP_END
, /* At the end of the latch block. */
248 IP_BEFORE_USE
, /* Immediately before a specific use. */
249 IP_AFTER_USE
, /* Immediately after a specific use. */
250 IP_ORIGINAL
/* The original biv. */
253 /* The induction variable candidate. */
256 unsigned id
; /* The number of the candidate. */
257 bool important
; /* Whether this is an "important" candidate, i.e. such
258 that it should be considered by all uses. */
259 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
260 gimple incremented_at
;/* For original biv, the statement where it is
262 tree var_before
; /* The variable used for it before increment. */
263 tree var_after
; /* The variable used for it after increment. */
264 struct iv
*iv
; /* The value of the candidate. NULL for
265 "pseudocandidate" used to indicate the possibility
266 to replace the final value of an iv by direct
267 computation of the value. */
268 unsigned cost
; /* Cost of the candidate. */
269 unsigned cost_step
; /* Cost of the candidate's increment operation. */
270 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
271 where it is incremented. */
272 bitmap depends_on
; /* The list of invariants that are used in step of the
276 /* Loop invariant expression hashtable entry. */
277 struct iv_inv_expr_ent
284 /* The data used by the induction variable optimizations. */
286 typedef struct iv_use
*iv_use_p
;
288 typedef struct iv_cand
*iv_cand_p
;
290 /* Hashtable helpers. */
292 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
294 typedef iv_inv_expr_ent value_type
;
295 typedef iv_inv_expr_ent compare_type
;
296 static inline hashval_t
hash (const value_type
*);
297 static inline bool equal (const value_type
*, const compare_type
*);
300 /* Hash function for loop invariant expressions. */
303 iv_inv_expr_hasher::hash (const value_type
*expr
)
308 /* Hash table equality function for expressions. */
311 iv_inv_expr_hasher::equal (const value_type
*expr1
, const compare_type
*expr2
)
313 return expr1
->hash
== expr2
->hash
314 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
319 /* The currently optimized loop. */
320 struct loop
*current_loop
;
322 /* Numbers of iterations for all exits of the current loop. */
323 hash_map
<edge
, tree_niter_desc
*> *niters
;
325 /* Number of registers used in it. */
328 /* The size of version_info array allocated. */
329 unsigned version_info_size
;
331 /* The array of information for the ssa names. */
332 struct version_info
*version_info
;
334 /* The hashtable of loop invariant expressions created
336 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
338 /* Loop invariant expression id. */
341 /* The bitmap of indices in version_info whose value was changed. */
344 /* The uses of induction variables. */
345 vec
<iv_use_p
> iv_uses
;
347 /* The candidates. */
348 vec
<iv_cand_p
> iv_candidates
;
350 /* A bitmap of important candidates. */
351 bitmap important_candidates
;
353 /* Cache used by tree_to_aff_combination_expand. */
354 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
356 /* The maximum invariant id. */
359 /* Whether to consider just related and important candidates when replacing a
361 bool consider_all_candidates
;
363 /* Are we optimizing for speed? */
366 /* Whether the loop body includes any function calls. */
367 bool body_includes_call
;
369 /* Whether the loop body can only be exited via single exit. */
370 bool loop_single_exit_p
;
373 /* An assignment of iv candidates to uses. */
377 /* The number of uses covered by the assignment. */
380 /* Number of uses that cannot be expressed by the candidates in the set. */
383 /* Candidate assigned to a use, together with the related costs. */
384 struct cost_pair
**cand_for_use
;
386 /* Number of times each candidate is used. */
387 unsigned *n_cand_uses
;
389 /* The candidates used. */
392 /* The number of candidates in the set. */
395 /* Total number of registers needed. */
398 /* Total cost of expressing uses. */
399 comp_cost cand_use_cost
;
401 /* Total cost of candidates. */
404 /* Number of times each invariant is used. */
405 unsigned *n_invariant_uses
;
407 /* The array holding the number of uses of each loop
408 invariant expressions created by ivopt. */
409 unsigned *used_inv_expr
;
411 /* The number of created loop invariants. */
412 unsigned num_used_inv_expr
;
414 /* Total cost of the assignment. */
418 /* Difference of two iv candidate assignments. */
425 /* An old assignment (for rollback purposes). */
426 struct cost_pair
*old_cp
;
428 /* A new assignment. */
429 struct cost_pair
*new_cp
;
431 /* Next change in the list. */
432 struct iv_ca_delta
*next_change
;
435 /* Bound on number of candidates below that all candidates are considered. */
437 #define CONSIDER_ALL_CANDIDATES_BOUND \
438 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
440 /* If there are more iv occurrences, we just give up (it is quite unlikely that
441 optimizing such a loop would help, and it would take ages). */
443 #define MAX_CONSIDERED_USES \
444 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
446 /* If there are at most this number of ivs in the set, try removing unnecessary
447 ivs from the set always. */
449 #define ALWAYS_PRUNE_CAND_SET_BOUND \
450 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
452 /* The list of trees for that the decl_rtl field must be reset is stored
455 static vec
<tree
> decl_rtl_to_reset
;
457 static comp_cost
force_expr_to_var_cost (tree
, bool);
459 /* Number of uses recorded in DATA. */
461 static inline unsigned
462 n_iv_uses (struct ivopts_data
*data
)
464 return data
->iv_uses
.length ();
467 /* Ith use recorded in DATA. */
469 static inline struct iv_use
*
470 iv_use (struct ivopts_data
*data
, unsigned i
)
472 return data
->iv_uses
[i
];
475 /* Number of candidates recorded in DATA. */
477 static inline unsigned
478 n_iv_cands (struct ivopts_data
*data
)
480 return data
->iv_candidates
.length ();
483 /* Ith candidate recorded in DATA. */
485 static inline struct iv_cand
*
486 iv_cand (struct ivopts_data
*data
, unsigned i
)
488 return data
->iv_candidates
[i
];
491 /* The single loop exit if it dominates the latch, NULL otherwise. */
494 single_dom_exit (struct loop
*loop
)
496 edge exit
= single_exit (loop
);
501 if (!just_once_each_iteration_p (loop
, exit
->src
))
507 /* Dumps information about the induction variable IV to FILE. */
510 dump_iv (FILE *file
, struct iv
*iv
)
514 fprintf (file
, "ssa name ");
515 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
516 fprintf (file
, "\n");
519 fprintf (file
, " type ");
520 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
521 fprintf (file
, "\n");
525 fprintf (file
, " base ");
526 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
527 fprintf (file
, "\n");
529 fprintf (file
, " step ");
530 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
531 fprintf (file
, "\n");
535 fprintf (file
, " invariant ");
536 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
537 fprintf (file
, "\n");
542 fprintf (file
, " base object ");
543 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
544 fprintf (file
, "\n");
548 fprintf (file
, " is a biv\n");
551 /* Dumps information about the USE to FILE. */
554 dump_use (FILE *file
, struct iv_use
*use
)
556 fprintf (file
, "use %d\n", use
->id
);
560 case USE_NONLINEAR_EXPR
:
561 fprintf (file
, " generic\n");
565 fprintf (file
, " address\n");
569 fprintf (file
, " compare\n");
576 fprintf (file
, " in statement ");
577 print_gimple_stmt (file
, use
->stmt
, 0, 0);
578 fprintf (file
, "\n");
580 fprintf (file
, " at position ");
582 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
583 fprintf (file
, "\n");
585 dump_iv (file
, use
->iv
);
587 if (use
->related_cands
)
589 fprintf (file
, " related candidates ");
590 dump_bitmap (file
, use
->related_cands
);
594 /* Dumps information about the uses to FILE. */
597 dump_uses (FILE *file
, struct ivopts_data
*data
)
602 for (i
= 0; i
< n_iv_uses (data
); i
++)
604 use
= iv_use (data
, i
);
606 dump_use (file
, use
);
607 fprintf (file
, "\n");
611 /* Dumps information about induction variable candidate CAND to FILE. */
614 dump_cand (FILE *file
, struct iv_cand
*cand
)
616 struct iv
*iv
= cand
->iv
;
618 fprintf (file
, "candidate %d%s\n",
619 cand
->id
, cand
->important
? " (important)" : "");
621 if (cand
->depends_on
)
623 fprintf (file
, " depends on ");
624 dump_bitmap (file
, cand
->depends_on
);
629 fprintf (file
, " final value replacement\n");
633 if (cand
->var_before
)
635 fprintf (file
, " var_before ");
636 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
637 fprintf (file
, "\n");
641 fprintf (file
, " var_after ");
642 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
643 fprintf (file
, "\n");
649 fprintf (file
, " incremented before exit test\n");
653 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
657 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
661 fprintf (file
, " incremented at end\n");
665 fprintf (file
, " original biv\n");
672 /* Returns the info for ssa version VER. */
674 static inline struct version_info
*
675 ver_info (struct ivopts_data
*data
, unsigned ver
)
677 return data
->version_info
+ ver
;
680 /* Returns the info for ssa name NAME. */
682 static inline struct version_info
*
683 name_info (struct ivopts_data
*data
, tree name
)
685 return ver_info (data
, SSA_NAME_VERSION (name
));
688 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
692 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
694 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
698 if (sbb
== loop
->latch
)
704 return stmt
== last_stmt (bb
);
707 /* Returns true if STMT if after the place where the original induction
708 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
709 if the positions are identical. */
712 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
714 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
715 basic_block stmt_bb
= gimple_bb (stmt
);
717 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
720 if (stmt_bb
!= cand_bb
)
724 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
726 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
729 /* Returns true if STMT if after the place where the induction variable
730 CAND is incremented in LOOP. */
733 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
741 return stmt_after_ip_normal_pos (loop
, stmt
);
745 return stmt_after_inc_pos (cand
, stmt
, false);
748 return stmt_after_inc_pos (cand
, stmt
, true);
755 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
758 abnormal_ssa_name_p (tree exp
)
763 if (TREE_CODE (exp
) != SSA_NAME
)
766 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
769 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
770 abnormal phi node. Callback for for_each_index. */
773 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
774 void *data ATTRIBUTE_UNUSED
)
776 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
778 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
780 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
784 return !abnormal_ssa_name_p (*index
);
787 /* Returns true if EXPR contains a ssa name that occurs in an
788 abnormal phi node. */
791 contains_abnormal_ssa_name_p (tree expr
)
794 enum tree_code_class codeclass
;
799 code
= TREE_CODE (expr
);
800 codeclass
= TREE_CODE_CLASS (code
);
802 if (code
== SSA_NAME
)
803 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
805 if (code
== INTEGER_CST
806 || is_gimple_min_invariant (expr
))
809 if (code
== ADDR_EXPR
)
810 return !for_each_index (&TREE_OPERAND (expr
, 0),
811 idx_contains_abnormal_ssa_name_p
,
814 if (code
== COND_EXPR
)
815 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
816 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
817 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
823 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
828 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
840 /* Returns the structure describing number of iterations determined from
841 EXIT of DATA->current_loop, or NULL if something goes wrong. */
843 static struct tree_niter_desc
*
844 niter_for_exit (struct ivopts_data
*data
, edge exit
)
846 struct tree_niter_desc
*desc
;
847 tree_niter_desc
**slot
;
851 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
855 slot
= data
->niters
->get (exit
);
859 /* Try to determine number of iterations. We cannot safely work with ssa
860 names that appear in phi nodes on abnormal edges, so that we do not
861 create overlapping life ranges for them (PR 27283). */
862 desc
= XNEW (struct tree_niter_desc
);
863 if (!number_of_iterations_exit (data
->current_loop
,
865 || contains_abnormal_ssa_name_p (desc
->niter
))
870 data
->niters
->put (exit
, desc
);
878 /* Returns the structure describing number of iterations determined from
879 single dominating exit of DATA->current_loop, or NULL if something
882 static struct tree_niter_desc
*
883 niter_for_single_dom_exit (struct ivopts_data
*data
)
885 edge exit
= single_dom_exit (data
->current_loop
);
890 return niter_for_exit (data
, exit
);
893 /* Initializes data structures used by the iv optimization pass, stored
897 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
899 data
->version_info_size
= 2 * num_ssa_names
;
900 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
901 data
->relevant
= BITMAP_ALLOC (NULL
);
902 data
->important_candidates
= BITMAP_ALLOC (NULL
);
903 data
->max_inv_id
= 0;
905 data
->iv_uses
.create (20);
906 data
->iv_candidates
.create (20);
907 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
908 data
->inv_expr_id
= 0;
909 data
->name_expansion_cache
= NULL
;
910 decl_rtl_to_reset
.create (20);
913 /* Returns a memory object to that EXPR points. In case we are able to
914 determine that it does not point to any such object, NULL is returned. */
917 determine_base_object (tree expr
)
919 enum tree_code code
= TREE_CODE (expr
);
922 /* If this is a pointer casted to any type, we need to determine
923 the base object for the pointer; so handle conversions before
924 throwing away non-pointer expressions. */
925 if (CONVERT_EXPR_P (expr
))
926 return determine_base_object (TREE_OPERAND (expr
, 0));
928 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
937 obj
= TREE_OPERAND (expr
, 0);
938 base
= get_base_address (obj
);
943 if (TREE_CODE (base
) == MEM_REF
)
944 return determine_base_object (TREE_OPERAND (base
, 0));
946 return fold_convert (ptr_type_node
,
947 build_fold_addr_expr (base
));
949 case POINTER_PLUS_EXPR
:
950 return determine_base_object (TREE_OPERAND (expr
, 0));
954 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
958 return fold_convert (ptr_type_node
, expr
);
962 /* Return true if address expression with non-DECL_P operand appears
966 contain_complex_addr_expr (tree expr
)
971 switch (TREE_CODE (expr
))
973 case POINTER_PLUS_EXPR
:
976 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
977 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
981 return (!DECL_P (TREE_OPERAND (expr
, 0)));
990 /* Allocates an induction variable with given initial value BASE and step STEP
994 alloc_iv (tree base
, tree step
)
997 struct iv
*iv
= XCNEW (struct iv
);
998 gcc_assert (step
!= NULL_TREE
);
1000 /* Lower address expression in base except ones with DECL_P as operand.
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
1006 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1007 || contain_complex_addr_expr (expr
))
1010 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1011 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1015 iv
->base_object
= determine_base_object (base
);
1018 iv
->have_use_for
= false;
1020 iv
->ssa_name
= NULL_TREE
;
1025 /* Sets STEP and BASE for induction variable IV. */
1028 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
1030 struct version_info
*info
= name_info (data
, iv
);
1032 gcc_assert (!info
->iv
);
1034 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1035 info
->iv
= alloc_iv (base
, step
);
1036 info
->iv
->ssa_name
= iv
;
1039 /* Finds induction variable declaration for VAR. */
1042 get_iv (struct ivopts_data
*data
, tree var
)
1045 tree type
= TREE_TYPE (var
);
1047 if (!POINTER_TYPE_P (type
)
1048 && !INTEGRAL_TYPE_P (type
))
1051 if (!name_info (data
, var
)->iv
)
1053 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1056 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1057 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1060 return name_info (data
, var
)->iv
;
1063 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1064 not define a simple affine biv with nonzero step. */
1067 determine_biv_step (gphi
*phi
)
1069 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1070 tree name
= PHI_RESULT (phi
);
1073 if (virtual_operand_p (name
))
1076 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1079 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1082 /* Finds basic ivs. */
1085 find_bivs (struct ivopts_data
*data
)
1088 tree step
, type
, base
;
1090 struct loop
*loop
= data
->current_loop
;
1093 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1097 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1100 step
= determine_biv_step (phi
);
1104 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1105 base
= expand_simple_operations (base
);
1106 if (contains_abnormal_ssa_name_p (base
)
1107 || contains_abnormal_ssa_name_p (step
))
1110 type
= TREE_TYPE (PHI_RESULT (phi
));
1111 base
= fold_convert (type
, base
);
1114 if (POINTER_TYPE_P (type
))
1115 step
= convert_to_ptrofftype (step
);
1117 step
= fold_convert (type
, step
);
1120 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1127 /* Marks basic ivs. */
1130 mark_bivs (struct ivopts_data
*data
)
1135 struct iv
*iv
, *incr_iv
;
1136 struct loop
*loop
= data
->current_loop
;
1137 basic_block incr_bb
;
1140 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1144 iv
= get_iv (data
, PHI_RESULT (phi
));
1148 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1149 def
= SSA_NAME_DEF_STMT (var
);
1150 /* Don't mark iv peeled from other one as biv. */
1152 && gimple_code (def
) == GIMPLE_PHI
1153 && gimple_bb (def
) == loop
->header
)
1156 incr_iv
= get_iv (data
, var
);
1160 /* If the increment is in the subloop, ignore it. */
1161 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1162 if (incr_bb
->loop_father
!= data
->current_loop
1163 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1167 incr_iv
->biv_p
= true;
1171 /* Checks whether STMT defines a linear induction variable and stores its
1172 parameters to IV. */
1175 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1178 struct loop
*loop
= data
->current_loop
;
1180 iv
->base
= NULL_TREE
;
1181 iv
->step
= NULL_TREE
;
1183 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1186 lhs
= gimple_assign_lhs (stmt
);
1187 if (TREE_CODE (lhs
) != SSA_NAME
)
1190 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1192 iv
->base
= expand_simple_operations (iv
->base
);
1194 if (contains_abnormal_ssa_name_p (iv
->base
)
1195 || contains_abnormal_ssa_name_p (iv
->step
))
1198 /* If STMT could throw, then do not consider STMT as defining a GIV.
1199 While this will suppress optimizations, we can not safely delete this
1200 GIV and associated statements, even if it appears it is not used. */
1201 if (stmt_could_throw_p (stmt
))
1207 /* Finds general ivs in statement STMT. */
1210 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1214 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1217 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1220 /* Finds general ivs in basic block BB. */
1223 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1225 gimple_stmt_iterator bsi
;
1227 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1228 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1231 /* Finds general ivs. */
1234 find_givs (struct ivopts_data
*data
)
1236 struct loop
*loop
= data
->current_loop
;
1237 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1240 for (i
= 0; i
< loop
->num_nodes
; i
++)
1241 find_givs_in_bb (data
, body
[i
]);
1245 /* For each ssa name defined in LOOP determines whether it is an induction
1246 variable and if so, its initial value and step. */
1249 find_induction_variables (struct ivopts_data
*data
)
1254 if (!find_bivs (data
))
1260 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1262 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1266 fprintf (dump_file
, " number of iterations ");
1267 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1268 if (!integer_zerop (niter
->may_be_zero
))
1270 fprintf (dump_file
, "; zero if ");
1271 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1273 fprintf (dump_file
, "\n\n");
1276 fprintf (dump_file
, "Induction variables:\n\n");
1278 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1280 if (ver_info (data
, i
)->iv
)
1281 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1288 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1290 static struct iv_use
*
1291 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1292 gimple stmt
, enum use_type use_type
)
1294 struct iv_use
*use
= XCNEW (struct iv_use
);
1296 use
->id
= n_iv_uses (data
);
1297 use
->type
= use_type
;
1301 use
->related_cands
= BITMAP_ALLOC (NULL
);
1303 /* To avoid showing ssa name in the dumps, if it was not reset by the
1305 iv
->ssa_name
= NULL_TREE
;
1307 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1308 dump_use (dump_file
, use
);
1310 data
->iv_uses
.safe_push (use
);
1315 /* Checks whether OP is a loop-level invariant and if so, records it.
1316 NONLINEAR_USE is true if the invariant is used in a way we do not
1317 handle specially. */
1320 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1323 struct version_info
*info
;
1325 if (TREE_CODE (op
) != SSA_NAME
1326 || virtual_operand_p (op
))
1329 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1331 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1334 info
= name_info (data
, op
);
1336 info
->has_nonlin_use
|= nonlinear_use
;
1338 info
->inv_id
= ++data
->max_inv_id
;
1339 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1342 /* Checks whether the use OP is interesting and if so, records it. */
1344 static struct iv_use
*
1345 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1352 if (TREE_CODE (op
) != SSA_NAME
)
1355 iv
= get_iv (data
, op
);
1359 if (iv
->have_use_for
)
1361 use
= iv_use (data
, iv
->use_id
);
1363 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1367 if (integer_zerop (iv
->step
))
1369 record_invariant (data
, op
, true);
1372 iv
->have_use_for
= true;
1374 civ
= XNEW (struct iv
);
1377 stmt
= SSA_NAME_DEF_STMT (op
);
1378 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1379 || is_gimple_assign (stmt
));
1381 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1382 iv
->use_id
= use
->id
;
1387 /* Given a condition in statement STMT, checks whether it is a compare
1388 of an induction variable and an invariant. If this is the case,
1389 CONTROL_VAR is set to location of the iv, BOUND to the location of
1390 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1391 induction variable descriptions, and true is returned. If this is not
1392 the case, CONTROL_VAR and BOUND are set to the arguments of the
1393 condition and false is returned. */
1396 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1397 tree
**control_var
, tree
**bound
,
1398 struct iv
**iv_var
, struct iv
**iv_bound
)
1400 /* The objects returned when COND has constant operands. */
1401 static struct iv const_iv
;
1403 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1404 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1407 if (gimple_code (stmt
) == GIMPLE_COND
)
1409 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1410 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1411 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1415 op0
= gimple_assign_rhs1_ptr (stmt
);
1416 op1
= gimple_assign_rhs2_ptr (stmt
);
1419 zero
= integer_zero_node
;
1420 const_iv
.step
= integer_zero_node
;
1422 if (TREE_CODE (*op0
) == SSA_NAME
)
1423 iv0
= get_iv (data
, *op0
);
1424 if (TREE_CODE (*op1
) == SSA_NAME
)
1425 iv1
= get_iv (data
, *op1
);
1427 /* Exactly one of the compared values must be an iv, and the other one must
1432 if (integer_zerop (iv0
->step
))
1434 /* Control variable may be on the other side. */
1435 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1436 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1438 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1442 *control_var
= op0
;;
1453 /* Checks whether the condition in STMT is interesting and if so,
1457 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1459 tree
*var_p
, *bound_p
;
1460 struct iv
*var_iv
, *civ
;
1462 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1464 find_interesting_uses_op (data
, *var_p
);
1465 find_interesting_uses_op (data
, *bound_p
);
1469 civ
= XNEW (struct iv
);
1471 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1474 /* Returns the outermost loop EXPR is obviously invariant in
1475 relative to the loop LOOP, i.e. if all its operands are defined
1476 outside of the returned loop. Returns NULL if EXPR is not
1477 even obviously invariant in LOOP. */
1480 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1485 if (is_gimple_min_invariant (expr
))
1486 return current_loops
->tree_root
;
1488 if (TREE_CODE (expr
) == SSA_NAME
)
1490 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1493 if (flow_bb_inside_loop_p (loop
, def_bb
))
1495 return superloop_at_depth (loop
,
1496 loop_depth (def_bb
->loop_father
) + 1);
1499 return current_loops
->tree_root
;
1505 unsigned maxdepth
= 0;
1506 len
= TREE_OPERAND_LENGTH (expr
);
1507 for (i
= 0; i
< len
; i
++)
1509 struct loop
*ivloop
;
1510 if (!TREE_OPERAND (expr
, i
))
1513 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1516 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1519 return superloop_at_depth (loop
, maxdepth
);
1522 /* Returns true if expression EXPR is obviously invariant in LOOP,
1523 i.e. if all its operands are defined outside of the LOOP. LOOP
1524 should not be the function body. */
1527 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1532 gcc_assert (loop_depth (loop
) > 0);
1534 if (is_gimple_min_invariant (expr
))
1537 if (TREE_CODE (expr
) == SSA_NAME
)
1539 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1541 && flow_bb_inside_loop_p (loop
, def_bb
))
1550 len
= TREE_OPERAND_LENGTH (expr
);
1551 for (i
= 0; i
< len
; i
++)
1552 if (TREE_OPERAND (expr
, i
)
1553 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1559 /* Cumulates the steps of indices into DATA and replaces their values with the
1560 initial ones. Returns false when the value of the index cannot be determined.
1561 Callback for for_each_index. */
1563 struct ifs_ivopts_data
1565 struct ivopts_data
*ivopts_data
;
1571 idx_find_step (tree base
, tree
*idx
, void *data
)
1573 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1575 tree step
, iv_base
, iv_step
, lbound
, off
;
1576 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1578 /* If base is a component ref, require that the offset of the reference
1580 if (TREE_CODE (base
) == COMPONENT_REF
)
1582 off
= component_ref_field_offset (base
);
1583 return expr_invariant_in_loop_p (loop
, off
);
1586 /* If base is array, first check whether we will be able to move the
1587 reference out of the loop (in order to take its address in strength
1588 reduction). In order for this to work we need both lower bound
1589 and step to be loop invariants. */
1590 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1592 /* Moreover, for a range, the size needs to be invariant as well. */
1593 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1594 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1597 step
= array_ref_element_size (base
);
1598 lbound
= array_ref_low_bound (base
);
1600 if (!expr_invariant_in_loop_p (loop
, step
)
1601 || !expr_invariant_in_loop_p (loop
, lbound
))
1605 if (TREE_CODE (*idx
) != SSA_NAME
)
1608 iv
= get_iv (dta
->ivopts_data
, *idx
);
1612 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1613 *&x[0], which is not folded and does not trigger the
1614 ARRAY_REF path below. */
1617 if (integer_zerop (iv
->step
))
1620 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1622 step
= array_ref_element_size (base
);
1624 /* We only handle addresses whose step is an integer constant. */
1625 if (TREE_CODE (step
) != INTEGER_CST
)
1629 /* The step for pointer arithmetics already is 1 byte. */
1630 step
= size_one_node
;
1634 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1635 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1638 /* The index might wrap. */
1642 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1643 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1648 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1649 object is passed to it in DATA. */
1652 idx_record_use (tree base
, tree
*idx
,
1655 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1656 find_interesting_uses_op (data
, *idx
);
1657 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1659 find_interesting_uses_op (data
, array_ref_element_size (base
));
1660 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1665 /* If we can prove that TOP = cst * BOT for some constant cst,
1666 store cst to MUL and return true. Otherwise return false.
1667 The returned value is always sign-extended, regardless of the
1668 signedness of TOP and BOT. */
1671 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1674 enum tree_code code
;
1675 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1676 widest_int res
, p0
, p1
;
1681 if (operand_equal_p (top
, bot
, 0))
1687 code
= TREE_CODE (top
);
1691 mby
= TREE_OPERAND (top
, 1);
1692 if (TREE_CODE (mby
) != INTEGER_CST
)
1695 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1698 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1703 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1704 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1707 if (code
== MINUS_EXPR
)
1709 *mul
= wi::sext (p0
+ p1
, precision
);
1713 if (TREE_CODE (bot
) != INTEGER_CST
)
1716 p0
= widest_int::from (top
, SIGNED
);
1717 p1
= widest_int::from (bot
, SIGNED
);
1720 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1728 /* Return true if memory reference REF with step STEP may be unaligned. */
1731 may_be_unaligned_p (tree ref
, tree step
)
1733 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1734 thus they are not misaligned. */
1735 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1738 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1739 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1740 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1742 unsigned HOST_WIDE_INT bitpos
;
1743 unsigned int ref_align
;
1744 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1745 if (ref_align
< align
1746 || (bitpos
% align
) != 0
1747 || (bitpos
% BITS_PER_UNIT
) != 0)
1750 unsigned int trailing_zeros
= tree_ctz (step
);
1751 if (trailing_zeros
< HOST_BITS_PER_INT
1752 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1758 /* Return true if EXPR may be non-addressable. */
1761 may_be_nonaddressable_p (tree expr
)
1763 switch (TREE_CODE (expr
))
1765 case TARGET_MEM_REF
:
1766 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1767 target, thus they are always addressable. */
1771 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1772 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1774 case VIEW_CONVERT_EXPR
:
1775 /* This kind of view-conversions may wrap non-addressable objects
1776 and make them look addressable. After some processing the
1777 non-addressability may be uncovered again, causing ADDR_EXPRs
1778 of inappropriate objects to be built. */
1779 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1780 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1783 /* ... fall through ... */
1786 case ARRAY_RANGE_REF
:
1787 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1799 /* Finds addresses in *OP_P inside STMT. */
1802 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1804 tree base
= *op_p
, step
= size_zero_node
;
1806 struct ifs_ivopts_data ifs_ivopts_data
;
1808 /* Do not play with volatile memory references. A bit too conservative,
1809 perhaps, but safe. */
1810 if (gimple_has_volatile_ops (stmt
))
1813 /* Ignore bitfields for now. Not really something terribly complicated
1815 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1818 base
= unshare_expr (base
);
1820 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1822 tree type
= build_pointer_type (TREE_TYPE (base
));
1826 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1828 civ
= get_iv (data
, TMR_BASE (base
));
1832 TMR_BASE (base
) = civ
->base
;
1835 if (TMR_INDEX2 (base
)
1836 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1838 civ
= get_iv (data
, TMR_INDEX2 (base
));
1842 TMR_INDEX2 (base
) = civ
->base
;
1845 if (TMR_INDEX (base
)
1846 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1848 civ
= get_iv (data
, TMR_INDEX (base
));
1852 TMR_INDEX (base
) = civ
->base
;
1857 if (TMR_STEP (base
))
1858 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1860 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1864 if (integer_zerop (step
))
1866 base
= tree_mem_ref_addr (type
, base
);
1870 ifs_ivopts_data
.ivopts_data
= data
;
1871 ifs_ivopts_data
.stmt
= stmt
;
1872 ifs_ivopts_data
.step
= size_zero_node
;
1873 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1874 || integer_zerop (ifs_ivopts_data
.step
))
1876 step
= ifs_ivopts_data
.step
;
1878 /* Check that the base expression is addressable. This needs
1879 to be done after substituting bases of IVs into it. */
1880 if (may_be_nonaddressable_p (base
))
1883 /* Moreover, on strict alignment platforms, check that it is
1884 sufficiently aligned. */
1885 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1888 base
= build_fold_addr_expr (base
);
1890 /* Substituting bases of IVs into the base expression might
1891 have caused folding opportunities. */
1892 if (TREE_CODE (base
) == ADDR_EXPR
)
1894 tree
*ref
= &TREE_OPERAND (base
, 0);
1895 while (handled_component_p (*ref
))
1896 ref
= &TREE_OPERAND (*ref
, 0);
1897 if (TREE_CODE (*ref
) == MEM_REF
)
1899 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1900 TREE_OPERAND (*ref
, 0),
1901 TREE_OPERAND (*ref
, 1));
1908 civ
= alloc_iv (base
, step
);
1909 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1913 for_each_index (op_p
, idx_record_use
, data
);
1916 /* Finds and records invariants used in STMT. */
1919 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1922 use_operand_p use_p
;
1925 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1927 op
= USE_FROM_PTR (use_p
);
1928 record_invariant (data
, op
, false);
1932 /* Finds interesting uses of induction variables in the statement STMT. */
1935 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1938 tree op
, *lhs
, *rhs
;
1940 use_operand_p use_p
;
1941 enum tree_code code
;
1943 find_invariants_stmt (data
, stmt
);
1945 if (gimple_code (stmt
) == GIMPLE_COND
)
1947 find_interesting_uses_cond (data
, stmt
);
1951 if (is_gimple_assign (stmt
))
1953 lhs
= gimple_assign_lhs_ptr (stmt
);
1954 rhs
= gimple_assign_rhs1_ptr (stmt
);
1956 if (TREE_CODE (*lhs
) == SSA_NAME
)
1958 /* If the statement defines an induction variable, the uses are not
1959 interesting by themselves. */
1961 iv
= get_iv (data
, *lhs
);
1963 if (iv
&& !integer_zerop (iv
->step
))
1967 code
= gimple_assign_rhs_code (stmt
);
1968 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
1969 && (REFERENCE_CLASS_P (*rhs
)
1970 || is_gimple_val (*rhs
)))
1972 if (REFERENCE_CLASS_P (*rhs
))
1973 find_interesting_uses_address (data
, stmt
, rhs
);
1975 find_interesting_uses_op (data
, *rhs
);
1977 if (REFERENCE_CLASS_P (*lhs
))
1978 find_interesting_uses_address (data
, stmt
, lhs
);
1981 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
1983 find_interesting_uses_cond (data
, stmt
);
1987 /* TODO -- we should also handle address uses of type
1989 memory = call (whatever);
1996 if (gimple_code (stmt
) == GIMPLE_PHI
1997 && gimple_bb (stmt
) == data
->current_loop
->header
)
1999 iv
= get_iv (data
, PHI_RESULT (stmt
));
2001 if (iv
&& !integer_zerop (iv
->step
))
2005 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2007 op
= USE_FROM_PTR (use_p
);
2009 if (TREE_CODE (op
) != SSA_NAME
)
2012 iv
= get_iv (data
, op
);
2016 find_interesting_uses_op (data
, op
);
2020 /* Finds interesting uses of induction variables outside of loops
2021 on loop exit edge EXIT. */
2024 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2030 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2033 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2034 if (!virtual_operand_p (def
))
2035 find_interesting_uses_op (data
, def
);
2039 /* Finds uses of the induction variables that are interesting. */
2042 find_interesting_uses (struct ivopts_data
*data
)
2045 gimple_stmt_iterator bsi
;
2046 basic_block
*body
= get_loop_body (data
->current_loop
);
2048 struct version_info
*info
;
2051 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2052 fprintf (dump_file
, "Uses:\n\n");
2054 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2059 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2060 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2061 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2062 find_interesting_uses_outside (data
, e
);
2064 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2065 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2066 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2067 if (!is_gimple_debug (gsi_stmt (bsi
)))
2068 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2075 fprintf (dump_file
, "\n");
2077 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2079 info
= ver_info (data
, i
);
2082 fprintf (dump_file
, " ");
2083 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2084 fprintf (dump_file
, " is invariant (%d)%s\n",
2085 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2089 fprintf (dump_file
, "\n");
2095 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2096 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2097 we are at the top-level of the processed address. */
2100 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2101 HOST_WIDE_INT
*offset
)
2103 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2104 enum tree_code code
;
2105 tree type
, orig_type
= TREE_TYPE (expr
);
2106 HOST_WIDE_INT off0
, off1
, st
;
2107 tree orig_expr
= expr
;
2111 type
= TREE_TYPE (expr
);
2112 code
= TREE_CODE (expr
);
2118 if (!cst_and_fits_in_hwi (expr
)
2119 || integer_zerop (expr
))
2122 *offset
= int_cst_value (expr
);
2123 return build_int_cst (orig_type
, 0);
2125 case POINTER_PLUS_EXPR
:
2128 op0
= TREE_OPERAND (expr
, 0);
2129 op1
= TREE_OPERAND (expr
, 1);
2131 op0
= strip_offset_1 (op0
, false, false, &off0
);
2132 op1
= strip_offset_1 (op1
, false, false, &off1
);
2134 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2135 if (op0
== TREE_OPERAND (expr
, 0)
2136 && op1
== TREE_OPERAND (expr
, 1))
2139 if (integer_zerop (op1
))
2141 else if (integer_zerop (op0
))
2143 if (code
== MINUS_EXPR
)
2144 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2149 expr
= fold_build2 (code
, type
, op0
, op1
);
2151 return fold_convert (orig_type
, expr
);
2154 op1
= TREE_OPERAND (expr
, 1);
2155 if (!cst_and_fits_in_hwi (op1
))
2158 op0
= TREE_OPERAND (expr
, 0);
2159 op0
= strip_offset_1 (op0
, false, false, &off0
);
2160 if (op0
== TREE_OPERAND (expr
, 0))
2163 *offset
= off0
* int_cst_value (op1
);
2164 if (integer_zerop (op0
))
2167 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2169 return fold_convert (orig_type
, expr
);
2172 case ARRAY_RANGE_REF
:
2176 step
= array_ref_element_size (expr
);
2177 if (!cst_and_fits_in_hwi (step
))
2180 st
= int_cst_value (step
);
2181 op1
= TREE_OPERAND (expr
, 1);
2182 op1
= strip_offset_1 (op1
, false, false, &off1
);
2183 *offset
= off1
* st
;
2186 && integer_zerop (op1
))
2188 /* Strip the component reference completely. */
2189 op0
= TREE_OPERAND (expr
, 0);
2190 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2203 tmp
= component_ref_field_offset (expr
);
2204 field
= TREE_OPERAND (expr
, 1);
2206 && cst_and_fits_in_hwi (tmp
)
2207 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2209 HOST_WIDE_INT boffset
, abs_off
;
2211 /* Strip the component reference completely. */
2212 op0
= TREE_OPERAND (expr
, 0);
2213 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2214 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2215 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2219 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2226 op0
= TREE_OPERAND (expr
, 0);
2227 op0
= strip_offset_1 (op0
, true, true, &off0
);
2230 if (op0
== TREE_OPERAND (expr
, 0))
2233 expr
= build_fold_addr_expr (op0
);
2234 return fold_convert (orig_type
, expr
);
2237 /* ??? Offset operand? */
2238 inside_addr
= false;
2245 /* Default handling of expressions for that we want to recurse into
2246 the first operand. */
2247 op0
= TREE_OPERAND (expr
, 0);
2248 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2251 if (op0
== TREE_OPERAND (expr
, 0)
2252 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2255 expr
= copy_node (expr
);
2256 TREE_OPERAND (expr
, 0) = op0
;
2258 TREE_OPERAND (expr
, 1) = op1
;
2260 /* Inside address, we might strip the top level component references,
2261 thus changing type of the expression. Handling of ADDR_EXPR
2263 expr
= fold_convert (orig_type
, expr
);
2268 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2271 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2274 tree core
= strip_offset_1 (expr
, false, false, &off
);
2279 /* Returns variant of TYPE that can be used as base for different uses.
2280 We return unsigned type with the same precision, which avoids problems
2284 generic_type_for (tree type
)
2286 if (POINTER_TYPE_P (type
))
2287 return unsigned_type_for (type
);
2289 if (TYPE_UNSIGNED (type
))
2292 return unsigned_type_for (type
);
2295 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2296 the bitmap to that we should store it. */
2298 static struct ivopts_data
*fd_ivopts_data
;
2300 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2302 bitmap
*depends_on
= (bitmap
*) data
;
2303 struct version_info
*info
;
2305 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2307 info
= name_info (fd_ivopts_data
, *expr_p
);
2309 if (!info
->inv_id
|| info
->has_nonlin_use
)
2313 *depends_on
= BITMAP_ALLOC (NULL
);
2314 bitmap_set_bit (*depends_on
, info
->inv_id
);
2319 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2320 position to POS. If USE is not NULL, the candidate is set as related to
2321 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2322 replacement of the final value of the iv by a direct computation. */
2324 static struct iv_cand
*
2325 add_candidate_1 (struct ivopts_data
*data
,
2326 tree base
, tree step
, bool important
, enum iv_position pos
,
2327 struct iv_use
*use
, gimple incremented_at
)
2330 struct iv_cand
*cand
= NULL
;
2331 tree type
, orig_type
;
2333 /* For non-original variables, make sure their values are computed in a type
2334 that does not invoke undefined behavior on overflows (since in general,
2335 we cannot prove that these induction variables are non-wrapping). */
2336 if (pos
!= IP_ORIGINAL
)
2338 orig_type
= TREE_TYPE (base
);
2339 type
= generic_type_for (orig_type
);
2340 if (type
!= orig_type
)
2342 base
= fold_convert (type
, base
);
2343 step
= fold_convert (type
, step
);
2347 for (i
= 0; i
< n_iv_cands (data
); i
++)
2349 cand
= iv_cand (data
, i
);
2351 if (cand
->pos
!= pos
)
2354 if (cand
->incremented_at
!= incremented_at
2355 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2356 && cand
->ainc_use
!= use
))
2370 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2371 && operand_equal_p (step
, cand
->iv
->step
, 0)
2372 && (TYPE_PRECISION (TREE_TYPE (base
))
2373 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2377 if (i
== n_iv_cands (data
))
2379 cand
= XCNEW (struct iv_cand
);
2385 cand
->iv
= alloc_iv (base
, step
);
2388 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2390 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2391 cand
->var_after
= cand
->var_before
;
2393 cand
->important
= important
;
2394 cand
->incremented_at
= incremented_at
;
2395 data
->iv_candidates
.safe_push (cand
);
2398 && TREE_CODE (step
) != INTEGER_CST
)
2400 fd_ivopts_data
= data
;
2401 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2404 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2405 cand
->ainc_use
= use
;
2407 cand
->ainc_use
= NULL
;
2409 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2410 dump_cand (dump_file
, cand
);
2413 if (important
&& !cand
->important
)
2415 cand
->important
= true;
2416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2417 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2422 bitmap_set_bit (use
->related_cands
, i
);
2423 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2424 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2431 /* Returns true if incrementing the induction variable at the end of the LOOP
2434 The purpose is to avoid splitting latch edge with a biv increment, thus
2435 creating a jump, possibly confusing other optimization passes and leaving
2436 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2437 is not available (so we do not have a better alternative), or if the latch
2438 edge is already nonempty. */
2441 allow_ip_end_pos_p (struct loop
*loop
)
2443 if (!ip_normal_pos (loop
))
2446 if (!empty_block_p (ip_end_pos (loop
)))
2452 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2453 Important field is set to IMPORTANT. */
2456 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2457 bool important
, struct iv_use
*use
)
2459 basic_block use_bb
= gimple_bb (use
->stmt
);
2460 machine_mode mem_mode
;
2461 unsigned HOST_WIDE_INT cstepi
;
2463 /* If we insert the increment in any position other than the standard
2464 ones, we must ensure that it is incremented once per iteration.
2465 It must not be in an inner nested loop, or one side of an if
2467 if (use_bb
->loop_father
!= data
->current_loop
2468 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2469 || stmt_could_throw_p (use
->stmt
)
2470 || !cst_and_fits_in_hwi (step
))
2473 cstepi
= int_cst_value (step
);
2475 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2476 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2477 || USE_STORE_PRE_INCREMENT (mem_mode
))
2478 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2479 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2480 || USE_STORE_PRE_DECREMENT (mem_mode
))
2481 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2483 enum tree_code code
= MINUS_EXPR
;
2485 tree new_step
= step
;
2487 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2489 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2490 code
= POINTER_PLUS_EXPR
;
2493 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2494 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2495 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2498 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2499 || USE_STORE_POST_INCREMENT (mem_mode
))
2500 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2501 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2502 || USE_STORE_POST_DECREMENT (mem_mode
))
2503 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2505 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2510 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2511 position to POS. If USE is not NULL, the candidate is set as related to
2512 it. The candidate computation is scheduled on all available positions. */
2515 add_candidate (struct ivopts_data
*data
,
2516 tree base
, tree step
, bool important
, struct iv_use
*use
)
2518 if (ip_normal_pos (data
->current_loop
))
2519 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2520 if (ip_end_pos (data
->current_loop
)
2521 && allow_ip_end_pos_p (data
->current_loop
))
2522 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2524 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2525 add_autoinc_candidates (data
, base
, step
, important
, use
);
2528 /* Adds standard iv candidates. */
2531 add_standard_iv_candidates (struct ivopts_data
*data
)
2533 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2535 /* The same for a double-integer type if it is still fast enough. */
2537 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2538 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2539 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2540 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2542 /* The same for a double-integer type if it is still fast enough. */
2544 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2545 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2546 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2547 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2551 /* Adds candidates bases on the old induction variable IV. */
2554 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2558 struct iv_cand
*cand
;
2560 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2562 /* The same, but with initial value zero. */
2563 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2564 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2566 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2567 iv
->step
, true, NULL
);
2569 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2570 if (gimple_code (phi
) == GIMPLE_PHI
)
2572 /* Additionally record the possibility of leaving the original iv
2574 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2575 /* Don't add candidate if it's from another PHI node because
2576 it's an affine iv appearing in the form of PEELED_CHREC. */
2577 phi
= SSA_NAME_DEF_STMT (def
);
2578 if (gimple_code (phi
) != GIMPLE_PHI
)
2580 cand
= add_candidate_1 (data
,
2581 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2582 SSA_NAME_DEF_STMT (def
));
2583 cand
->var_before
= iv
->ssa_name
;
2584 cand
->var_after
= def
;
2587 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2591 /* Adds candidates based on the old induction variables. */
2594 add_old_ivs_candidates (struct ivopts_data
*data
)
2600 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2602 iv
= ver_info (data
, i
)->iv
;
2603 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2604 add_old_iv_candidates (data
, iv
);
2608 /* Adds candidates based on the value of the induction variable IV and USE. */
2611 add_iv_value_candidates (struct ivopts_data
*data
,
2612 struct iv
*iv
, struct iv_use
*use
)
2614 unsigned HOST_WIDE_INT offset
;
2618 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2620 /* The same, but with initial value zero. Make such variable important,
2621 since it is generic enough so that possibly many uses may be based
2623 basetype
= TREE_TYPE (iv
->base
);
2624 if (POINTER_TYPE_P (basetype
))
2625 basetype
= sizetype
;
2626 add_candidate (data
, build_int_cst (basetype
, 0),
2627 iv
->step
, true, use
);
2629 /* Third, try removing the constant offset. Make sure to even
2630 add a candidate for &a[0] vs. (T *)&a. */
2631 base
= strip_offset (iv
->base
, &offset
);
2633 || base
!= iv
->base
)
2634 add_candidate (data
, base
, iv
->step
, false, use
);
2637 /* Adds candidates based on the uses. */
2640 add_derived_ivs_candidates (struct ivopts_data
*data
)
2644 for (i
= 0; i
< n_iv_uses (data
); i
++)
2646 struct iv_use
*use
= iv_use (data
, i
);
2653 case USE_NONLINEAR_EXPR
:
2656 /* Just add the ivs based on the value of the iv used here. */
2657 add_iv_value_candidates (data
, use
->iv
, use
);
2666 /* Record important candidates and add them to related_cands bitmaps
2670 record_important_candidates (struct ivopts_data
*data
)
2675 for (i
= 0; i
< n_iv_cands (data
); i
++)
2677 struct iv_cand
*cand
= iv_cand (data
, i
);
2679 if (cand
->important
)
2680 bitmap_set_bit (data
->important_candidates
, i
);
2683 data
->consider_all_candidates
= (n_iv_cands (data
)
2684 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2686 if (data
->consider_all_candidates
)
2688 /* We will not need "related_cands" bitmaps in this case,
2689 so release them to decrease peak memory consumption. */
2690 for (i
= 0; i
< n_iv_uses (data
); i
++)
2692 use
= iv_use (data
, i
);
2693 BITMAP_FREE (use
->related_cands
);
2698 /* Add important candidates to the related_cands bitmaps. */
2699 for (i
= 0; i
< n_iv_uses (data
); i
++)
2700 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2701 data
->important_candidates
);
2705 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2706 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2707 we allocate a simple list to every use. */
2710 alloc_use_cost_map (struct ivopts_data
*data
)
2712 unsigned i
, size
, s
;
2714 for (i
= 0; i
< n_iv_uses (data
); i
++)
2716 struct iv_use
*use
= iv_use (data
, i
);
2718 if (data
->consider_all_candidates
)
2719 size
= n_iv_cands (data
);
2722 s
= bitmap_count_bits (use
->related_cands
);
2724 /* Round up to the power of two, so that moduling by it is fast. */
2725 size
= s
? (1 << ceil_log2 (s
)) : 1;
2728 use
->n_map_members
= size
;
2729 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2733 /* Returns description of computation cost of expression whose runtime
2734 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2737 new_cost (unsigned runtime
, unsigned complexity
)
2741 cost
.cost
= runtime
;
2742 cost
.complexity
= complexity
;
2747 /* Adds costs COST1 and COST2. */
2750 add_costs (comp_cost cost1
, comp_cost cost2
)
2752 cost1
.cost
+= cost2
.cost
;
2753 cost1
.complexity
+= cost2
.complexity
;
2757 /* Subtracts costs COST1 and COST2. */
2760 sub_costs (comp_cost cost1
, comp_cost cost2
)
2762 cost1
.cost
-= cost2
.cost
;
2763 cost1
.complexity
-= cost2
.complexity
;
2768 /* Returns a negative number if COST1 < COST2, a positive number if
2769 COST1 > COST2, and 0 if COST1 = COST2. */
2772 compare_costs (comp_cost cost1
, comp_cost cost2
)
2774 if (cost1
.cost
== cost2
.cost
)
2775 return cost1
.complexity
- cost2
.complexity
;
2777 return cost1
.cost
- cost2
.cost
;
2780 /* Returns true if COST is infinite. */
2783 infinite_cost_p (comp_cost cost
)
2785 return cost
.cost
== INFTY
;
2788 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2789 on invariants DEPENDS_ON and that the value used in expressing it
2790 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2793 set_use_iv_cost (struct ivopts_data
*data
,
2794 struct iv_use
*use
, struct iv_cand
*cand
,
2795 comp_cost cost
, bitmap depends_on
, tree value
,
2796 enum tree_code comp
, int inv_expr_id
)
2800 if (infinite_cost_p (cost
))
2802 BITMAP_FREE (depends_on
);
2806 if (data
->consider_all_candidates
)
2808 use
->cost_map
[cand
->id
].cand
= cand
;
2809 use
->cost_map
[cand
->id
].cost
= cost
;
2810 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2811 use
->cost_map
[cand
->id
].value
= value
;
2812 use
->cost_map
[cand
->id
].comp
= comp
;
2813 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2817 /* n_map_members is a power of two, so this computes modulo. */
2818 s
= cand
->id
& (use
->n_map_members
- 1);
2819 for (i
= s
; i
< use
->n_map_members
; i
++)
2820 if (!use
->cost_map
[i
].cand
)
2822 for (i
= 0; i
< s
; i
++)
2823 if (!use
->cost_map
[i
].cand
)
2829 use
->cost_map
[i
].cand
= cand
;
2830 use
->cost_map
[i
].cost
= cost
;
2831 use
->cost_map
[i
].depends_on
= depends_on
;
2832 use
->cost_map
[i
].value
= value
;
2833 use
->cost_map
[i
].comp
= comp
;
2834 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2837 /* Gets cost of (USE, CANDIDATE) pair. */
2839 static struct cost_pair
*
2840 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2841 struct iv_cand
*cand
)
2844 struct cost_pair
*ret
;
2849 if (data
->consider_all_candidates
)
2851 ret
= use
->cost_map
+ cand
->id
;
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s
= cand
->id
& (use
->n_map_members
- 1);
2860 for (i
= s
; i
< use
->n_map_members
; i
++)
2861 if (use
->cost_map
[i
].cand
== cand
)
2862 return use
->cost_map
+ i
;
2863 else if (use
->cost_map
[i
].cand
== NULL
)
2865 for (i
= 0; i
< s
; i
++)
2866 if (use
->cost_map
[i
].cand
== cand
)
2867 return use
->cost_map
+ i
;
2868 else if (use
->cost_map
[i
].cand
== NULL
)
2874 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2876 produce_memory_decl_rtl (tree obj
, int *regno
)
2878 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2879 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2883 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2885 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2886 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2887 SET_SYMBOL_REF_DECL (x
, obj
);
2888 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2889 set_mem_addr_space (x
, as
);
2890 targetm
.encode_section_info (obj
, x
, true);
2894 x
= gen_raw_REG (address_mode
, (*regno
)++);
2895 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2896 set_mem_addr_space (x
, as
);
2902 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2903 walk_tree. DATA contains the actual fake register number. */
2906 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2908 tree obj
= NULL_TREE
;
2910 int *regno
= (int *) data
;
2912 switch (TREE_CODE (*expr_p
))
2915 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2916 handled_component_p (*expr_p
);
2917 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2920 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2921 x
= produce_memory_decl_rtl (obj
, regno
);
2926 obj
= SSA_NAME_VAR (*expr_p
);
2927 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2930 if (!DECL_RTL_SET_P (obj
))
2931 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2940 if (DECL_RTL_SET_P (obj
))
2943 if (DECL_MODE (obj
) == BLKmode
)
2944 x
= produce_memory_decl_rtl (obj
, regno
);
2946 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2956 decl_rtl_to_reset
.safe_push (obj
);
2957 SET_DECL_RTL (obj
, x
);
2963 /* Determines cost of the computation of EXPR. */
2966 computation_cost (tree expr
, bool speed
)
2970 tree type
= TREE_TYPE (expr
);
2972 /* Avoid using hard regs in ways which may be unsupported. */
2973 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
2974 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
2975 enum node_frequency real_frequency
= node
->frequency
;
2977 node
->frequency
= NODE_FREQUENCY_NORMAL
;
2978 crtl
->maybe_hot_insn_p
= speed
;
2979 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
2981 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
2984 default_rtl_profile ();
2985 node
->frequency
= real_frequency
;
2987 cost
= seq_cost (seq
, speed
);
2989 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
2990 TYPE_ADDR_SPACE (type
), speed
);
2991 else if (!REG_P (rslt
))
2992 cost
+= set_src_cost (rslt
, speed
);
2997 /* Returns variable containing the value of candidate CAND at statement AT. */
3000 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3002 if (stmt_after_increment (loop
, cand
, stmt
))
3003 return cand
->var_after
;
3005 return cand
->var_before
;
3008 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3009 same precision that is at least as wide as the precision of TYPE, stores
3010 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3014 determine_common_wider_type (tree
*a
, tree
*b
)
3016 tree wider_type
= NULL
;
3018 tree atype
= TREE_TYPE (*a
);
3020 if (CONVERT_EXPR_P (*a
))
3022 suba
= TREE_OPERAND (*a
, 0);
3023 wider_type
= TREE_TYPE (suba
);
3024 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3030 if (CONVERT_EXPR_P (*b
))
3032 subb
= TREE_OPERAND (*b
, 0);
3033 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3044 /* Determines the expression by that USE is expressed from induction variable
3045 CAND at statement AT in LOOP. The expression is stored in a decomposed
3046 form into AFF. Returns false if USE cannot be expressed using CAND. */
3049 get_computation_aff (struct loop
*loop
,
3050 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3051 struct aff_tree
*aff
)
3053 tree ubase
= use
->iv
->base
;
3054 tree ustep
= use
->iv
->step
;
3055 tree cbase
= cand
->iv
->base
;
3056 tree cstep
= cand
->iv
->step
, cstep_common
;
3057 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3058 tree common_type
, var
;
3060 aff_tree cbase_aff
, var_aff
;
3063 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3065 /* We do not have a precision to express the values of use. */
3069 var
= var_at_stmt (loop
, cand
, at
);
3070 uutype
= unsigned_type_for (utype
);
3072 /* If the conversion is not noop, perform it. */
3073 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3075 cstep
= fold_convert (uutype
, cstep
);
3076 cbase
= fold_convert (uutype
, cbase
);
3077 var
= fold_convert (uutype
, var
);
3080 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3083 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3084 type, we achieve better folding by computing their difference in this
3085 wider type, and cast the result to UUTYPE. We do not need to worry about
3086 overflows, as all the arithmetics will in the end be performed in UUTYPE
3088 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3090 /* use = ubase - ratio * cbase + ratio * var. */
3091 tree_to_aff_combination (ubase
, common_type
, aff
);
3092 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3093 tree_to_aff_combination (var
, uutype
, &var_aff
);
3095 /* We need to shift the value if we are after the increment. */
3096 if (stmt_after_increment (loop
, cand
, at
))
3100 if (common_type
!= uutype
)
3101 cstep_common
= fold_convert (common_type
, cstep
);
3103 cstep_common
= cstep
;
3105 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3106 aff_combination_add (&cbase_aff
, &cstep_aff
);
3109 aff_combination_scale (&cbase_aff
, -rat
);
3110 aff_combination_add (aff
, &cbase_aff
);
3111 if (common_type
!= uutype
)
3112 aff_combination_convert (aff
, uutype
);
3114 aff_combination_scale (&var_aff
, rat
);
3115 aff_combination_add (aff
, &var_aff
);
3120 /* Return the type of USE. */
3123 get_use_type (struct iv_use
*use
)
3125 tree base_type
= TREE_TYPE (use
->iv
->base
);
3128 if (use
->type
== USE_ADDRESS
)
3130 /* The base_type may be a void pointer. Create a pointer type based on
3131 the mem_ref instead. */
3132 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3133 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3134 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3142 /* Determines the expression by that USE is expressed from induction variable
3143 CAND at statement AT in LOOP. The computation is unshared. */
3146 get_computation_at (struct loop
*loop
,
3147 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3150 tree type
= get_use_type (use
);
3152 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3154 unshare_aff_combination (&aff
);
3155 return fold_convert (type
, aff_combination_to_tree (&aff
));
3158 /* Determines the expression by that USE is expressed from induction variable
3159 CAND in LOOP. The computation is unshared. */
3162 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3164 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3167 /* Adjust the cost COST for being in loop setup rather than loop body.
3168 If we're optimizing for space, the loop setup overhead is constant;
3169 if we're optimizing for speed, amortize it over the per-iteration cost. */
3171 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3175 else if (optimize_loop_for_speed_p (data
->current_loop
))
3176 return cost
/ avg_loop_niter (data
->current_loop
);
3181 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3182 validity for a memory reference accessing memory of mode MODE in
3183 address space AS. */
3187 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3190 #define MAX_RATIO 128
3191 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3192 static vec
<sbitmap
> valid_mult_list
;
3195 if (data_index
>= valid_mult_list
.length ())
3196 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3198 valid_mult
= valid_mult_list
[data_index
];
3201 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3202 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3203 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3207 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3208 bitmap_clear (valid_mult
);
3209 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3210 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3211 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3213 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3214 if (memory_address_addr_space_p (mode
, addr
, as
)
3215 || memory_address_addr_space_p (mode
, scaled
, as
))
3216 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3219 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3221 fprintf (dump_file
, " allowed multipliers:");
3222 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3223 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3224 fprintf (dump_file
, " %d", (int) i
);
3225 fprintf (dump_file
, "\n");
3226 fprintf (dump_file
, "\n");
3229 valid_mult_list
[data_index
] = valid_mult
;
3232 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3235 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3238 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3239 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3240 variable is omitted. Compute the cost for a memory reference that accesses
3241 a memory location of mode MEM_MODE in address space AS.
3243 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3244 size of MEM_MODE / RATIO) is available. To make this determination, we
3245 look at the size of the increment to be made, which is given in CSTEP.
3246 CSTEP may be zero if the step is unknown.
3247 STMT_AFTER_INC is true iff the statement we're looking at is after the
3248 increment of the original biv.
3250 TODO -- there must be some better way. This all is quite crude. */
3254 AINC_PRE_INC
, /* Pre increment. */
3255 AINC_PRE_DEC
, /* Pre decrement. */
3256 AINC_POST_INC
, /* Post increment. */
3257 AINC_POST_DEC
, /* Post decrement. */
3258 AINC_NONE
/* Also the number of auto increment types. */
3261 typedef struct address_cost_data_s
3263 HOST_WIDE_INT min_offset
, max_offset
;
3264 unsigned costs
[2][2][2][2];
3265 unsigned ainc_costs
[AINC_NONE
];
3266 } *address_cost_data
;
3270 get_address_cost (bool symbol_present
, bool var_present
,
3271 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3272 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3273 addr_space_t as
, bool speed
,
3274 bool stmt_after_inc
, bool *may_autoinc
)
3276 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3277 static vec
<address_cost_data
> address_cost_data_list
;
3278 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3279 address_cost_data data
;
3280 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3281 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3282 unsigned cost
, acost
, complexity
;
3283 enum ainc_type autoinc_type
;
3284 bool offset_p
, ratio_p
, autoinc
;
3285 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3286 unsigned HOST_WIDE_INT mask
;
3289 if (data_index
>= address_cost_data_list
.length ())
3290 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3292 data
= address_cost_data_list
[data_index
];
3296 HOST_WIDE_INT rat
, off
= 0;
3297 int old_cse_not_expected
, width
;
3298 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3303 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3305 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3307 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3308 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3309 width
= HOST_BITS_PER_WIDE_INT
- 1;
3310 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3312 for (i
= width
; i
>= 0; i
--)
3314 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3315 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3316 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3319 data
->min_offset
= (i
== -1? 0 : off
);
3321 for (i
= width
; i
>= 0; i
--)
3323 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3324 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3325 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3327 /* For some TARGET, like ARM THUMB1, the offset should be nature
3328 aligned. Try an aligned offset if address_mode is not QImode. */
3329 off
= (address_mode
== QImode
)
3331 : ((unsigned HOST_WIDE_INT
) 1 << i
)
3332 - GET_MODE_SIZE (address_mode
);
3335 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3336 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3342 data
->max_offset
= off
;
3344 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3346 fprintf (dump_file
, "get_address_cost:\n");
3347 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3348 GET_MODE_NAME (mem_mode
),
3350 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3351 GET_MODE_NAME (mem_mode
),
3356 for (i
= 2; i
<= MAX_RATIO
; i
++)
3357 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3363 /* Compute the cost of various addressing modes. */
3365 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3366 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3368 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3369 || USE_STORE_PRE_DECREMENT (mem_mode
))
3371 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3372 has_predec
[mem_mode
]
3373 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3375 if (has_predec
[mem_mode
])
3376 data
->ainc_costs
[AINC_PRE_DEC
]
3377 = address_cost (addr
, mem_mode
, as
, speed
);
3379 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3380 || USE_STORE_POST_DECREMENT (mem_mode
))
3382 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3383 has_postdec
[mem_mode
]
3384 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3386 if (has_postdec
[mem_mode
])
3387 data
->ainc_costs
[AINC_POST_DEC
]
3388 = address_cost (addr
, mem_mode
, as
, speed
);
3390 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3391 || USE_STORE_PRE_DECREMENT (mem_mode
))
3393 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3394 has_preinc
[mem_mode
]
3395 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3397 if (has_preinc
[mem_mode
])
3398 data
->ainc_costs
[AINC_PRE_INC
]
3399 = address_cost (addr
, mem_mode
, as
, speed
);
3401 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3402 || USE_STORE_POST_INCREMENT (mem_mode
))
3404 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3405 has_postinc
[mem_mode
]
3406 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3408 if (has_postinc
[mem_mode
])
3409 data
->ainc_costs
[AINC_POST_INC
]
3410 = address_cost (addr
, mem_mode
, as
, speed
);
3412 for (i
= 0; i
< 16; i
++)
3415 var_p
= (i
>> 1) & 1;
3416 off_p
= (i
>> 2) & 1;
3417 rat_p
= (i
>> 3) & 1;
3421 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3422 gen_int_mode (rat
, address_mode
));
3425 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3429 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3430 /* ??? We can run into trouble with some backends by presenting
3431 it with symbols which haven't been properly passed through
3432 targetm.encode_section_info. By setting the local bit, we
3433 enhance the probability of things working. */
3434 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3437 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3439 (PLUS
, address_mode
, base
,
3440 gen_int_mode (off
, address_mode
)));
3443 base
= gen_int_mode (off
, address_mode
);
3448 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3451 /* To avoid splitting addressing modes, pretend that no cse will
3453 old_cse_not_expected
= cse_not_expected
;
3454 cse_not_expected
= true;
3455 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3456 cse_not_expected
= old_cse_not_expected
;
3460 acost
= seq_cost (seq
, speed
);
3461 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3465 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3468 /* On some targets, it is quite expensive to load symbol to a register,
3469 which makes addresses that contain symbols look much more expensive.
3470 However, the symbol will have to be loaded in any case before the
3471 loop (and quite likely we have it in register already), so it does not
3472 make much sense to penalize them too heavily. So make some final
3473 tweaks for the SYMBOL_PRESENT modes:
3475 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3476 var is cheaper, use this mode with small penalty.
3477 If VAR_PRESENT is true, try whether the mode with
3478 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3479 if this is the case, use it. */
3480 add_c
= add_cost (speed
, address_mode
);
3481 for (i
= 0; i
< 8; i
++)
3484 off_p
= (i
>> 1) & 1;
3485 rat_p
= (i
>> 2) & 1;
3487 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3491 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3492 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3495 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3497 fprintf (dump_file
, "Address costs:\n");
3499 for (i
= 0; i
< 16; i
++)
3502 var_p
= (i
>> 1) & 1;
3503 off_p
= (i
>> 2) & 1;
3504 rat_p
= (i
>> 3) & 1;
3506 fprintf (dump_file
, " ");
3508 fprintf (dump_file
, "sym + ");
3510 fprintf (dump_file
, "var + ");
3512 fprintf (dump_file
, "cst + ");
3514 fprintf (dump_file
, "rat * ");
3516 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3517 fprintf (dump_file
, "index costs %d\n", acost
);
3519 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3520 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3521 fprintf (dump_file
, " May include autoinc/dec\n");
3522 fprintf (dump_file
, "\n");
3525 address_cost_data_list
[data_index
] = data
;
3528 bits
= GET_MODE_BITSIZE (address_mode
);
3529 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3531 if ((offset
>> (bits
- 1) & 1))
3536 autoinc_type
= AINC_NONE
;
3537 msize
= GET_MODE_SIZE (mem_mode
);
3538 autoinc_offset
= offset
;
3540 autoinc_offset
+= ratio
* cstep
;
3541 if (symbol_present
|| var_present
|| ratio
!= 1)
3545 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3547 autoinc_type
= AINC_POST_INC
;
3548 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3550 autoinc_type
= AINC_POST_DEC
;
3551 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3553 autoinc_type
= AINC_PRE_INC
;
3554 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3556 autoinc_type
= AINC_PRE_DEC
;
3558 if (autoinc_type
!= AINC_NONE
)
3563 offset_p
= (s_offset
!= 0
3564 && data
->min_offset
<= s_offset
3565 && s_offset
<= data
->max_offset
);
3566 ratio_p
= (ratio
!= 1
3567 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3569 if (ratio
!= 1 && !ratio_p
)
3570 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3572 if (s_offset
&& !offset_p
&& !symbol_present
)
3573 cost
+= add_cost (speed
, address_mode
);
3576 *may_autoinc
= autoinc
;
3578 acost
= data
->ainc_costs
[autoinc_type
];
3580 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3581 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3582 return new_cost (cost
+ acost
, complexity
);
3585 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3586 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3587 calculating the operands of EXPR. Returns true if successful, and returns
3588 the cost in COST. */
3591 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3592 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3595 tree op1
= TREE_OPERAND (expr
, 1);
3596 tree cst
= TREE_OPERAND (mult
, 1);
3597 tree multop
= TREE_OPERAND (mult
, 0);
3598 int m
= exact_log2 (int_cst_value (cst
));
3599 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3601 bool equal_p
= false;
3603 if (!(m
>= 0 && m
< maxm
))
3606 if (operand_equal_p (op1
, mult
, 0))
3609 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3610 ? shiftadd_cost (speed
, mode
, m
)
3612 ? shiftsub1_cost (speed
, mode
, m
)
3613 : shiftsub0_cost (speed
, mode
, m
)));
3614 res
= new_cost (sa_cost
, 0);
3615 res
= add_costs (res
, equal_p
? cost0
: cost1
);
3617 STRIP_NOPS (multop
);
3618 if (!is_gimple_val (multop
))
3619 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3625 /* Estimates cost of forcing expression EXPR into a variable. */
3628 force_expr_to_var_cost (tree expr
, bool speed
)
3630 static bool costs_initialized
= false;
3631 static unsigned integer_cost
[2];
3632 static unsigned symbol_cost
[2];
3633 static unsigned address_cost
[2];
3635 comp_cost cost0
, cost1
, cost
;
3638 if (!costs_initialized
)
3640 tree type
= build_pointer_type (integer_type_node
);
3645 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3646 TREE_STATIC (var
) = 1;
3647 x
= produce_memory_decl_rtl (var
, NULL
);
3648 SET_DECL_RTL (var
, x
);
3650 addr
= build1 (ADDR_EXPR
, type
, var
);
3653 for (i
= 0; i
< 2; i
++)
3655 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3658 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3661 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3662 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3664 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3665 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3666 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3667 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3668 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3669 fprintf (dump_file
, "\n");
3673 costs_initialized
= true;
3678 if (SSA_VAR_P (expr
))
3681 if (is_gimple_min_invariant (expr
))
3683 if (TREE_CODE (expr
) == INTEGER_CST
)
3684 return new_cost (integer_cost
[speed
], 0);
3686 if (TREE_CODE (expr
) == ADDR_EXPR
)
3688 tree obj
= TREE_OPERAND (expr
, 0);
3690 if (TREE_CODE (obj
) == VAR_DECL
3691 || TREE_CODE (obj
) == PARM_DECL
3692 || TREE_CODE (obj
) == RESULT_DECL
)
3693 return new_cost (symbol_cost
[speed
], 0);
3696 return new_cost (address_cost
[speed
], 0);
3699 switch (TREE_CODE (expr
))
3701 case POINTER_PLUS_EXPR
:
3705 op0
= TREE_OPERAND (expr
, 0);
3706 op1
= TREE_OPERAND (expr
, 1);
3713 op0
= TREE_OPERAND (expr
, 0);
3719 /* Just an arbitrary value, FIXME. */
3720 return new_cost (target_spill_cost
[speed
], 0);
3723 if (op0
== NULL_TREE
3724 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3727 cost0
= force_expr_to_var_cost (op0
, speed
);
3729 if (op1
== NULL_TREE
3730 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3733 cost1
= force_expr_to_var_cost (op1
, speed
);
3735 mode
= TYPE_MODE (TREE_TYPE (expr
));
3736 switch (TREE_CODE (expr
))
3738 case POINTER_PLUS_EXPR
:
3742 cost
= new_cost (add_cost (speed
, mode
), 0);
3743 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3745 tree mult
= NULL_TREE
;
3747 if (TREE_CODE (op1
) == MULT_EXPR
)
3749 else if (TREE_CODE (op0
) == MULT_EXPR
)
3752 if (mult
!= NULL_TREE
3753 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3754 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3762 tree inner_mode
, outer_mode
;
3763 outer_mode
= TREE_TYPE (expr
);
3764 inner_mode
= TREE_TYPE (op0
);
3765 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3766 TYPE_MODE (inner_mode
), speed
), 0);
3771 if (cst_and_fits_in_hwi (op0
))
3772 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3774 else if (cst_and_fits_in_hwi (op1
))
3775 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3778 return new_cost (target_spill_cost
[speed
], 0);
3785 cost
= add_costs (cost
, cost0
);
3786 cost
= add_costs (cost
, cost1
);
3788 /* Bound the cost by target_spill_cost. The parts of complicated
3789 computations often are either loop invariant or at least can
3790 be shared between several iv uses, so letting this grow without
3791 limits would not give reasonable results. */
3792 if (cost
.cost
> (int) target_spill_cost
[speed
])
3793 cost
.cost
= target_spill_cost
[speed
];
3798 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3799 invariants the computation depends on. */
3802 force_var_cost (struct ivopts_data
*data
,
3803 tree expr
, bitmap
*depends_on
)
3807 fd_ivopts_data
= data
;
3808 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3811 return force_expr_to_var_cost (expr
, data
->speed
);
3814 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3815 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3816 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3817 invariants the computation depends on. */
3820 split_address_cost (struct ivopts_data
*data
,
3821 tree addr
, bool *symbol_present
, bool *var_present
,
3822 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3825 HOST_WIDE_INT bitsize
;
3826 HOST_WIDE_INT bitpos
;
3829 int unsignedp
, volatilep
;
3831 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3832 &unsignedp
, &volatilep
, false);
3835 || bitpos
% BITS_PER_UNIT
!= 0
3836 || TREE_CODE (core
) != VAR_DECL
)
3838 *symbol_present
= false;
3839 *var_present
= true;
3840 fd_ivopts_data
= data
;
3841 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3842 return new_cost (target_spill_cost
[data
->speed
], 0);
3845 *offset
+= bitpos
/ BITS_PER_UNIT
;
3846 if (TREE_STATIC (core
)
3847 || DECL_EXTERNAL (core
))
3849 *symbol_present
= true;
3850 *var_present
= false;
3854 *symbol_present
= false;
3855 *var_present
= true;
3859 /* Estimates cost of expressing difference of addresses E1 - E2 as
3860 var + symbol + offset. The value of offset is added to OFFSET,
3861 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3862 part is missing. DEPENDS_ON is a set of the invariants the computation
3866 ptr_difference_cost (struct ivopts_data
*data
,
3867 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3868 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3870 HOST_WIDE_INT diff
= 0;
3871 aff_tree aff_e1
, aff_e2
;
3874 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3876 if (ptr_difference_const (e1
, e2
, &diff
))
3879 *symbol_present
= false;
3880 *var_present
= false;
3884 if (integer_zerop (e2
))
3885 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3886 symbol_present
, var_present
, offset
, depends_on
);
3888 *symbol_present
= false;
3889 *var_present
= true;
3891 type
= signed_type_for (TREE_TYPE (e1
));
3892 tree_to_aff_combination (e1
, type
, &aff_e1
);
3893 tree_to_aff_combination (e2
, type
, &aff_e2
);
3894 aff_combination_scale (&aff_e2
, -1);
3895 aff_combination_add (&aff_e1
, &aff_e2
);
3897 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3900 /* Estimates cost of expressing difference E1 - E2 as
3901 var + symbol + offset. The value of offset is added to OFFSET,
3902 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3903 part is missing. DEPENDS_ON is a set of the invariants the computation
3907 difference_cost (struct ivopts_data
*data
,
3908 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3909 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3911 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3912 unsigned HOST_WIDE_INT off1
, off2
;
3913 aff_tree aff_e1
, aff_e2
;
3916 e1
= strip_offset (e1
, &off1
);
3917 e2
= strip_offset (e2
, &off2
);
3918 *offset
+= off1
- off2
;
3923 if (TREE_CODE (e1
) == ADDR_EXPR
)
3924 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3925 offset
, depends_on
);
3926 *symbol_present
= false;
3928 if (operand_equal_p (e1
, e2
, 0))
3930 *var_present
= false;
3934 *var_present
= true;
3936 if (integer_zerop (e2
))
3937 return force_var_cost (data
, e1
, depends_on
);
3939 if (integer_zerop (e1
))
3941 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
3942 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
3946 type
= signed_type_for (TREE_TYPE (e1
));
3947 tree_to_aff_combination (e1
, type
, &aff_e1
);
3948 tree_to_aff_combination (e2
, type
, &aff_e2
);
3949 aff_combination_scale (&aff_e2
, -1);
3950 aff_combination_add (&aff_e1
, &aff_e2
);
3952 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3955 /* Returns true if AFF1 and AFF2 are identical. */
3958 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
3962 if (aff1
->n
!= aff2
->n
)
3965 for (i
= 0; i
< aff1
->n
; i
++)
3967 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
3970 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
3976 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3979 get_expr_id (struct ivopts_data
*data
, tree expr
)
3981 struct iv_inv_expr_ent ent
;
3982 struct iv_inv_expr_ent
**slot
;
3985 ent
.hash
= iterative_hash_expr (expr
, 0);
3986 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
3990 *slot
= XNEW (struct iv_inv_expr_ent
);
3991 (*slot
)->expr
= expr
;
3992 (*slot
)->hash
= ent
.hash
;
3993 (*slot
)->id
= data
->inv_expr_id
++;
3997 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3998 requires a new compiler generated temporary. Returns -1 otherwise.
3999 ADDRESS_P is a flag indicating if the expression is for address
4003 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4004 tree cbase
, HOST_WIDE_INT ratio
,
4007 aff_tree ubase_aff
, cbase_aff
;
4015 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4016 && (TREE_CODE (cbase
) == INTEGER_CST
))
4019 /* Strips the constant part. */
4020 if (TREE_CODE (ubase
) == PLUS_EXPR
4021 || TREE_CODE (ubase
) == MINUS_EXPR
4022 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4024 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4025 ubase
= TREE_OPERAND (ubase
, 0);
4028 /* Strips the constant part. */
4029 if (TREE_CODE (cbase
) == PLUS_EXPR
4030 || TREE_CODE (cbase
) == MINUS_EXPR
4031 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4033 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4034 cbase
= TREE_OPERAND (cbase
, 0);
4039 if (((TREE_CODE (ubase
) == SSA_NAME
)
4040 || (TREE_CODE (ubase
) == ADDR_EXPR
4041 && is_gimple_min_invariant (ubase
)))
4042 && (TREE_CODE (cbase
) == INTEGER_CST
))
4045 if (((TREE_CODE (cbase
) == SSA_NAME
)
4046 || (TREE_CODE (cbase
) == ADDR_EXPR
4047 && is_gimple_min_invariant (cbase
)))
4048 && (TREE_CODE (ubase
) == INTEGER_CST
))
4054 if (operand_equal_p (ubase
, cbase
, 0))
4057 if (TREE_CODE (ubase
) == ADDR_EXPR
4058 && TREE_CODE (cbase
) == ADDR_EXPR
)
4062 usym
= TREE_OPERAND (ubase
, 0);
4063 csym
= TREE_OPERAND (cbase
, 0);
4064 if (TREE_CODE (usym
) == ARRAY_REF
)
4066 tree ind
= TREE_OPERAND (usym
, 1);
4067 if (TREE_CODE (ind
) == INTEGER_CST
4068 && tree_fits_shwi_p (ind
)
4069 && tree_to_shwi (ind
) == 0)
4070 usym
= TREE_OPERAND (usym
, 0);
4072 if (TREE_CODE (csym
) == ARRAY_REF
)
4074 tree ind
= TREE_OPERAND (csym
, 1);
4075 if (TREE_CODE (ind
) == INTEGER_CST
4076 && tree_fits_shwi_p (ind
)
4077 && tree_to_shwi (ind
) == 0)
4078 csym
= TREE_OPERAND (csym
, 0);
4080 if (operand_equal_p (usym
, csym
, 0))
4083 /* Now do more complex comparison */
4084 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4085 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4086 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4090 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4091 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4093 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4094 aff_combination_add (&ubase_aff
, &cbase_aff
);
4095 expr
= aff_combination_to_tree (&ubase_aff
);
4096 return get_expr_id (data
, expr
);
4101 /* Determines the cost of the computation by that USE is expressed
4102 from induction variable CAND. If ADDRESS_P is true, we just need
4103 to create an address from it, otherwise we want to get it into
4104 register. A set of invariants we depend on is stored in
4105 DEPENDS_ON. AT is the statement at that the value is computed.
4106 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4107 addressing is likely. */
4110 get_computation_cost_at (struct ivopts_data
*data
,
4111 struct iv_use
*use
, struct iv_cand
*cand
,
4112 bool address_p
, bitmap
*depends_on
, gimple at
,
4116 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4118 tree utype
= TREE_TYPE (ubase
), ctype
;
4119 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4120 HOST_WIDE_INT ratio
, aratio
;
4121 bool var_present
, symbol_present
, stmt_is_after_inc
;
4124 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4125 machine_mode mem_mode
= (address_p
4126 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4131 /* Only consider real candidates. */
4133 return infinite_cost
;
4135 cbase
= cand
->iv
->base
;
4136 cstep
= cand
->iv
->step
;
4137 ctype
= TREE_TYPE (cbase
);
4139 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4141 /* We do not have a precision to express the values of use. */
4142 return infinite_cost
;
4146 || (use
->iv
->base_object
4147 && cand
->iv
->base_object
4148 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4149 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4151 /* Do not try to express address of an object with computation based
4152 on address of a different object. This may cause problems in rtl
4153 level alias analysis (that does not expect this to be happening,
4154 as this is illegal in C), and would be unlikely to be useful
4156 if (use
->iv
->base_object
4157 && cand
->iv
->base_object
4158 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4159 return infinite_cost
;
4162 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4164 /* TODO -- add direct handling of this case. */
4168 /* CSTEPI is removed from the offset in case statement is after the
4169 increment. If the step is not constant, we use zero instead.
4170 This is a bit imprecise (there is the extra addition), but
4171 redundancy elimination is likely to transform the code so that
4172 it uses value of the variable before increment anyway,
4173 so it is not that much unrealistic. */
4174 if (cst_and_fits_in_hwi (cstep
))
4175 cstepi
= int_cst_value (cstep
);
4179 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4180 return infinite_cost
;
4182 if (wi::fits_shwi_p (rat
))
4183 ratio
= rat
.to_shwi ();
4185 return infinite_cost
;
4188 ctype
= TREE_TYPE (cbase
);
4190 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4192 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4193 or ratio == 1, it is better to handle this like
4195 ubase - ratio * cbase + ratio * var
4197 (also holds in the case ratio == -1, TODO. */
4199 if (cst_and_fits_in_hwi (cbase
))
4201 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4202 cost
= difference_cost (data
,
4203 ubase
, build_int_cst (utype
, 0),
4204 &symbol_present
, &var_present
, &offset
,
4206 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4208 else if (ratio
== 1)
4210 tree real_cbase
= cbase
;
4212 /* Check to see if any adjustment is needed. */
4213 if (cstepi
== 0 && stmt_is_after_inc
)
4215 aff_tree real_cbase_aff
;
4218 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4220 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4222 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4223 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4226 cost
= difference_cost (data
,
4228 &symbol_present
, &var_present
, &offset
,
4230 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4233 && !POINTER_TYPE_P (ctype
)
4234 && multiplier_allowed_in_address_p
4236 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4239 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4240 cost
= difference_cost (data
,
4242 &symbol_present
, &var_present
, &offset
,
4244 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4248 cost
= force_var_cost (data
, cbase
, depends_on
);
4249 cost
= add_costs (cost
,
4250 difference_cost (data
,
4251 ubase
, build_int_cst (utype
, 0),
4252 &symbol_present
, &var_present
,
4253 &offset
, depends_on
));
4254 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4255 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4261 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4262 /* Clear depends on. */
4263 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4264 bitmap_clear (*depends_on
);
4267 /* If we are after the increment, the value of the candidate is higher by
4269 if (stmt_is_after_inc
)
4270 offset
-= ratio
* cstepi
;
4272 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4273 (symbol/var1/const parts may be omitted). If we are looking for an
4274 address, find the cost of addressing this. */
4276 return add_costs (cost
,
4277 get_address_cost (symbol_present
, var_present
,
4278 offset
, ratio
, cstepi
,
4280 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4281 speed
, stmt_is_after_inc
,
4284 /* Otherwise estimate the costs for computing the expression. */
4285 if (!symbol_present
&& !var_present
&& !offset
)
4288 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4292 /* Symbol + offset should be compile-time computable so consider that they
4293 are added once to the variable, if present. */
4294 if (var_present
&& (symbol_present
|| offset
))
4295 cost
.cost
+= adjust_setup_cost (data
,
4296 add_cost (speed
, TYPE_MODE (ctype
)));
4298 /* Having offset does not affect runtime cost in case it is added to
4299 symbol, but it increases complexity. */
4303 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4305 aratio
= ratio
> 0 ? ratio
: -ratio
;
4307 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4312 *can_autoinc
= false;
4315 /* Just get the expression, expand it and measure the cost. */
4316 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4319 return infinite_cost
;
4322 comp
= build_simple_mem_ref (comp
);
4324 return new_cost (computation_cost (comp
, speed
), 0);
4328 /* Determines the cost of the computation by that USE is expressed
4329 from induction variable CAND. If ADDRESS_P is true, we just need
4330 to create an address from it, otherwise we want to get it into
4331 register. A set of invariants we depend on is stored in
4332 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4333 autoinc addressing is likely. */
/* Thin wrapper: delegates to get_computation_cost_at, evaluating the
   cost at the use's own statement (use->stmt).  */
4336 get_computation_cost (struct ivopts_data
*data
,
4337 struct iv_use
*use
, struct iv_cand
*cand
,
4338 bool address_p
, bitmap
*depends_on
,
4339 bool *can_autoinc
, int *inv_expr_id
)
4341 return get_computation_cost_at (data
,
4342 use
, cand
, address_p
, depends_on
, use
->stmt
,
4343 can_autoinc
, inv_expr_id
);
4346 /* Determines cost of basing replacement of USE on CAND in a generic
/* NOTE(review): lossy extraction -- the return after the IP_ORIGINAL
   shortcut and the trailing arguments of both set_use_iv_cost calls
   (original lines 4365-4368, 4373) are not visible here.  */
4350 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4351 struct iv_use
*use
, struct iv_cand
*cand
)
4355 int inv_expr_id
= -1;
4357 /* The simple case first -- if we need to express value of the preserved
4358 original biv, the cost is 0. This also prevents us from counting the
4359 cost of increment twice -- once at this use and once in the cost of
4361 if (cand
->pos
== IP_ORIGINAL
4362 && cand
->incremented_at
== use
->stmt
)
4364 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
/* Otherwise compute the full cost (non-address mode) along with the
   set of invariants the expression depends on.  */
4369 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4370 NULL
, &inv_expr_id
)
;
4372 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
/* Success iff the computed cost is finite.  */
4375 return !infinite_cost_p (cost
);
4378 /* Determines cost of basing replacement of USE on CAND in an address. */
/* NOTE(review): lossy extraction -- declarations of depends_on/can_autoinc
   and parts of the autoinc check (original lines 4383-4385, 4391-4392) are
   missing from this view.  */
4381 determine_use_iv_cost_address (struct ivopts_data
*data
,
4382 struct iv_use
*use
, struct iv_cand
*cand
)
4386 int inv_expr_id
= -1;
/* Address-mode cost; also records whether autoincrement addressing
   is feasible for this use/candidate pair.  */
4387 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4388 &can_autoinc
, &inv_expr_id
);
/* If this use is the one the candidate's autoinc was created for,
   the step cost is folded into the addressing mode -- discount it.  */
4390 if (cand
->ainc_use
== use
)
4393 cost
.cost
-= cand
->cost_step
;
4394 /* If we generated the candidate solely for exploiting autoincrement
4395 opportunities, and it turns out it can't be used, set the cost to
4396 infinity to make sure we ignore it. */
4397 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4398 cost
= infinite_cost
;
4400 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4403 return !infinite_cost_p (cost
);
4406 /* Computes value of candidate CAND at position AT in iteration NITER, and
4407 stores it to VAL. */
/* NOTE(review): lossy extraction -- an `else` branch presumably separates
   the two steptype assignments below (original line numbers 4417-4419 with
   gaps); as written they would contradict each other.  TODO confirm against
   the original source.  */
4410 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4413 aff_tree step
, delta
, nit
;
4414 struct iv
*iv
= cand
->iv
;
4415 tree type
= TREE_TYPE (iv
->base
);
4416 tree steptype
= type
;
4417 if (POINTER_TYPE_P (type
))
4418 steptype
= sizetype
;
4419 steptype
= unsigned_type_for (type
);
/* delta = step * niter, computed as affine combinations in the
   (unsigned) step type to avoid overflow issues.  */
4421 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4422 aff_combination_convert (&step
, steptype
);
4423 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4424 aff_combination_convert (&nit
, steptype
);
4425 aff_combination_mult (&nit
, &step
, &delta
);
/* If AT is after the increment, one more step has been applied.  */
4426 if (stmt_after_increment (loop
, cand
, at
))
4427 aff_combination_add (&delta
, &step
);
/* VAL = base + delta.  */
4429 tree_to_aff_combination (iv
->base
, type
, val
);
4430 if (!POINTER_TYPE_P (type
))
4431 aff_combination_convert (val
, steptype
);
4432 aff_combination_add (val
, &delta
);
4435 /* Returns period of induction variable iv. */
/* NOTE(review): lossy extraction -- the declaration of pow2div and the
   final `return period;` (original lines 4441, 4459-4461) are missing
   from this view.  */
4438 iv_period (struct iv
*iv
)
4440 tree step
= iv
->step
, period
, type
;
/* Only constant steps are supported here.  */
4443 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4445 type
= unsigned_type_for (TREE_TYPE (step
));
4446 /* Period of the iv is lcm (step, type_range)/step -1,
4447 i.e., N*type_range/step - 1. Since type range is power
4448 of two, N == (step >> num_of_ending_zeros_binary (step),
4449 so the final result is
4451 (type_range >> num_of_ending_zeros_binary (step)) - 1
4454 pow2div
= num_ending_zeros (step
);
4456 period
= build_low_bits_mask (type
,
4457 (TYPE_PRECISION (type
)
4458 - tree_to_uhwi (pow2div
)));
4463 /* Returns the comparison operator used when eliminating the iv USE. */
/* NOTE(review): declarations of ex_bb and exit (original lines 4469-4471)
   are missing from this extracted view.  */
4465 static enum tree_code
4466 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4468 struct loop
*loop
= data
->current_loop
;
/* Find the edge leaving the loop from the block of the condition:
   whichever successor is NOT inside the loop.  */
4472 ex_bb
= gimple_bb (use
->stmt
);
4473 exit
= EDGE_SUCC (ex_bb
, 0);
4474 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4475 exit
= EDGE_SUCC (ex_bb
, 1);
/* If the loop is left when the condition is true, eliminate with
   EQ_EXPR; otherwise with NE_EXPR.  */
4477 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4480 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4481 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4482 calculation is performed in non-wrapping type.
4484 TODO: More generally, we could test for the situation that
4485 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4486 This would require knowing the sign of OFFSET. */
/* NOTE(review): lossy extraction -- several returns, the declarations of
   e1/e2, and a `switch (code)` header that must precede the
   POINTER_PLUS_EXPR case label near the end are missing from this view.  */
4489 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4491 enum tree_code code
;
4493 aff_tree aff_e1
, aff_e2
, aff_offset
;
/* Only non-wrapping types give us the overflow guarantee.  */
4495 if (!nowrap_type_p (TREE_TYPE (base
)))
4498 base
= expand_simple_operations (base
);
/* Decompose BASE into a binary operation CODE with operands e1, e2 --
   either via its SSA defining statement ...  */
4500 if (TREE_CODE (base
) == SSA_NAME
)
4502 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4504 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4507 code
= gimple_assign_rhs_code (stmt
);
4508 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4511 e1
= gimple_assign_rhs1 (stmt
);
4512 e2
= gimple_assign_rhs2 (stmt
);
/* ... or directly from the tree expression.  */
4516 code
= TREE_CODE (base
);
4517 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4519 e1
= TREE_OPERAND (base
, 0);
4520 e2
= TREE_OPERAND (base
, 1);
4523 /* Use affine expansion as deeper inspection to prove the equality. */
4524 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4525 &aff_e2
, &data
->name_expansion_cache
);
4526 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4527 &aff_offset
, &data
->name_expansion_cache
);
/* aff_offset := -OFFSET, so "X + aff_offset == 0" tests X == OFFSET.  */
4528 aff_combination_scale (&aff_offset
, -1);
4532 aff_combination_add (&aff_e2
, &aff_offset
);
4533 if (aff_combination_zero_p (&aff_e2
))
/* e2 != OFFSET: try the other operand, e1.  */
4536 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4537 &aff_e1
, &data
->name_expansion_cache
);
4538 aff_combination_add (&aff_e1
, &aff_offset
);
4539 return aff_combination_zero_p (&aff_e1
);
/* For pointer arithmetic only the second operand can be the offset.  */
4541 case POINTER_PLUS_EXPR
:
4542 aff_combination_add (&aff_e2
, &aff_offset
);
4543 return aff_combination_zero_p (&aff_e2
);
4550 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4551 comparison with CAND. NITER describes the number of iterations of
4552 the loops. If successful, the comparison in COMP_P is altered accordingly.
4554 We aim to handle the following situation:
4570 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4571 We aim to optimize this to
4579 while (p < p_0 - a + b);
4581 This preserves the correctness, since the pointer arithmetics does not
4582 overflow. More precisely:
4584 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4585 overflow in computing it or the values of p.
4586 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4587 overflow. To prove this, we use the fact that p_0 = base + a. */
4590 iv_elimination_compare_lt (struct ivopts_data
*data
,
4591 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4592 struct tree_niter_desc
*niter
)
4594 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4595 struct aff_tree nit
, tmpa
, tmpb
;
4596 enum tree_code comp
;
4599 /* We need to know that the candidate induction variable does not overflow.
4600 While more complex analysis may be used to prove this, for now just
4601 check that the variable appears in the original program and that it
4602 is computed in a type that guarantees no overflows. */
4603 cand_type
= TREE_TYPE (cand
->iv
->base
);
4604 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4607 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4608 the calculation of the BOUND could overflow, making the comparison
4610 if (!data
->loop_single_exit_p
)
4613 /* We need to be able to decide whether candidate is increasing or decreasing
4614 in order to choose the right comparison operator. */
4615 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4617 step
= int_cst_value (cand
->iv
->step
);
4619 /* Check that the number of iterations matches the expected pattern:
4620 a + 1 > b ? 0 : b - a - 1. */
4621 mbz
= niter
->may_be_zero
;
4622 if (TREE_CODE (mbz
) == GT_EXPR
)
4624 /* Handle a + 1 > b. */
4625 tree op0
= TREE_OPERAND (mbz
, 0);
4626 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4628 a
= TREE_OPERAND (op0
, 0);
4629 b
= TREE_OPERAND (mbz
, 1);
4634 else if (TREE_CODE (mbz
) == LT_EXPR
)
4636 tree op1
= TREE_OPERAND (mbz
, 1);
4638 /* Handle b < a + 1. */
4639 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4641 a
= TREE_OPERAND (op1
, 0);
4642 b
= TREE_OPERAND (mbz
, 0);
4650 /* Expected number of iterations is B - A - 1. Check that it matches
4651 the actual number, i.e., that B - A - NITER = 1. */
4652 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4653 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4654 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4655 aff_combination_scale (&nit
, -1);
4656 aff_combination_scale (&tmpa
, -1);
4657 aff_combination_add (&tmpb
, &tmpa
);
4658 aff_combination_add (&tmpb
, &nit
);
4659 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4662 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4664 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4666 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4667 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4670 /* Determine the new comparison operator. */
4671 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4672 if (*comp_p
== NE_EXPR
)
4674 else if (*comp_p
== EQ_EXPR
)
4675 *comp_p
= invert_tree_comparison (comp
, false);
4682 /* Check whether it is possible to express the condition in USE by comparison
4683 of candidate CAND. If so, store the value compared with to BOUND, and the
4684 comparison operator to COMP. */
4687 may_eliminate_iv (struct ivopts_data
*data
,
4688 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4689 enum tree_code
*comp
)
4694 struct loop
*loop
= data
->current_loop
;
4696 struct tree_niter_desc
*desc
= NULL
;
4698 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4701 /* For now works only for exits that dominate the loop latch.
4702 TODO: extend to other conditions inside loop body. */
4703 ex_bb
= gimple_bb (use
->stmt
);
4704 if (use
->stmt
!= last_stmt (ex_bb
)
4705 || gimple_code (use
->stmt
) != GIMPLE_COND
4706 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4709 exit
= EDGE_SUCC (ex_bb
, 0);
4710 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4711 exit
= EDGE_SUCC (ex_bb
, 1);
4712 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4715 desc
= niter_for_exit (data
, exit
);
4719 /* Determine whether we can use the variable to test the exit condition.
4720 This is the case iff the period of the induction variable is greater
4721 than the number of iterations for which the exit condition is true. */
4722 period
= iv_period (cand
->iv
);
4724 /* If the number of iterations is constant, compare against it directly. */
4725 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4727 /* See cand_value_at. */
4728 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4730 if (!tree_int_cst_lt (desc
->niter
, period
))
4735 if (tree_int_cst_lt (period
, desc
->niter
))
4740 /* If not, and if this is the only possible exit of the loop, see whether
4741 we can get a conservative estimate on the number of iterations of the
4742 entire loop and compare against that instead. */
4745 widest_int period_value
, max_niter
;
4747 max_niter
= desc
->max
;
4748 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4750 period_value
= wi::to_widest (period
);
4751 if (wi::gtu_p (max_niter
, period_value
))
4753 /* See if we can take advantage of inferred loop bound information. */
4754 if (data
->loop_single_exit_p
)
4756 if (!max_loop_iterations (loop
, &max_niter
))
4758 /* The loop bound is already adjusted by adding 1. */
4759 if (wi::gtu_p (max_niter
, period_value
))
4767 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4769 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
4770 aff_combination_to_tree (&bnd
));
4771 *comp
= iv_elimination_compare (data
, use
);
4773 /* It is unlikely that computing the number of iterations using division
4774 would be more profitable than keeping the original induction variable. */
4775 if (expression_expensive_p (*bound
))
4778 /* Sometimes, it is possible to handle the situation that the number of
4779 iterations may be zero unless additional assumtions by using <
4780 instead of != in the exit condition.
4782 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4783 base the exit condition on it. However, that is often too
4785 if (!integer_zerop (desc
->may_be_zero
))
4786 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4791 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4792 be copied, if it is used in the loop body and DATA->body_includes_call. */
/* NOTE(review): the fallback return for the non-PARM_DECL case (original
   lines 4805-4806) is missing from this extracted view.  */
4795 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4797 tree sbound
= bound
;
4798 STRIP_NOPS (sbound
);
/* A default-def SSA name of a PARM_DECL costs one insn (a regcopy)
   when the loop body contains a call.  */
4800 if (TREE_CODE (sbound
) == SSA_NAME
4801 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4802 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4803 && data
->body_includes_call
)
4804 return COSTS_N_INSNS (1);
4809 /* Determines cost of basing replacement of USE on CAND in a condition. */
4812 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4813 struct iv_use
*use
, struct iv_cand
*cand
)
4815 tree bound
= NULL_TREE
;
4817 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4818 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4820 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4821 tree
*control_var
, *bound_cst
;
4822 enum tree_code comp
= ERROR_MARK
;
4824 /* Only consider real candidates. */
4827 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4832 /* Try iv elimination. */
4833 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4835 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4836 if (elim_cost
.cost
== 0)
4837 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4838 else if (TREE_CODE (bound
) == INTEGER_CST
)
4840 /* If we replace a loop condition 'i < n' with 'p < base + n',
4841 depends_on_elim will have 'base' and 'n' set, which implies
4842 that both 'base' and 'n' will be live during the loop. More likely,
4843 'base + n' will be loop invariant, resulting in only one live value
4844 during the loop. So in that case we clear depends_on_elim and set
4845 elim_inv_expr_id instead. */
4846 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4848 elim_inv_expr_id
= get_expr_id (data
, bound
);
4849 bitmap_clear (depends_on_elim
);
4851 /* The bound is a loop invariant, so it will be only computed
4853 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4856 elim_cost
= infinite_cost
;
4858 /* Try expressing the original giv. If it is compared with an invariant,
4859 note that we cannot get rid of it. */
4860 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4864 /* When the condition is a comparison of the candidate IV against
4865 zero, prefer this IV.
4867 TODO: The constant that we're subtracting from the cost should
4868 be target-dependent. This information should be added to the
4869 target costs for each backend. */
4870 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4871 && integer_zerop (*bound_cst
)
4872 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4873 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4874 elim_cost
.cost
-= 1;
4876 express_cost
= get_computation_cost (data
, use
, cand
, false,
4877 &depends_on_express
, NULL
,
4878 &express_inv_expr_id
);
4879 fd_ivopts_data
= data
;
4880 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4882 /* Count the cost of the original bound as well. */
4883 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4884 if (bound_cost
.cost
== 0)
4885 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4886 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4887 bound_cost
.cost
= 0;
4888 express_cost
.cost
+= bound_cost
.cost
;
4890 /* Choose the better approach, preferring the eliminated IV. */
4891 if (compare_costs (elim_cost
, express_cost
) <= 0)
4894 depends_on
= depends_on_elim
;
4895 depends_on_elim
= NULL
;
4896 inv_expr_id
= elim_inv_expr_id
;
4900 cost
= express_cost
;
4901 depends_on
= depends_on_express
;
4902 depends_on_express
= NULL
;
4905 inv_expr_id
= express_inv_expr_id
;
4908 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4910 if (depends_on_elim
)
4911 BITMAP_FREE (depends_on_elim
);
4912 if (depends_on_express
)
4913 BITMAP_FREE (depends_on_express
);
4915 return !infinite_cost_p (cost
);
4918 /* Determines cost of basing replacement of USE on CAND. Returns false
4919 if USE cannot be based on CAND. */
/* NOTE(review): lossy extraction -- the `switch (use->type)` header and the
   case labels preceding the address/condition calls (presumably USE_ADDRESS
   and USE_COMPARE) are missing from this view.  */
4922 determine_use_iv_cost (struct ivopts_data
*data
,
4923 struct iv_use
*use
, struct iv_cand
*cand
)
/* Dispatch per use kind to the specialized cost routine.  */
4927 case USE_NONLINEAR_EXPR
:
4928 return determine_use_iv_cost_generic (data
, use
, cand
);
4931 return determine_use_iv_cost_address (data
, use
, cand
);
4934 return determine_use_iv_cost_condition (data
, use
, cand
);
4941 /* Return true if get_computation_cost indicates that autoincrement is
4942 a possibility for the pair of USE and CAND, false otherwise. */
/* NOTE(review): declarations of cost/depends_on/can_autoinc and the early
   return for non-address uses (original lines 4947-4953) are missing from
   this extracted view.  */
4945 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
4946 struct iv_cand
*cand
)
/* Autoincrement only applies to address-type uses.  */
4952 if (use
->type
!= USE_ADDRESS
)
4955 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4956 &can_autoinc
, NULL
);
4958 BITMAP_FREE (depends_on
);
/* Feasible iff the cost is finite AND the address-cost computation
   flagged autoincrement as likely.  */
4960 return !infinite_cost_p (cost
) && can_autoinc
;
4963 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4964 use that allows autoincrement, and set their AINC_USE if possible. */
/* NOTE(review): lossy extraction -- declarations of i and j, braces, and
   `continue` statements (e.g. after the basic-block check) are missing
   from this view.  */
4967 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
4971 for (i
= 0; i
< n_iv_cands (data
); i
++)
4973 struct iv_cand
*cand
= iv_cand (data
, i
);
4974 struct iv_use
*closest_before
= NULL
;
4975 struct iv_use
*closest_after
= NULL
;
/* Only original (preserved) IVs are considered here.  */
4976 if (cand
->pos
!= IP_ORIGINAL
)
/* Scan all uses, tracking -- by statement uid -- the nearest use
   before and the nearest use after the candidate's increment,
   restricted to the increment's own basic block.  */
4979 for (j
= 0; j
< n_iv_uses (data
); j
++)
4981 struct iv_use
*use
= iv_use (data
, j
);
4982 unsigned uid
= gimple_uid (use
->stmt
);
4984 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
4987 if (uid
< gimple_uid (cand
->incremented_at
)
4988 && (closest_before
== NULL
4989 || uid
> gimple_uid (closest_before
->stmt
)))
4990 closest_before
= use
;
4992 if (uid
> gimple_uid (cand
->incremented_at
)
4993 && (closest_after
== NULL
4994 || uid
< gimple_uid (closest_after
->stmt
)))
4995 closest_after
= use
;
/* Prefer a use just before the increment (post-increment form),
   falling back to one just after (pre-increment form).  */
4998 if (closest_before
!= NULL
4999 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5000 cand
->ainc_use
= closest_before
;
5001 else if (closest_after
!= NULL
5002 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5003 cand
->ainc_use
= closest_after
;
5007 /* Finds the candidates for the induction variables. */
/* Driver: builds the candidate set in stages, then wires up autoinc
   uses and records the "important" subset for the search heuristics.  */
5010 find_iv_candidates (struct ivopts_data
*data
)
5012 /* Add commonly used ivs. */
5013 add_standard_iv_candidates (data
);
5015 /* Add old induction variables. */
5016 add_old_ivs_candidates (data
);
5018 /* Add induction variables derived from uses. */
5019 add_derived_ivs_candidates (data
);
5021 set_autoinc_for_original_candidates (data
);
5023 /* Record the important candidates. */
5024 record_important_candidates (data
);
5027 /* Determines costs of basing the use of the iv on an iv candidate. */
5030 determine_use_iv_costs (struct ivopts_data
*data
)
5034 struct iv_cand
*cand
;
5035 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5037 alloc_use_cost_map (data
);
5039 for (i
= 0; i
< n_iv_uses (data
); i
++)
5041 use
= iv_use (data
, i
);
5043 if (data
->consider_all_candidates
)
5045 for (j
= 0; j
< n_iv_cands (data
); j
++)
5047 cand
= iv_cand (data
, j
);
5048 determine_use_iv_cost (data
, use
, cand
);
5055 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5057 cand
= iv_cand (data
, j
);
5058 if (!determine_use_iv_cost (data
, use
, cand
))
5059 bitmap_set_bit (to_clear
, j
);
5062 /* Remove the candidates for that the cost is infinite from
5063 the list of related candidates. */
5064 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5065 bitmap_clear (to_clear
);
5069 BITMAP_FREE (to_clear
);
5071 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5073 fprintf (dump_file
, "Use-candidate costs:\n");
5075 for (i
= 0; i
< n_iv_uses (data
); i
++)
5077 use
= iv_use (data
, i
);
5079 fprintf (dump_file
, "Use %d:\n", i
);
5080 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5081 for (j
= 0; j
< use
->n_map_members
; j
++)
5083 if (!use
->cost_map
[j
].cand
5084 || infinite_cost_p (use
->cost_map
[j
].cost
))
5087 fprintf (dump_file
, " %d\t%d\t%d\t",
5088 use
->cost_map
[j
].cand
->id
,
5089 use
->cost_map
[j
].cost
.cost
,
5090 use
->cost_map
[j
].cost
.complexity
);
5091 if (use
->cost_map
[j
].depends_on
)
5092 bitmap_print (dump_file
,
5093 use
->cost_map
[j
].depends_on
, "","");
5094 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5095 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5096 fprintf (dump_file
, "\n");
5099 fprintf (dump_file
, "\n");
5101 fprintf (dump_file
, "\n");
5105 /* Determines cost of the candidate CAND. */
/* NOTE(review): lossy extraction -- the declaration of base, the early exit
   for pseudocandidates, the cost adjustments performed inside the two
   trailing conditionals, and the final assignment of cand->cost (original
   lines around 5112-5119, 5141-5149) are missing from this view.  */
5108 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5110 comp_cost cost_base
;
5111 unsigned cost
, cost_step
;
5120 /* There are two costs associated with the candidate -- its increment
5121 and its initialization. The second is almost negligible for any loop
5122 that rolls enough, so we take it just very little into account. */
5124 base
= cand
->iv
->base
;
5125 cost_base
= force_var_cost (data
, base
, NULL
);
5126 /* It will be exceptional that the iv register happens to be initialized with
5127 the proper value at no cost. In general, there will at least be a regcopy
5129 if (cost_base
.cost
== 0)
5130 cost_base
.cost
= COSTS_N_INSNS (1);
/* Per-iteration cost: one addition in the iv's mode.  */
5131 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
/* Setup cost is scaled down (adjust_setup_cost) since it is paid once
   per loop entry, not per iteration.  */
5133 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5135 /* Prefer the original ivs unless we may gain something by replacing it.
5136 The reason is to make debugging simpler; so this is not relevant for
5137 artificial ivs created by other optimization passes. */
5138 if (cand
->pos
!= IP_ORIGINAL
5139 || !SSA_NAME_VAR (cand
->var_before
)
5140 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5143 /* Prefer not to insert statements into latch unless there are some
5144 already (so that we do not create unnecessary jumps). */
5145 if (cand
->pos
== IP_END
5146 && empty_block_p (ip_end_pos (data
->current_loop
)))
5150 cand
->cost_step
= cost_step
;
5153 /* Determines costs of computation of the candidates. */
/* NOTE(review): the declaration of i and the closing dump fprintf
   (original lines 5157-5159, 5176-5177 partially) are trimmed by the
   extraction; comments only added here.  */
5156 determine_iv_costs (struct ivopts_data
*data
)
/* Dump header when detailed dumping is requested.  */
5160 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5162 fprintf (dump_file
, "Candidate costs:\n");
5163 fprintf (dump_file
, " cand\tcost\n");
/* Cost every candidate, dumping each result in detail mode.  */
5166 for (i
= 0; i
< n_iv_cands (data
); i
++)
5168 struct iv_cand
*cand
= iv_cand (data
, i
);
5170 determine_iv_cost (data
, cand
);
5172 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5173 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5176 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5177 fprintf (dump_file
, "\n");
5180 /* Calculates cost for having SIZE induction variables. */
/* Global register-pressure cost: SIZE itself (so smaller iv sets are
   preferred, all else equal) plus the estimated spill/pressure cost for
   SIZE extra registers.  */
5183 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5185 /* We add size to the cost, so that we prefer eliminating ivs
5187 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5188 data
->body_includes_call
);
5191 /* For each size of the induction variable set determine the penalty. */
5194 determine_set_costs (struct ivopts_data
*data
)
5200 struct loop
*loop
= data
->current_loop
;
5203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5205 fprintf (dump_file
, "Global costs:\n");
5206 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5207 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5208 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5209 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5213 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5216 op
= PHI_RESULT (phi
);
5218 if (virtual_operand_p (op
))
5221 if (get_iv (data
, op
))
5227 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5229 struct version_info
*info
= ver_info (data
, j
);
5231 if (info
->inv_id
&& info
->has_nonlin_use
)
5235 data
->regs_used
= n
;
5236 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5237 fprintf (dump_file
, " regs_used %d\n", n
);
5239 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5241 fprintf (dump_file
, " cost for size:\n");
5242 fprintf (dump_file
, " ivs\tcost\n");
5243 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5244 fprintf (dump_file
, " %d\t%d\n", j
,
5245 ivopts_global_cost_for_size (data
, j
));
5246 fprintf (dump_file
, "\n");
5250 /* Returns true if A is a cheaper cost pair than B. */
5253 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5263 cmp
= compare_costs (a
->cost
, b
->cost
);
5270 /* In case the costs are the same, prefer the cheaper candidate. */
5271 if (a
->cand
->cost
< b
->cand
->cost
)
5278 /* Returns the candidate by which USE is expressed in IVS. */
/* Looks up the cost_pair currently chosen for USE in candidate
   assignment IVS.  May be NULL when USE is not expressed by any
   candidate (iv_ca_set_no_cp stores NULL back into cand_for_use).  */
5280 static struct cost_pair
*
5281 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5283 return ivs
->cand_for_use
[use
->id
];
5286 /* Computes the cost field of IVS structure. */
/* Recomputes the overall cost of assignment IVS: the accumulated
   per-use costs, plus the candidates' own costs, plus the global
   register-pressure cost for n_regs + num_used_inv_expr live values.
   NOTE(review): the extraction appears to have dropped the final store
   of COST back into IVS -- verify against the full file.  */
5289 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5291 comp_cost cost
= ivs
->cand_use_cost
;
5293 cost
.cost
+= ivs
->cand_cost
;
5295 cost
.cost
+= ivopts_global_cost_for_size (data
,
5296 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5301 /* Remove invariants in set INVS to set IVS. */
/* Decrements, for every invariant id in bitmap INVS, the use counter
   kept in IVS->n_invariant_uses.
   NOTE(review): the body of the "== 0" test is not visible in this
   extraction (presumably it decrements IVS->n_regs) -- confirm against
   the full file.  */
5304 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5312 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5314 ivs
->n_invariant_uses
[iid
]--;
5315 if (ivs
->n_invariant_uses
[iid
] == 0)
5320 /* Set USE not to be expressed by any candidate in IVS. */
5323 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5326 unsigned uid
= use
->id
, cid
;
5327 struct cost_pair
*cp
;
5329 cp
= ivs
->cand_for_use
[uid
];
5335 ivs
->cand_for_use
[uid
] = NULL
;
5336 ivs
->n_cand_uses
[cid
]--;
5338 if (ivs
->n_cand_uses
[cid
] == 0)
5340 bitmap_clear_bit (ivs
->cands
, cid
);
5341 /* Do not count the pseudocandidates. */
5345 ivs
->cand_cost
-= cp
->cand
->cost
;
5347 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5350 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5352 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5354 if (cp
->inv_expr_id
!= -1)
5356 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5357 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5358 ivs
->num_used_inv_expr
--;
5360 iv_ca_recount_cost (data
, ivs
);
5363 /* Add invariants in set INVS to set IVS. */
/* Increments, for every invariant id in bitmap INVS, the use counter
   kept in IVS->n_invariant_uses.  Mirror image of
   iv_ca_set_remove_invariants.
   NOTE(review): the body of the "== 1" test is not visible in this
   extraction (presumably it increments IVS->n_regs) -- confirm against
   the full file.  */
5366 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5374 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5376 ivs
->n_invariant_uses
[iid
]++;
5377 if (ivs
->n_invariant_uses
[iid
] == 1)
5382 /* Set cost pair for USE in set IVS to CP. */
5385 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5386 struct iv_use
*use
, struct cost_pair
*cp
)
5388 unsigned uid
= use
->id
, cid
;
5390 if (ivs
->cand_for_use
[uid
] == cp
)
5393 if (ivs
->cand_for_use
[uid
])
5394 iv_ca_set_no_cp (data
, ivs
, use
);
5401 ivs
->cand_for_use
[uid
] = cp
;
5402 ivs
->n_cand_uses
[cid
]++;
5403 if (ivs
->n_cand_uses
[cid
] == 1)
5405 bitmap_set_bit (ivs
->cands
, cid
);
5406 /* Do not count the pseudocandidates. */
5410 ivs
->cand_cost
+= cp
->cand
->cost
;
5412 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5415 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5416 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5418 if (cp
->inv_expr_id
!= -1)
5420 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5421 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5422 ivs
->num_used_inv_expr
++;
5424 iv_ca_recount_cost (data
, ivs
);
5428 /* Extend set IVS by expressing USE by some of the candidates in it
5429 if possible. Consider all important candidates if candidates in
5430 set IVS don't give any result. */
5433 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5436 struct cost_pair
*best_cp
= NULL
, *cp
;
5439 struct iv_cand
*cand
;
5441 gcc_assert (ivs
->upto
>= use
->id
);
5445 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5447 cand
= iv_cand (data
, i
);
5448 cp
= get_use_iv_cost (data
, use
, cand
);
5449 if (cheaper_cost_pair (cp
, best_cp
))
5453 if (best_cp
== NULL
)
5455 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5457 cand
= iv_cand (data
, i
);
5458 cp
= get_use_iv_cost (data
, use
, cand
);
5459 if (cheaper_cost_pair (cp
, best_cp
))
5464 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5467 /* Get cost for assignment IVS. */
5470 iv_ca_cost (struct iv_ca
*ivs
)
5472 /* This was a conditional expression but it triggered a bug in
5475 return infinite_cost
;
5480 /* Returns true if all dependences of CP are among invariants in IVS. */
/* Checks whether every invariant that cost_pair CP depends on is
   already live in assignment IVS (n_invariant_uses non-zero).  A CP
   with no depends_on bitmap trivially has its dependences satisfied.
   NOTE(review): the return statements are not visible in this
   extraction -- confirm polarity against the full file.  */
5483 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5488 if (!cp
->depends_on
)
5491 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5493 if (ivs
->n_invariant_uses
[i
] == 0)
5500 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5501 it before NEXT_CHANGE. */
/* Allocates a new iv_ca_delta record describing the change of USE from
   OLD_CP to NEW_CP and prepends it to the change list NEXT_CHANGE.
   Caller owns the returned node (freed via iv_ca_delta_free).
   NOTE(review): the extraction appears to have dropped the
   "change->use = use;" assignment and the final "return change;" --
   verify against the full file.  */
5503 static struct iv_ca_delta
*
5504 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5505 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5507 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5510 change
->old_cp
= old_cp
;
5511 change
->new_cp
= new_cp
;
5512 change
->next_change
= next_change
;
5517 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5520 static struct iv_ca_delta
*
5521 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5523 struct iv_ca_delta
*last
;
5531 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5533 last
->next_change
= l2
;
5538 /* Reverse the list of changes DELTA, forming the inverse to it. */
5540 static struct iv_ca_delta
*
5541 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5543 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5544 struct cost_pair
*tmp
;
5546 for (act
= delta
; act
; act
= next
)
5548 next
= act
->next_change
;
5549 act
->next_change
= prev
;
5553 act
->old_cp
= act
->new_cp
;
5560 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5561 reverted instead. */
5564 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5565 struct iv_ca_delta
*delta
, bool forward
)
5567 struct cost_pair
*from
, *to
;
5568 struct iv_ca_delta
*act
;
5571 delta
= iv_ca_delta_reverse (delta
);
5573 for (act
= delta
; act
; act
= act
->next_change
)
5577 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5578 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5582 iv_ca_delta_reverse (delta
);
5585 /* Returns true if CAND is used in IVS. */
/* True iff candidate CAND expresses at least one use in assignment
   IVS (its per-candidate use counter is positive).  */
5588 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5590 return ivs
->n_cand_uses
[cand
->id
] > 0;
5593 /* Returns number of induction variable candidates in the set IVS. */
/* Accessor: number of induction variable candidates in the set IVS.  */
5596 iv_ca_n_cands (struct iv_ca
*ivs
)
5598 return ivs
->n_cands
;
5601 /* Free the list of changes DELTA. */
5604 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5606 struct iv_ca_delta
*act
, *next
;
5608 for (act
= *delta
; act
; act
= next
)
5610 next
= act
->next_change
;
5617 /* Allocates new iv candidates assignment. */
5619 static struct iv_ca
*
5620 iv_ca_new (struct ivopts_data
*data
)
5622 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5626 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5627 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5628 nw
->cands
= BITMAP_ALLOC (NULL
);
5631 nw
->cand_use_cost
= no_cost
;
5633 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5635 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5636 nw
->num_used_inv_expr
= 0;
5641 /* Free memory occupied by the set IVS. */
5644 iv_ca_free (struct iv_ca
**ivs
)
5646 free ((*ivs
)->cand_for_use
);
5647 free ((*ivs
)->n_cand_uses
);
5648 BITMAP_FREE ((*ivs
)->cands
);
5649 free ((*ivs
)->n_invariant_uses
);
5650 free ((*ivs
)->used_inv_expr
);
5655 /* Dumps IVS to FILE. */
5658 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5660 const char *pref
= " invariants ";
5662 comp_cost cost
= iv_ca_cost (ivs
);
5664 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5665 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5666 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5667 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5669 for (i
= 0; i
< ivs
->upto
; i
++)
5671 struct iv_use
*use
= iv_use (data
, i
);
5672 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5674 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5675 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5677 fprintf (file
, " use:%d --> ??\n", use
->id
);
5680 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5681 if (ivs
->n_invariant_uses
[i
])
5683 fprintf (file
, "%s%d", pref
, i
);
5686 fprintf (file
, "\n\n");
5689 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5690 new set, and store differences in DELTA. Number of induction variables
5691 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5692 the function will try to find a solution with minimal iv candidates. */
5695 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5696 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5697 unsigned *n_ivs
, bool min_ncand
)
5702 struct cost_pair
*old_cp
, *new_cp
;
5705 for (i
= 0; i
< ivs
->upto
; i
++)
5707 use
= iv_use (data
, i
);
5708 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5711 && old_cp
->cand
== cand
)
5714 new_cp
= get_use_iv_cost (data
, use
, cand
);
5718 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5721 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5724 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5727 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5728 cost
= iv_ca_cost (ivs
);
5730 *n_ivs
= iv_ca_n_cands (ivs
);
5731 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5736 /* Try narrowing set IVS by removing CAND. Return the cost of
5737 the new set and store the differences in DELTA. START is
5738 the candidate with which we start narrowing. */
5741 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5742 struct iv_cand
*cand
, struct iv_cand
*start
,
5743 struct iv_ca_delta
**delta
)
5747 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5749 struct iv_cand
*cnd
;
5750 comp_cost cost
, best_cost
, acost
;
5753 for (i
= 0; i
< n_iv_uses (data
); i
++)
5755 use
= iv_use (data
, i
);
5757 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5758 if (old_cp
->cand
!= cand
)
5761 best_cost
= iv_ca_cost (ivs
);
5762 /* Start narrowing with START. */
5763 new_cp
= get_use_iv_cost (data
, use
, start
);
5765 if (data
->consider_all_candidates
)
5767 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5769 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5772 cnd
= iv_cand (data
, ci
);
5774 cp
= get_use_iv_cost (data
, use
, cnd
);
5778 iv_ca_set_cp (data
, ivs
, use
, cp
);
5779 acost
= iv_ca_cost (ivs
);
5781 if (compare_costs (acost
, best_cost
) < 0)
5790 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5792 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5795 cnd
= iv_cand (data
, ci
);
5797 cp
= get_use_iv_cost (data
, use
, cnd
);
5801 iv_ca_set_cp (data
, ivs
, use
, cp
);
5802 acost
= iv_ca_cost (ivs
);
5804 if (compare_costs (acost
, best_cost
) < 0)
5811 /* Restore to old cp for use. */
5812 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
5816 iv_ca_delta_free (delta
);
5817 return infinite_cost
;
5820 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5823 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5824 cost
= iv_ca_cost (ivs
);
5825 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5830 /* Try optimizing the set of candidates IVS by removing candidates different
5831 from EXCEPT_CAND from it. Return cost of the new set, and store
5832 differences in DELTA. */
5835 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5836 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5839 struct iv_ca_delta
*act_delta
, *best_delta
;
5841 comp_cost best_cost
, acost
;
5842 struct iv_cand
*cand
;
5845 best_cost
= iv_ca_cost (ivs
);
5847 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5849 cand
= iv_cand (data
, i
);
5851 if (cand
== except_cand
)
5854 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
5856 if (compare_costs (acost
, best_cost
) < 0)
5859 iv_ca_delta_free (&best_delta
);
5860 best_delta
= act_delta
;
5863 iv_ca_delta_free (&act_delta
);
5872 /* Recurse to possibly remove other unnecessary ivs. */
5873 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5874 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5875 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5876 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5880 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5881 cheaper local cost for USE than BEST_CP. Return pointer to
5882 the corresponding cost_pair, otherwise just return BEST_CP. */
5884 static struct cost_pair
*
5885 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
5886 unsigned int cand_idx
, struct iv_cand
*old_cand
,
5887 struct cost_pair
*best_cp
)
5889 struct iv_cand
*cand
;
5890 struct cost_pair
*cp
;
5892 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
5893 if (cand_idx
== old_cand
->id
)
5896 cand
= iv_cand (data
, cand_idx
);
5897 cp
= get_use_iv_cost (data
, use
, cand
);
5898 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
5904 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5905 which are used by more than one iv uses. For each of those candidates,
5906 this function tries to represent iv uses under that candidate using
5907 other ones with lower local cost, then tries to prune the new set.
5908 If the new set has lower cost, it returns the new cost after recording
5909 candidate replacement in list DELTA. */
5912 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5913 struct iv_ca_delta
**delta
)
5915 bitmap_iterator bi
, bj
;
5916 unsigned int i
, j
, k
;
5918 struct iv_cand
*cand
;
5919 comp_cost orig_cost
, acost
;
5920 struct iv_ca_delta
*act_delta
, *tmp_delta
;
5921 struct cost_pair
*old_cp
, *best_cp
= NULL
;
5924 orig_cost
= iv_ca_cost (ivs
);
5926 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5928 if (ivs
->n_cand_uses
[i
] == 1
5929 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
5932 cand
= iv_cand (data
, i
);
5935 /* Represent uses under current candidate using other ones with
5936 lower local cost. */
5937 for (j
= 0; j
< ivs
->upto
; j
++)
5939 use
= iv_use (data
, j
);
5940 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5942 if (old_cp
->cand
!= cand
)
5946 if (data
->consider_all_candidates
)
5947 for (k
= 0; k
< n_iv_cands (data
); k
++)
5948 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5949 old_cp
->cand
, best_cp
);
5951 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
5952 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
5953 old_cp
->cand
, best_cp
);
5955 if (best_cp
== old_cp
)
5958 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
5960 /* No need for further prune. */
5964 /* Prune the new candidate set. */
5965 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
5966 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
5967 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
5968 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
5970 if (compare_costs (acost
, orig_cost
) < 0)
5976 iv_ca_delta_free (&act_delta
);
5982 /* Tries to extend the sets IVS in the best possible way in order
5983 to express the USE. If ORIGINALP is true, prefer candidates from
5984 the original set of IVs, otherwise favor important candidates not
5985 based on any memory object. */
5988 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5989 struct iv_use
*use
, bool originalp
)
5991 comp_cost best_cost
, act_cost
;
5994 struct iv_cand
*cand
;
5995 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
5996 struct cost_pair
*cp
;
5998 iv_ca_add_use (data
, ivs
, use
);
5999 best_cost
= iv_ca_cost (ivs
);
6000 cp
= iv_ca_cand_for_use (ivs
, use
);
6003 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6004 iv_ca_set_no_cp (data
, ivs
, use
);
6007 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6008 first try important candidates not based on any memory object. Only if
6009 this fails, try the specific ones. Rationale -- in loops with many
6010 variables the best choice often is to use just one generic biv. If we
6011 added here many ivs specific to the uses, the optimization algorithm later
6012 would be likely to get stuck in a local minimum, thus causing us to create
6013 too many ivs. The approach from few ivs to more seems more likely to be
6014 successful -- starting from few ivs, replacing an expensive use by a
6015 specific iv should always be a win. */
6016 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6018 cand
= iv_cand (data
, i
);
6020 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6023 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6026 if (iv_ca_cand_used_p (ivs
, cand
))
6029 cp
= get_use_iv_cost (data
, use
, cand
);
6033 iv_ca_set_cp (data
, ivs
, use
, cp
);
6034 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6036 iv_ca_set_no_cp (data
, ivs
, use
);
6037 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6039 if (compare_costs (act_cost
, best_cost
) < 0)
6041 best_cost
= act_cost
;
6043 iv_ca_delta_free (&best_delta
);
6044 best_delta
= act_delta
;
6047 iv_ca_delta_free (&act_delta
);
6050 if (infinite_cost_p (best_cost
))
6052 for (i
= 0; i
< use
->n_map_members
; i
++)
6054 cp
= use
->cost_map
+ i
;
6059 /* Already tried this. */
6060 if (cand
->important
)
6062 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6064 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6068 if (iv_ca_cand_used_p (ivs
, cand
))
6072 iv_ca_set_cp (data
, ivs
, use
, cp
);
6073 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6074 iv_ca_set_no_cp (data
, ivs
, use
);
6075 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6078 if (compare_costs (act_cost
, best_cost
) < 0)
6080 best_cost
= act_cost
;
6083 iv_ca_delta_free (&best_delta
);
6084 best_delta
= act_delta
;
6087 iv_ca_delta_free (&act_delta
);
6091 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6092 iv_ca_delta_free (&best_delta
);
6094 return !infinite_cost_p (best_cost
);
6097 /* Finds an initial assignment of candidates to uses. */
6099 static struct iv_ca
*
6100 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6102 struct iv_ca
*ivs
= iv_ca_new (data
);
6105 for (i
= 0; i
< n_iv_uses (data
); i
++)
6106 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6115 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6116 points to a bool variable, this function tries to break local
6117 optimal fixed-point by replacing candidates in IVS if it's true. */
6120 try_improve_iv_set (struct ivopts_data
*data
,
6121 struct iv_ca
*ivs
, bool *try_replace_p
)
6124 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6125 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6126 struct iv_cand
*cand
;
6128 /* Try extending the set of induction variables by one. */
6129 for (i
= 0; i
< n_iv_cands (data
); i
++)
6131 cand
= iv_cand (data
, i
);
6133 if (iv_ca_cand_used_p (ivs
, cand
))
6136 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6140 /* If we successfully added the candidate and the set is small enough,
6141 try optimizing it by removing other candidates. */
6142 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6144 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6145 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6146 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6147 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6150 if (compare_costs (acost
, best_cost
) < 0)
6153 iv_ca_delta_free (&best_delta
);
6154 best_delta
= act_delta
;
6157 iv_ca_delta_free (&act_delta
);
6162 /* Try removing the candidates from the set instead. */
6163 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6165 if (!best_delta
&& *try_replace_p
)
6167 *try_replace_p
= false;
6168 /* So far candidate selecting algorithm tends to choose fewer IVs
6169 so that it can handle cases in which loops have many variables
6170 but the best choice is often to use only one general biv. One
6171 weakness is it can't handle opposite cases, in which different
6172 candidates should be chosen with respect to each use. To solve
6173 the problem, we replace candidates in a manner described by the
6174 comments of iv_ca_replace, thus give general algorithm a chance
6175 to break local optimal fixed-point in these cases. */
6176 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6183 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6184 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6185 iv_ca_delta_free (&best_delta
);
6189 /* Attempts to find the optimal set of induction variables. We do simple
6190 greedy heuristic -- we try to replace at most one candidate in the selected
6191 solution and remove the unused ivs while this improves the cost. */
6193 static struct iv_ca
*
6194 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6197 bool try_replace_p
= true;
6199 /* Get the initial solution. */
6200 set
= get_initial_solution (data
, originalp
);
6203 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6204 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6208 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6210 fprintf (dump_file
, "Initial set of candidates:\n");
6211 iv_ca_dump (data
, dump_file
, set
);
6214 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6216 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6218 fprintf (dump_file
, "Improved to:\n");
6219 iv_ca_dump (data
, dump_file
, set
);
6226 static struct iv_ca
*
6227 find_optimal_iv_set (struct ivopts_data
*data
)
6230 struct iv_ca
*set
, *origset
;
6232 comp_cost cost
, origcost
;
6234 /* Determine the cost based on a strategy that starts with original IVs,
6235 and try again using a strategy that prefers candidates not based
6237 origset
= find_optimal_iv_set_1 (data
, true);
6238 set
= find_optimal_iv_set_1 (data
, false);
6240 if (!origset
&& !set
)
6243 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6244 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6246 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6248 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6249 origcost
.cost
, origcost
.complexity
);
6250 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6251 cost
.cost
, cost
.complexity
);
6254 /* Choose the one with the best cost. */
6255 if (compare_costs (origcost
, cost
) <= 0)
6262 iv_ca_free (&origset
);
6264 for (i
= 0; i
< n_iv_uses (data
); i
++)
6266 use
= iv_use (data
, i
);
6267 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6273 /* Creates a new induction variable corresponding to CAND. */
6276 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6278 gimple_stmt_iterator incr_pos
;
6288 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6292 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6300 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6304 /* Mark that the iv is preserved. */
6305 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6306 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6308 /* Rewrite the increment so that it uses var_before directly. */
6309 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6313 gimple_add_tmp_var (cand
->var_before
);
6315 base
= unshare_expr (cand
->iv
->base
);
6317 create_iv (base
, unshare_expr (cand
->iv
->step
),
6318 cand
->var_before
, data
->current_loop
,
6319 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6322 /* Creates new induction variables described in SET. */
6325 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6328 struct iv_cand
*cand
;
6331 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6333 cand
= iv_cand (data
, i
);
6334 create_new_iv (data
, cand
);
6337 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6339 fprintf (dump_file
, "\nSelected IV set: \n");
6340 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6342 cand
= iv_cand (data
, i
);
6343 dump_cand (dump_file
, cand
);
6345 fprintf (dump_file
, "\n");
6349 /* Rewrites USE (definition of iv used in a nonlinear expression)
6350 using candidate CAND. */
6353 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6354 struct iv_use
*use
, struct iv_cand
*cand
)
6359 gimple_stmt_iterator bsi
;
6361 /* An important special case -- if we are asked to express value of
6362 the original iv by itself, just exit; there is no need to
6363 introduce a new computation (that might also need casting the
6364 variable to unsigned and back). */
6365 if (cand
->pos
== IP_ORIGINAL
6366 && cand
->incremented_at
== use
->stmt
)
6368 enum tree_code stmt_code
;
6370 gcc_assert (is_gimple_assign (use
->stmt
));
6371 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6373 /* Check whether we may leave the computation unchanged.
6374 This is the case only if it does not rely on other
6375 computations in the loop -- otherwise, the computation
6376 we rely upon may be removed in remove_unused_ivs,
6377 thus leading to ICE. */
6378 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6379 if (stmt_code
== PLUS_EXPR
6380 || stmt_code
== MINUS_EXPR
6381 || stmt_code
== POINTER_PLUS_EXPR
)
6383 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6384 op
= gimple_assign_rhs2 (use
->stmt
);
6385 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6386 op
= gimple_assign_rhs1 (use
->stmt
);
6393 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6397 comp
= get_computation (data
->current_loop
, use
, cand
);
6398 gcc_assert (comp
!= NULL_TREE
);
6400 switch (gimple_code (use
->stmt
))
6403 tgt
= PHI_RESULT (use
->stmt
);
6405 /* If we should keep the biv, do not replace it. */
6406 if (name_info (data
, tgt
)->preserve_biv
)
6409 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6413 tgt
= gimple_assign_lhs (use
->stmt
);
6414 bsi
= gsi_for_stmt (use
->stmt
);
6421 if (!valid_gimple_rhs_p (comp
)
6422 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6423 /* We can't allow re-allocating the stmt as it might be pointed
6425 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6426 >= gimple_num_ops (gsi_stmt (bsi
)))))
6428 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6429 true, GSI_SAME_STMT
);
6430 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6432 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6433 /* As this isn't a plain copy we have to reset alignment
6435 if (SSA_NAME_PTR_INFO (comp
))
6436 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6440 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6442 ass
= gimple_build_assign (tgt
, comp
);
6443 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6445 bsi
= gsi_for_stmt (use
->stmt
);
6446 remove_phi_node (&bsi
, false);
6450 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6451 use
->stmt
= gsi_stmt (bsi
);
6455 /* Performs a peephole optimization to reorder the iv update statement with
6456 a mem ref to enable instruction combining in later phases. The mem ref uses
6457 the iv value before the update, so the reordering transformation requires
6458 adjustment of the offset. CAND is the selected IV_CAND.
6462 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6470 directly propagating t over to (1) will introduce overlapping live range
6471 thus increase register pressure. This peephole transform it into:
6475 t = MEM_REF (base, iv2, 8, 8);
6482 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6485 gimple iv_update
, stmt
;
6487 gimple_stmt_iterator gsi
, gsi_iv
;
6489 if (cand
->pos
!= IP_NORMAL
)
6492 var_after
= cand
->var_after
;
6493 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6495 bb
= gimple_bb (iv_update
);
6496 gsi
= gsi_last_nondebug_bb (bb
);
6497 stmt
= gsi_stmt (gsi
);
6499 /* Only handle conditional statement for now. */
6500 if (gimple_code (stmt
) != GIMPLE_COND
)
6503 gsi_prev_nondebug (&gsi
);
6504 stmt
= gsi_stmt (gsi
);
6505 if (stmt
!= iv_update
)
6508 gsi_prev_nondebug (&gsi
);
6509 if (gsi_end_p (gsi
))
6512 stmt
= gsi_stmt (gsi
);
6513 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6516 if (stmt
!= use
->stmt
)
6519 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6522 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6524 fprintf (dump_file
, "Reordering \n");
6525 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6526 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6527 fprintf (dump_file
, "\n");
6530 gsi
= gsi_for_stmt (use
->stmt
);
6531 gsi_iv
= gsi_for_stmt (iv_update
);
6532 gsi_move_before (&gsi_iv
, &gsi
);
6534 cand
->pos
= IP_BEFORE_USE
;
6535 cand
->incremented_at
= use
->stmt
;
6538 /* Rewrites USE (address that is an iv) using candidate CAND. */
6541 rewrite_use_address (struct ivopts_data
*data
,
6542 struct iv_use
*use
, struct iv_cand
*cand
)
6545 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6546 tree base_hint
= NULL_TREE
;
6550 adjust_iv_update_pos (cand
, use
);
6551 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6553 unshare_aff_combination (&aff
);
6555 /* To avoid undefined overflow problems, all IV candidates use unsigned
6556 integer types. The drawback is that this makes it impossible for
6557 create_mem_ref to distinguish an IV that is based on a memory object
6558 from one that represents simply an offset.
6560 To work around this problem, we pass a hint to create_mem_ref that
6561 indicates which variable (if any) in aff is an IV based on a memory
6562 object. Note that we only consider the candidate. If this is not
6563 based on an object, the base of the reference is in some subexpression
6564 of the use -- but these will use pointer types, so they are recognized
6565 by the create_mem_ref heuristics anyway. */
6566 if (cand
->iv
->base_object
)
6567 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6569 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6570 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6571 reference_alias_ptr_type (*use
->op_p
),
6572 iv
, base_hint
, data
->speed
);
6573 copy_ref_info (ref
, *use
->op_p
);
6577 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6581 rewrite_use_compare (struct ivopts_data
*data
,
6582 struct iv_use
*use
, struct iv_cand
*cand
)
6584 tree comp
, *var_p
, op
, bound
;
6585 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6586 enum tree_code compare
;
6587 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6593 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6594 tree var_type
= TREE_TYPE (var
);
6597 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6599 fprintf (dump_file
, "Replacing exit test: ");
6600 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6603 bound
= unshare_expr (fold_convert (var_type
, bound
));
6604 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6606 gsi_insert_seq_on_edge_immediate (
6607 loop_preheader_edge (data
->current_loop
),
6610 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6611 gimple_cond_set_lhs (cond_stmt
, var
);
6612 gimple_cond_set_code (cond_stmt
, compare
);
6613 gimple_cond_set_rhs (cond_stmt
, op
);
6617 /* The induction variable elimination failed; just express the original
6619 comp
= get_computation (data
->current_loop
, use
, cand
);
6620 gcc_assert (comp
!= NULL_TREE
);
6622 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6625 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6626 true, GSI_SAME_STMT
);
6629 /* Rewrites USE using candidate CAND. */
6632 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6636 case USE_NONLINEAR_EXPR
:
6637 rewrite_use_nonlinear_expr (data
, use
, cand
);
6641 rewrite_use_address (data
, use
, cand
);
6645 rewrite_use_compare (data
, use
, cand
);
6652 update_stmt (use
->stmt
);
6655 /* Rewrite the uses using the selected induction variables. */
6658 rewrite_uses (struct ivopts_data
*data
)
6661 struct iv_cand
*cand
;
6664 for (i
= 0; i
< n_iv_uses (data
); i
++)
6666 use
= iv_use (data
, i
);
6667 cand
= use
->selected
;
6670 rewrite_use (data
, use
, cand
);
6674 /* Removes the ivs that are not used after rewriting. */
6677 remove_unused_ivs (struct ivopts_data
*data
)
6681 bitmap toremove
= BITMAP_ALLOC (NULL
);
6683 /* Figure out an order in which to release SSA DEFs so that we don't
6684 release something that we'd have to propagate into a debug stmt
6686 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6688 struct version_info
*info
;
6690 info
= ver_info (data
, j
);
6692 && !integer_zerop (info
->iv
->step
)
6694 && !info
->iv
->have_use_for
6695 && !info
->preserve_biv
)
6697 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6699 tree def
= info
->iv
->ssa_name
;
6701 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6703 imm_use_iterator imm_iter
;
6704 use_operand_p use_p
;
6708 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6710 if (!gimple_debug_bind_p (stmt
))
6713 /* We just want to determine whether to do nothing
6714 (count == 0), to substitute the computed
6715 expression into a single use of the SSA DEF by
6716 itself (count == 1), or to use a debug temp
6717 because the SSA DEF is used multiple times or as
6718 part of a larger expression (count > 1). */
6720 if (gimple_debug_bind_get_value (stmt
) != def
)
6724 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6730 struct iv_use dummy_use
;
6731 struct iv_cand
*best_cand
= NULL
, *cand
;
6732 unsigned i
, best_pref
= 0, cand_pref
;
6734 memset (&dummy_use
, 0, sizeof (dummy_use
));
6735 dummy_use
.iv
= info
->iv
;
6736 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6738 cand
= iv_use (data
, i
)->selected
;
6739 if (cand
== best_cand
)
6741 cand_pref
= operand_equal_p (cand
->iv
->step
,
6745 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6746 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6749 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6751 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6754 best_pref
= cand_pref
;
6761 tree comp
= get_computation_at (data
->current_loop
,
6762 &dummy_use
, best_cand
,
6763 SSA_NAME_DEF_STMT (def
));
6769 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6770 DECL_ARTIFICIAL (vexpr
) = 1;
6771 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6772 if (SSA_NAME_VAR (def
))
6773 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6775 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6777 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
6778 gimple_stmt_iterator gsi
;
6780 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6781 gsi
= gsi_after_labels (gimple_bb
6782 (SSA_NAME_DEF_STMT (def
)));
6784 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6786 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6790 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6792 if (!gimple_debug_bind_p (stmt
))
6795 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6796 SET_USE (use_p
, comp
);
6804 release_defs_bitset (toremove
);
6806 BITMAP_FREE (toremove
);
6809 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6810 for hash_map::traverse. */
6813 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
6819 /* Frees data allocated by the optimization of a single loop. */
6822 free_loop_data (struct ivopts_data
*data
)
6830 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
6831 delete data
->niters
;
6832 data
->niters
= NULL
;
6835 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6837 struct version_info
*info
;
6839 info
= ver_info (data
, i
);
6842 info
->has_nonlin_use
= false;
6843 info
->preserve_biv
= false;
6846 bitmap_clear (data
->relevant
);
6847 bitmap_clear (data
->important_candidates
);
6849 for (i
= 0; i
< n_iv_uses (data
); i
++)
6851 struct iv_use
*use
= iv_use (data
, i
);
6854 BITMAP_FREE (use
->related_cands
);
6855 for (j
= 0; j
< use
->n_map_members
; j
++)
6856 if (use
->cost_map
[j
].depends_on
)
6857 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6858 free (use
->cost_map
);
6861 data
->iv_uses
.truncate (0);
6863 for (i
= 0; i
< n_iv_cands (data
); i
++)
6865 struct iv_cand
*cand
= iv_cand (data
, i
);
6868 if (cand
->depends_on
)
6869 BITMAP_FREE (cand
->depends_on
);
6872 data
->iv_candidates
.truncate (0);
6874 if (data
->version_info_size
< num_ssa_names
)
6876 data
->version_info_size
= 2 * num_ssa_names
;
6877 free (data
->version_info
);
6878 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6881 data
->max_inv_id
= 0;
6883 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6884 SET_DECL_RTL (obj
, NULL_RTX
);
6886 decl_rtl_to_reset
.truncate (0);
6888 data
->inv_expr_tab
->empty ();
6889 data
->inv_expr_id
= 0;
6892 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6896 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6898 free_loop_data (data
);
6899 free (data
->version_info
);
6900 BITMAP_FREE (data
->relevant
);
6901 BITMAP_FREE (data
->important_candidates
);
6903 decl_rtl_to_reset
.release ();
6904 data
->iv_uses
.release ();
6905 data
->iv_candidates
.release ();
6906 delete data
->inv_expr_tab
;
6907 data
->inv_expr_tab
= NULL
;
6908 free_affine_expand_cache (&data
->name_expansion_cache
);
6911 /* Returns true if the loop body BODY includes any function calls. */
6914 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6916 gimple_stmt_iterator gsi
;
6919 for (i
= 0; i
< num_nodes
; i
++)
6920 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6922 gimple stmt
= gsi_stmt (gsi
);
6923 if (is_gimple_call (stmt
)
6924 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6930 /* Optimizes the LOOP. Returns true if anything changed. */
6933 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6935 bool changed
= false;
6936 struct iv_ca
*iv_ca
;
6937 edge exit
= single_dom_exit (loop
);
6940 gcc_assert (!data
->niters
);
6941 data
->current_loop
= loop
;
6942 data
->speed
= optimize_loop_for_speed_p (loop
);
6944 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6946 fprintf (dump_file
, "Processing loop %d\n", loop
->num
);
6950 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
6951 exit
->src
->index
, exit
->dest
->index
);
6952 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
6953 fprintf (dump_file
, "\n");
6956 fprintf (dump_file
, "\n");
6959 body
= get_loop_body (loop
);
6960 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
6961 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
6964 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
6966 /* For each ssa name determines whether it behaves as an induction variable
6968 if (!find_induction_variables (data
))
6971 /* Finds interesting uses (item 1). */
6972 find_interesting_uses (data
);
6973 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
6976 /* Finds candidates for the induction variables (item 2). */
6977 find_iv_candidates (data
);
6979 /* Calculates the costs (item 3, part 1). */
6980 determine_iv_costs (data
);
6981 determine_use_iv_costs (data
);
6982 determine_set_costs (data
);
6984 /* Find the optimal set of induction variables (item 3, part 2). */
6985 iv_ca
= find_optimal_iv_set (data
);
6990 /* Create the new induction variables (item 4, part 1). */
6991 create_new_ivs (data
, iv_ca
);
6992 iv_ca_free (&iv_ca
);
6994 /* Rewrite the uses (item 4, part 2). */
6995 rewrite_uses (data
);
6997 /* Remove the ivs that are unused after rewriting. */
6998 remove_unused_ivs (data
);
7000 /* We have changed the structure of induction variables; it might happen
7001 that definitions in the scev database refer to some of them that were
7006 free_loop_data (data
);
7011 /* Main entry point. Optimizes induction variables in loops. */
7014 tree_ssa_iv_optimize (void)
7017 struct ivopts_data data
;
7019 tree_ssa_iv_optimize_init (&data
);
7021 /* Optimize the loops starting with the innermost ones. */
7022 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7024 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7025 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7027 tree_ssa_iv_optimize_loop (&data
, loop
);
7030 tree_ssa_iv_optimize_finalize (&data
);