/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variables value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
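
/* As a small illustration of the transformation this pass aims for (not
   code from this file, just the usual textbook case), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains an address use a[i] of the induction variable i.  After
   strength reduction the pass may express that use with a pointer
   induction variable instead, conceptually rewriting the loop to

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   which removes the per-iteration address computation hidden in a[i].  */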
#include "coretypes.h"
#include "double-int.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "hard-reg-set.h"
#include "dominance.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "statistics.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
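
/* By convention, costs combine field-wise (e.g. {4, 1} plus {3, 2} gives
   {7, 3}) and infinite_cost is treated as absorbing: once a computation is
   deemed infinitely expensive, any combination involving it stays infinite.
   COMPLEXITY only serves to break ties between equal runtime costs.  */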
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use of an induction variable.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}
/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
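
/* Note that the hash value is computed once, when an entry is created,
   and cached in the HASH field; lookups therefore only fall back to the
   (potentially expensive) operand_equal_p tree walk when two entries
   collide on the same hash value.  */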
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}
/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}
/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}
/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}
/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
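
/* For example, applied to &a[i] (an ADDR_EXPR over the array A) this
   yields &a; applied to an expression of non-pointer type it yields
   NULL_TREE; and for a plain SSA pointer with no recognizable base the
   default case returns the (converted) expression itself.  */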
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Return the first non-invariant ssa var found in EXPR.  */

tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
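
/* A typical biv found above comes from a loop-header phi such as

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   for which determine_biv_step returns the step 1 and the preheader
   argument 0 becomes the base of the induction variable.  */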
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, because that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
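
/* For instance, for an exit test i_1 < n_2 where i_1 is an induction
   variable and n_2 is loop invariant, *CONTROL_VAR ends up pointing to
   i_1 and *BOUND to n_2, and true is returned; for n_2 < i_1 the
   operands are first swapped, since the control variable may appear on
   either side of the comparison.  */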
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has same stripped iv base and step,
   this function records this one as a sub use to that; otherwise records
   it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
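
/* Thus accesses such as a[i] and a[i + 1] within one loop iteration end
   up in the same group: they share the same base object, stripped base
   address and step, and differ only in the recorded ADDR_OFFSET.  */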
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
2074 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
2077 use_operand_p use_p
;
2080 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2082 op
= USE_FROM_PTR (use_p
);
2083 record_invariant (data
, op
, false);
2087 /* Finds interesting uses of induction variables in the statement STMT. */
2090 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
2093 tree op
, *lhs
, *rhs
;
2095 use_operand_p use_p
;
2096 enum tree_code code
;
2098 find_invariants_stmt (data
, stmt
);
2100 if (gimple_code (stmt
) == GIMPLE_COND
)
2102 find_interesting_uses_cond (data
, stmt
);
2106 if (is_gimple_assign (stmt
))
2108 lhs
= gimple_assign_lhs_ptr (stmt
);
2109 rhs
= gimple_assign_rhs1_ptr (stmt
);
2111 if (TREE_CODE (*lhs
) == SSA_NAME
)
2113 /* If the statement defines an induction variable, the uses are not
2114 interesting by themselves. */
2116 iv
= get_iv (data
, *lhs
);
2118 if (iv
&& !integer_zerop (iv
->step
))
2122 code
= gimple_assign_rhs_code (stmt
);
2123 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2124 && (REFERENCE_CLASS_P (*rhs
)
2125 || is_gimple_val (*rhs
)))
2127 if (REFERENCE_CLASS_P (*rhs
))
2128 find_interesting_uses_address (data
, stmt
, rhs
);
2130 find_interesting_uses_op (data
, *rhs
);
2132 if (REFERENCE_CLASS_P (*lhs
))
2133 find_interesting_uses_address (data
, stmt
, lhs
);
2136 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2138 find_interesting_uses_cond (data
, stmt
);
2142 /* TODO -- we should also handle address uses of type
2144 memory = call (whatever);
2151 if (gimple_code (stmt
) == GIMPLE_PHI
2152 && gimple_bb (stmt
) == data
->current_loop
->header
)
2154 iv
= get_iv (data
, PHI_RESULT (stmt
));
2156 if (iv
&& !integer_zerop (iv
->step
))
2160 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2162 op
= USE_FROM_PTR (use_p
);
2164 if (TREE_CODE (op
) != SSA_NAME
)
2167 iv
= get_iv (data
, op
);
2171 find_interesting_uses_op (data
, op
);
2175 /* Finds interesting uses of induction variables outside of loops
2176 on loop exit edge EXIT. */
2179 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2185 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2188 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2189 if (!virtual_operand_p (def
))
2190 find_interesting_uses_op (data
, def
);
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Compute maximum offset of [base + offset] addressing mode
   for memory reference represented by USE.  */

static HOST_WIDE_INT
compute_max_addr_offset (struct iv_use *use)
{
  int width;
  rtx reg, addr;
  HOST_WIDE_INT i, off;
  unsigned list_index, num;
  addr_space_t as;
  machine_mode mem_mode, addr_mode;
  static vec<HOST_WIDE_INT> max_offset_list;

  as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));

  num = max_offset_list.length ();
  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= num)
    {
      max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
      for (; num < max_offset_list.length (); num++)
	max_offset_list[num] = -1;
    }

  off = max_offset_list[list_index];
  if (off != -1)
    return off;

  addr_mode = targetm.addr_space.address_mode (as);
  reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
  addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);

  width = GET_MODE_BITSIZE (addr_mode) - 1;
  if (width > (HOST_BITS_PER_WIDE_INT - 1))
    width = HOST_BITS_PER_WIDE_INT - 1;

  for (i = width; i > 0; i--)
    {
      off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
      XEXP (addr, 1) = gen_int_mode (off, addr_mode);
      if (memory_address_addr_space_p (mem_mode, addr, as))
	break;

      /* For some strict-alignment targets, the offset must be naturally
	 aligned.  Try an aligned offset if mem_mode is not QImode.  */
      off = ((unsigned HOST_WIDE_INT) 1 << i);
      if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
	{
	  off -= GET_MODE_SIZE (mem_mode);
	  XEXP (addr, 1) = gen_int_mode (off, addr_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
    }
  if (i == 0)
    off = 0;

  max_offset_list[list_index] = off;
  return off;
}
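
/* The probing above is a linear search over power-of-two offsets using a
   scratch (reg + offset) RTL address.  Because the answer depends only on
   the address space and the memory mode, it is cached in MAX_OFFSET_LIST,
   so each (AS, MEM_MODE) pair is probed at most once per compilation.  */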
/* Check if all small groups should be split.  Return true if and
   only if:

     1) At least one groups contain two uses with different offsets.
     2) No group contains more than two uses with different offsets.

   Return false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfer with
	general candidate selection.
     2) Size for problem with only small groups is usually small and
	general algorithm can handle it well.

   TODO -- Above claim may not hold when auto increment is supported.  */

static bool
split_all_small_groups (struct ivopts_data *data)
{
  bool split_p = false;
  unsigned int i, n, distinct;
  struct iv_use *pre, *use;

  n = n_iv_uses (data);
  for (i = 0; i < n; i++)
    {
      use = iv_use (data, i);
      if (!use->next)
	continue;

      distinct = 1;
      gcc_assert (use->type == USE_ADDRESS);
      for (pre = use, use = use->next; use; pre = use, use = use->next)
	{
	  if (pre->addr_offset != use->addr_offset)
	    distinct++;

	  if (distinct > 2)
	    return false;
	}
      if (distinct == 2)
	split_p = true;
    }

  return split_p;
}
/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  */

static void
group_address_uses (struct ivopts_data *data)
{
  HOST_WIDE_INT max_offset = -1;
  unsigned int i, n, sub_id;
  struct iv_use *pre, *use;
  unsigned HOST_WIDE_INT addr_offset_first;

  /* Reset max offset to split all small groups.  */
  if (split_all_small_groups (data))
    max_offset = 0;

  n = n_iv_uses (data);
  for (i = 0; i < n; i++)
    {
      use = iv_use (data, i);
      if (!use->next)
	continue;

      gcc_assert (use->type == USE_ADDRESS);
      if (max_offset != 0)
	max_offset = compute_max_addr_offset (use);

      while (use)
	{
	  sub_id = 0;
	  addr_offset_first = use->addr_offset;
	  /* Only uses with offset that can fit in offset part against
	     the first use can be grouped together.  */
	  for (pre = use, use = use->next;
	       use && (use->addr_offset - addr_offset_first
		       <= (unsigned HOST_WIDE_INT) max_offset);
	       pre = use, use = use->next)
	    {
	      use->id = pre->id;
	      use->sub_id = ++sub_id;
	    }

	  /* Break the list and create new group.  */
	  if (use)
	    {
	      pre->next = NULL;
	      use->id = n_iv_uses (data);
	      use->related_cands = BITMAP_ALLOC (NULL);
	      data->iv_uses.safe_push (use);
	    }
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_uses (dump_file, data);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand? */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}
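/* For example, strip_offset applied to p + 16 yields p and stores 16 to
   *OFFSET; applied to &a[4] with 4-byte array elements, it yields &a[0]
   and again stores 16.  */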
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->iv_candidates.safe_push (cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use != NULL)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  gcc_assert (use == NULL || use->sub_id == 0);

  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL);

  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, base, step, important, use);
}
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for a double-integer type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  cand->var_before = iv->ssa_name;
	  cand->var_after = def;
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}
/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}
/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = bitmap_count_bits (use->related_cands);

	  /* Round up to the power of two, so that moduling by it is fast.  */
	  size = s ? (1 << ceil_log2 (s)) : 1;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
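/* E.g. a use related to 5 candidates gets a table of size 8 (the next power
   of two), so the cost-map lookups below can reduce a candidate id into the
   table with "cand->id & 7" instead of a division.  */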
/* Returns description of computation cost of expression whose runtime
   cost is RUNTIME and complexity corresponds to COMPLEXITY.  */

static comp_cost
new_cost (unsigned runtime, unsigned complexity)
{
  comp_cost cost;

  cost.cost = runtime;
  cost.complexity = complexity;

  return cost;
}

/* Returns true if COST is infinite.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
/* Adds costs COST1 and COST2.  */

static comp_cost
add_costs (comp_cost cost1, comp_cost cost2)
{
  if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

/* Subtracts costs COST1 and COST2.  */

static comp_cost
sub_costs (comp_cost cost1, comp_cost cost2)
{
  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

/* Returns a negative number if COST1 < COST2, a positive number if
   COST1 > COST2, and 0 if COST1 = COST2.  */

static int
compare_costs (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity - cost2.complexity;

  return cost1.cost - cost2.cost;
}
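/* For example, {cost 4, complexity 1} compares less than {cost 5,
   complexity 0}, but greater than {cost 4, complexity 0}: complexity only
   breaks ties in runtime cost.  */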
/* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
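/* The non-consider_all_candidates path above is a small open-addressing
   hash table: the home slot is cand->id modulo the power-of-two table size
   and probing continues linearly, wrapping around once.  get_use_iv_cost
   below walks the slots in the same order, so hitting a NULL slot there
   proves the pair was never recorded.  */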
/* Gets cost of (USE, CANDIDATE) pair.  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;
    else if (use->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}
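/* For instance, assuming a target where long is wider than unsigned int:
   if A is (unsigned int) x and B is (unsigned int) y with x and y of type
   long, *A becomes x, *B becomes y and the type long is returned, so the
   caller can fold the difference in the wider type before converting the
   result back.  */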
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
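/* As a concrete example: for a use with base 4 and step 4 (say, the address
   computation of a 4-byte array element) and a candidate with base 0 and
   step 1 (a canonical counter), the ratio is 4 and the resulting
   combination is 4 - 4 * 0 + 4 * var, i.e. 4 * var + 4.  */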
/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop,
		    struct iv_use *use, struct iv_cand *cand, gimple at)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, use, cand, at, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Determines the expression by which USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.  */
static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    return cost / avg_loop_niter (data->current_loop);
  else
    return cost;
}
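/* For example, a setup cost of 10 in a loop averaging 5 iterations is
   accounted as 2 per iteration when optimizing for speed, but stays 10
   when optimizing for size.  */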
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  */

static bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
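/* E.g. on i386, where the valid index scales are 1, 2, 4 and 8, the cached
   bitmap ends up with exactly those bits set (shifted by MAX_RATIO), and
   any other ratio is rejected without re-querying the backend.  */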
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

typedef struct address_cost_data_s
{
  HOST_WIDE_INT min_offset, max_offset;
  unsigned costs[2][2][2][2];
  unsigned ainc_costs[AINC_NONE];
} *address_cost_data;

static comp_cost
get_address_cost (bool symbol_present, bool var_present,
		  unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
		  HOST_WIDE_INT cstep, machine_mode mem_mode,
		  addr_space_t as, bool speed,
		  bool stmt_after_inc, bool *may_autoinc)
{
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  static vec<address_cost_data> address_cost_data_list;
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
  address_cost_data data;
  static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
  static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
  unsigned cost, acost, complexity;
  enum ainc_type autoinc_type;
  bool offset_p, ratio_p, autoinc;
  HOST_WIDE_INT s_offset, autoinc_offset, msize;
  unsigned HOST_WIDE_INT mask;
  unsigned bits;

  if (data_index >= address_cost_data_list.length ())
    address_cost_data_list.safe_grow_cleared (data_index + 1);

  data = address_cost_data_list[data_index];
  if (!data)
    {
      HOST_WIDE_INT i;
      HOST_WIDE_INT rat, off = 0;
      int old_cse_not_expected, width;
      unsigned sym_p, var_p, off_p, rat_p, add_c;
      rtx_insn *seq;
      rtx addr, base;
      rtx reg0, reg1;

      data = (address_cost_data) xcalloc (1, sizeof (*data));

      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);

      width = GET_MODE_BITSIZE (address_mode) - 1;
      if (width > (HOST_BITS_PER_WIDE_INT - 1))
	width = HOST_BITS_PER_WIDE_INT - 1;
      addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);

      for (i = width; i >= 0; i--)
	{
	  off = -((unsigned HOST_WIDE_INT) 1 << i);
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	}
      data->min_offset = (i == -1? 0 : off);

      for (i = width; i >= 0; i--)
	{
	  off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
	  XEXP (addr, 1) = gen_int_mode (off, address_mode);
	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    break;
	  /* For some strict-alignment targets, the offset must be naturally
	     aligned.  Try an aligned offset if mem_mode is not QImode.  */
	  off = mem_mode != QImode
		? ((unsigned HOST_WIDE_INT) 1 << i)
		    - GET_MODE_SIZE (mem_mode)
		: 0;
	  if (off > 0)
	    {
	      XEXP (addr, 1) = gen_int_mode (off, address_mode);
	      if (memory_address_addr_space_p (mem_mode, addr, as))
		break;
	    }
	}
      if (i == -1)
	off = 0;
      data->max_offset = off;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "get_address_cost:\n");
	  fprintf (dump_file, "  min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->min_offset);
	  fprintf (dump_file, "  max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
		   GET_MODE_NAME (mem_mode),
		   data->max_offset);
	}

      rat = 1;
      for (i = 2; i <= MAX_RATIO; i++)
	if (multiplier_allowed_in_address_p (i, mem_mode, as))
	  {
	    rat = i;
	    break;
	  }

      /* Compute the cost of various addressing modes.  */
      acost = 0;
      reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);

      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_DEC (address_mode, reg0);
	  has_predec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_predec[mem_mode])
	    data->ainc_costs[AINC_PRE_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_DEC (address_mode, reg0);
	  has_postdec[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_postdec[mem_mode])
	    data->ainc_costs[AINC_POST_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  addr = gen_rtx_PRE_INC (address_mode, reg0);
	  has_preinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_preinc[mem_mode])
	    data->ainc_costs[AINC_PRE_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  addr = gen_rtx_POST_INC (address_mode, reg0);
	  has_postinc[mem_mode]
	    = memory_address_addr_space_p (mem_mode, addr, as);

	  if (has_postinc[mem_mode])
	    data->ainc_costs[AINC_POST_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      for (i = 0; i < 16; i++)
	{
	  sym_p = i & 1;
	  var_p = (i >> 1) & 1;
	  off_p = (i >> 2) & 1;
	  rat_p = (i >> 3) & 1;

	  addr = reg0;
	  if (rat_p)
	    addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
				   gen_int_mode (rat, address_mode));

	  if (var_p)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);

	  if (sym_p)
	    {
	      base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
	      /* ??? We can run into trouble with some backends by presenting
		 it with symbols which haven't been properly passed through
		 targetm.encode_section_info.  By setting the local bit, we
		 enhance the probability of things working.  */
	      SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;

	      if (off_p)
		base = gen_rtx_fmt_e (CONST, address_mode,
				      gen_rtx_fmt_ee
					(PLUS, address_mode, base,
					 gen_int_mode (off, address_mode)));
	    }
	  else if (off_p)
	    base = gen_int_mode (off, address_mode);
	  else
	    base = NULL_RTX;

	  if (base)
	    addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);

	  start_sequence ();
	  /* To avoid splitting addressing modes, pretend that no cse will
	     follow.  */
	  old_cse_not_expected = cse_not_expected;
	  cse_not_expected = true;
	  addr = memory_address_addr_space (mem_mode, addr, as);
	  cse_not_expected = old_cse_not_expected;
	  seq = get_insns ();
	  end_sequence ();

	  acost = seq_cost (seq, speed);
	  acost += address_cost (addr, mem_mode, as, speed);

	  if (!acost)
	    acost = 1;
	  data->costs[sym_p][var_p][off_p][rat_p] = acost;
	}

      /* On some targets, it is quite expensive to load symbol to a register,
	 which makes addresses that contain symbols look much more expensive.
	 However, the symbol will have to be loaded in any case before the
	 loop (and quite likely we have it in register already), so it does not
	 make much sense to penalize them too heavily.  So make some final
	 tweaks for the SYMBOL_PRESENT modes:

	 If VAR_PRESENT is false, and the mode obtained by changing symbol to
	 var is cheaper, use this mode with small penalty.
	 If VAR_PRESENT is true, try whether the mode with
	 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
	 if this is the case, use it.  */
      add_c = add_cost (speed, address_mode);
      for (i = 0; i < 8; i++)
	{
	  var_p = i & 1;
	  off_p = (i >> 1) & 1;
	  rat_p = (i >> 2) & 1;

	  acost = data->costs[0][1][off_p][rat_p] + 1;
	  if (var_p)
	    acost += add_c;

	  if (acost < data->costs[1][var_p][off_p][rat_p])
	    data->costs[1][var_p][off_p][rat_p] = acost;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Address costs:\n");

	  for (i = 0; i < 16; i++)
	    {
	      sym_p = i & 1;
	      var_p = (i >> 1) & 1;
	      off_p = (i >> 2) & 1;
	      rat_p = (i >> 3) & 1;

	      fprintf (dump_file, "  ");
	      if (sym_p)
		fprintf (dump_file, "sym + ");
	      if (var_p)
		fprintf (dump_file, "var + ");
	      if (off_p)
		fprintf (dump_file, "cst + ");
	      if (rat_p)
		fprintf (dump_file, "rat * ");

	      acost = data->costs[sym_p][var_p][off_p][rat_p];
	      fprintf (dump_file, "index costs %d\n", acost);
	    }
	  if (has_predec[mem_mode] || has_postdec[mem_mode]
	      || has_preinc[mem_mode] || has_postinc[mem_mode])
	    fprintf (dump_file, "  May include autoinc/dec\n");
	  fprintf (dump_file, "\n");
	}

      address_cost_data_list[data_index] = data;
    }

  bits = GET_MODE_BITSIZE (address_mode);
  mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  offset &= mask;
  if ((offset >> (bits - 1) & 1))
    offset |= ~mask;
  s_offset = offset;

  autoinc = false;
  autoinc_type = AINC_NONE;
  msize = GET_MODE_SIZE (mem_mode);
  autoinc_offset = offset;
  if (stmt_after_inc)
    autoinc_offset += ratio * cstep;
  if (symbol_present || var_present || ratio != 1)
    autoinc = false;
  else
    {
      if (has_postinc[mem_mode] && autoinc_offset == 0
	  && msize == cstep)
	autoinc_type = AINC_POST_INC;
      else if (has_postdec[mem_mode] && autoinc_offset == 0
	       && msize == -cstep)
	autoinc_type = AINC_POST_DEC;
      else if (has_preinc[mem_mode] && autoinc_offset == msize
	       && msize == cstep)
	autoinc_type = AINC_PRE_INC;
      else if (has_predec[mem_mode] && autoinc_offset == -msize
	       && msize == -cstep)
	autoinc_type = AINC_PRE_DEC;

      if (autoinc_type != AINC_NONE)
	autoinc = true;
    }

  cost = 0;
  offset_p = (s_offset != 0
	      && data->min_offset <= s_offset
	      && s_offset <= data->max_offset);
  ratio_p = (ratio != 1
	     && multiplier_allowed_in_address_p (ratio, mem_mode, as));

  if (ratio != 1 && !ratio_p)
    cost += mult_by_coeff_cost (ratio, address_mode, speed);

  if (s_offset && !offset_p && !symbol_present)
    cost += add_cost (speed, address_mode);

  if (may_autoinc)
    *may_autoinc = autoinc;
  if (autoinc)
    acost = data->ainc_costs[autoinc_type];
  else
    acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
  complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
  return new_cost (cost + acost, complexity);
}
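/* Note on the mask arithmetic above: OFFSET arrives as an unsigned
   HOST_WIDE_INT, so it is first truncated to the width of the address mode
   and then sign-extended by hand.  E.g. with a 32-bit address mode, mask is
   0xffffffff; an offset of 0xfffffffc has bit 31 set, so the high bits get
   filled in and S_OFFSET becomes -4, as the offset range checks expect.  */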
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  if (!(m >= 0 && m < maxm))
    return false;

  mult_in_op1 = operand_equal_p (op1, mult, 0);

  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = new_cost (MIN (as_cost, sa_cost), 0);
  res = add_costs (res, mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res = add_costs (res, force_expr_to_var_cost (multop, speed));

  *cost = res;
  return true;
}
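/* For instance, for EXPR a + b * 8, MULT is b * 8 and M is 3; the result is
   the cheaper of a shift-left-by-3 followed by an add, and a single
   shift-and-add instruction if the target prices one below that pair.  */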
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i) + 1;

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return new_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return new_cost (symbol_cost [speed], 0);
	}

      return new_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return new_cost (target_spill_cost [speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = new_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
				       TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
					     mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
					     mode, speed), 0);
      else
	return new_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost = add_costs (cost, cost0);
  cost = add_costs (cost, cost1);

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, false);

  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      walk_tree (&addr, find_depends, depends_on, NULL);
      return new_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Returns true if AFF1 and AFF2 are identical.  */

static bool
compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
{
  unsigned i;

  if (aff1->n != aff2->n)
    return false;

  for (i = 0; i < aff1->n; i++)
    {
      if (aff1->elts[i].coef != aff2->elts[i].coef)
	return false;

      if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
	return false;
    }
  return true;
}
/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id.  */

static int
get_expr_id (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);
  if (*slot)
    return (*slot)->id;

  *slot = XNEW (struct iv_inv_expr_ent);
  (*slot)->expr = expr;
  (*slot)->hash = ent.hash;
  (*slot)->id = data->inv_expr_id++;
  return (*slot)->id;
}
/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns -1 otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static int
get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
			    tree cbase, HOST_WIDE_INT ratio,
			    bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  ub = ubase;
  cb = cbase;

  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return -1;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return -1;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return -1;
    }

  if (ratio == 1)
    {
      if (operand_equal_p (ubase, cbase, 0))
	return -1;

      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return -1;
	}
      /* Now do more complex comparison  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return -1;
    }

  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return get_expr_id (data, expr);
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at which the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple at,
			 bool *can_autoinc,
			 int *inv_expr_id)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  machine_mode mem_mode = (address_p
				? TYPE_MODE (TREE_TYPE (*use->op_p))
				: VOIDmode);

  *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not all that unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
		(ratio, mem_mode,
			TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      cbase
	= fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost.cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      cost = force_var_cost (data, cbase, depends_on);
      cost = add_costs (cost,
			difference_cost (data,
					 ubase, build_int_cst (utype, 0),
					 &symbol_present, &var_present,
					 &offset, depends_on));
      cost.cost /= avg_loop_niter (data->current_loop);
      cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  /* Set of invariants depended on by sub use has already been computed
     for the first use in the group.  */
  if (use->sub_id)
    {
      cost.cost = 0;
      if (depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }
  else if (inv_expr_id)
    {
      *inv_expr_id =
	get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
      /* Clear depends on.  */
      if (*inv_expr_id != -1 && depends_on && *depends_on)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    return add_costs (cost,
		      get_address_cost (symbol_present, var_present,
					offset, ratio, cstepi,
					mem_mode,
					TYPE_ADDR_SPACE (TREE_TYPE (utype)),
					speed, stmt_is_after_inc,
					can_autoinc));

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return cost;
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost.cost += adjust_setup_cost (data,
				    add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost.cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
  return cost;

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  {
    /* Just get the expression, expand it and measure the cost.  */
    tree comp = get_computation_at (data->current_loop, use, cand, at);

    if (!comp)
      return infinite_cost;

    if (address_p)
      comp = build_simple_mem_ref (comp);

    return new_cost (computation_cost (comp, speed), 0);
  }
}
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on,
		      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt,
				  can_autoinc, inv_expr_id);
}
/* Determines cost of basing replacement of USE on CAND in a generic
   expression.  */

static bool
determine_use_iv_cost_generic (struct ivopts_data *data,
			       struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  comp_cost cost;
  int inv_expr_id = -1;

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return true;
    }

  cost = get_computation_cost (data, use, cand, false, &depends_on,
			       NULL, &inv_expr_id);

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
		   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Determines cost of basing replacement of USE on CAND in an address.  */

static bool
determine_use_iv_cost_address (struct ivopts_data *data,
                               struct iv_use *use, struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  int inv_expr_id = -1;
  struct iv_use *sub_use;
  comp_cost sub_cost;
  comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
                                         &can_autoinc, &inv_expr_id);

  if (cand->ainc_use == use)
    {
      if (can_autoinc)
        cost.cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
         opportunities, and it turns out it can't be used, set the cost to
         infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
        cost = infinite_cost;
    }
  for (sub_use = use->next;
       sub_use && !infinite_cost_p (cost);
       sub_use = sub_use->next)
    {
      sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
                                       &can_autoinc, &inv_expr_id);
      cost = add_costs (cost, sub_cost);
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
                   inv_expr_id);

  return !infinite_cost_p (cost);
}
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
               aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype = type;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
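/* A worked instance of the computation above (illustrative only):
   for a candidate with base 16 and step 4, the value after NITER = 10
   iterations is 16 + 4 * 10 = 56; if AT is after the increment, one
   more step is added, giving 16 + 4 * (10 + 1) = 60.  The arithmetic
   is carried out in the unsigned STEPTYPE so that any wrap-around is
   well defined.  */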
/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step - 1,
     i.e., N*type_range/step - 1.  Since type range is a power
     of two, N == step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
                                (TYPE_PRECISION (type)
                                 - tree_to_uhwi (pow2div)));

  return period;
}
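/* Worked example for the formula above (illustrative only): for a
   32-bit unsigned iv with step 12 = 0b1100, num_ending_zeros is 2, so
   the period is (2^32 >> 2) - 1 = 0x3FFFFFFF -- for that many
   increments the iv is guaranteed not to revisit a value.  For an odd
   step the period is the full 0xFFFFFFFF.  */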
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
         BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
         This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
        return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
        return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
                                  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
                                  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
        return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
                                      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
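/* For illustration (hypothetical GIMPLE): given

     base_1 = something_2 + 16;

   the query difference_cannot_overflow_p (data, base_1, 16) matches
   the PLUS_EXPR case above: the negated OFFSET cancels against E2,
   the affine combination becomes zero, and BASE - OFFSET, which is
   just something_2, provably does not overflow because the addition
   was performed in a non-wrapping type.  */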
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bar (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bar (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is
      no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
                           struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
        {
          a = TREE_OPERAND (op0, 0);
          b = TREE_OPERAND (mbz, 1);
        }
      else
        return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
        {
          a = TREE_OPERAND (op1, 0);
          b = TREE_OPERAND (mbz, 0);
        }
      else
        return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
                        cand->iv->step,
                        fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (wi::gtu_p (max_niter, period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
                         aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes the situation that the number of iterations may be zero
     (unless additional assumptions hold) can be handled by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
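/* An illustrative instance of the period check above: a candidate
   'unsigned char' iv with step 1 has period 255, so it cannot replace
   the exit test of a loop known to run up to 300 iterations -- the iv
   would wrap and the comparison against the computed bound could fire
   too early.  A 32-bit candidate in the same loop passes the check.  */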
/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied, if it is used in the loop body and DATA->body_includes_call.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}
/* Determines cost of basing replacement of USE on CAND in a condition.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
                                 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
                       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         depends_on_elim will have 'base' and 'n' set, which implies
         that both 'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear depends_on_elim and set
         elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
        {
          elim_inv_expr_id = get_expr_id (data, bound);
          bitmap_clear (depends_on_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
                              NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &depends_on_express, NULL,
                                       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
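/* To make the trade-off above concrete (costs are hypothetical): for
   'while (i < n)' rewritten as 'while (p < base + n)', elim_cost is
   the setup-adjusted cost of computing the loop-invariant bound
   'base + n' once before the loop, while express_cost is the
   per-iteration cost of re-deriving 'i' from the candidate plus the
   cost of the original bound 'n'.  Whichever alternative is cheaper
   is the one recorded via set_use_iv_cost.  */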
/* Determines cost of basing replacement of USE on CAND.  Returns false
   if USE cannot be based on CAND.  */

static bool
determine_use_iv_cost (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_use_iv_cost_generic (data, use, cand);

    case USE_ADDRESS:
      return determine_use_iv_cost_address (data, use, cand);

    case USE_COMPARE:
      return determine_use_iv_cost_condition (data, use, cand);

    default:
      gcc_unreachable ();
    }
}
/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  bitmap depends_on;
  bool can_autoinc;
  comp_cost cost;

  if (use->type != USE_ADDRESS)
    return false;

  cost = get_computation_cost (data, use, cand, true, &depends_on,
                               &can_autoinc, NULL);

  BITMAP_FREE (depends_on);

  return !infinite_cost_p (cost) && can_autoinc;
}
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < n_iv_uses (data); j++)
        {
          struct iv_use *use = iv_use (data, j);
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  bitmap to_clear = BITMAP_ALLOC (NULL);
  bitmap_iterator bi;

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
        {
          for (j = 0; j < n_iv_cands (data); j++)
            {
              cand = iv_cand (data, j);
              determine_use_iv_cost (data, use, cand);
            }
        }
      else
        {
          EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
            {
              cand = iv_cand (data, j);
              if (!determine_use_iv_cost (data, use, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (use->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);

          fprintf (dump_file, "Use %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
          for (j = 0; j < use->n_map_members; j++)
            {
              if (!use->cost_map[j].cand
                  || infinite_cost_p (use->cost_map[j].cost))
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       use->cost_map[j].cand->id,
                       use->cost_map[j].cost.cost,
                       use->cost_map[j].cost.complexity);
              if (use->cost_map[j].depends_on)
                bitmap_print (dump_file,
                              use->cost_map[j].depends_on, "","");
              if (use->cost_map[j].inv_expr_id != -1)
                fprintf (dump_file, " inv_expr:%d",
                         use->cost_map[j].inv_expr_id);
              fprintf (dump_file, "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Candidate costs:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
/* Calculates cost for having SIZE induction variables.  */

static unsigned
ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
{
  /* We add size to the cost, so that we prefer eliminating ivs
     if possible.  */
  return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
                                            data->body_includes_call);
}
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  int cmp;

  if (!a)
    return false;

  if (!b)
    return true;

  cmp = compare_costs (a->cost, b->cost);
  if (cmp < 0)
    return true;

  if (cmp > 0)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}
/* Returns the candidate by which USE is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
                                            ivs->n_regs
                                            + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
/* Removes the invariants in set INVS from set IVS.  */

static void
iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]--;
      if (ivs->n_invariant_uses[iid] == 0)
        ivs->n_regs--;
    }
}

/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
        ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
        ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}

/* Adds the invariants in set INVS to set IVS.  */

static void
iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      ivs->n_invariant_uses[iid]++;
      if (ivs->n_invariant_uses[iid] == 1)
        ivs->n_regs++;
    }
}
/* Set cost pair for USE in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
        {
          bitmap_set_bit (ivs->cands, cid);
          /* Do not count the pseudocandidates.  */
          if (cp->cand->iv)
            ivs->n_regs++;
          ivs->n_cands++;
          ivs->cand_cost += cp->cand->cost;

          iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
        }

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr_id != -1)
        {
          ivs->used_inv_expr[cp->inv_expr_id]++;
          if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
            ivs->num_used_inv_expr++;
        }
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= use->id);
  ivs->upto++;
  ivs->bad_uses++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      cp = get_use_iv_cost (data, use, cand);
      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
        {
          cand = iv_cand (data, i);
          cp = get_use_iv_cost (data, use, cand);
          if (cheaper_cost_pair (cp, best_cp))
            best_cp = cp;
        }
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
/* Returns true if all dependences of CP are among invariants in IVS.  */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
        return false;
    }

  return true;
}
/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
   it before NEXT_CHANGE.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}
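/* Typical (sketched) usage of the delta lists: build a chain of
   tentative changes, apply it, measure, and roll it back:

     delta = iv_ca_delta_add (use, old_cp, new_cp, delta);
     iv_ca_delta_commit (data, ivs, delta, true);    (apply)
     cost = iv_ca_cost (ivs);                        (measure)
     iv_ca_delta_commit (data, ivs, delta, false);   (revert)

   This is how iv_ca_extend and iv_ca_narrow below evaluate a change
   without copying the whole candidate assignment.  */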
/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}
/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      std::swap (act->old_cp, act->new_cp);
    }

  return prev;
}
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Free the list of changes DELTA.  */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
/* Allocates new iv candidates assignment.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free ((*ivs)->used_inv_expr);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }
  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it is true
   the function will try to find a solution with a minimal number of iv
   candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
        continue;

      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
        continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
        continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_cand *start,
              struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
        continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_use_iv_cost (data, use, start);

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, use, cp);
              acost = iv_ca_cost (ivs);

              if (compare_costs (acost, best_cost) < 0)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = iv_cand (data, ci);

              cp = get_use_iv_cost (data, use, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, use, cp);
              acost = iv_ca_cost (ivs);

              if (compare_costs (acost, best_cost) < 0)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, use, old_cp);

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
/* Check if CAND_IDX is a candidate other than OLD_CAND and has
   cheaper local cost for USE than BEST_CP.  Return pointer to
   the corresponding cost_pair, otherwise just return BEST_CP.  */

static struct cost_pair *
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
                        unsigned int cand_idx, struct iv_cand *old_cand,
                        struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = iv_cand (data, cand_idx);
  cp = get_use_iv_cost (data, use, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv use.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
          || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
        continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
         lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
        {
          use = iv_use (data, j);
          old_cp = iv_ca_cand_for_use (ivs, use);

          if (old_cp->cand != cand)
            continue;

          best_cp = old_cp;
          if (data->consider_all_candidates)
            for (k = 0; k < n_iv_cands (data); k++)
              best_cp = cheaper_cost_with_cand (data, use, k,
                                                old_cp->cand, best_cp);
          else
            EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
              best_cp = cheaper_cost_with_cand (data, use, k,
                                                old_cp->cand, best_cp);

          if (best_cp == old_cp)
            continue;

          act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
        }
      /* No need for further prune.  */
      if (!act_delta)
        continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
        {
          *delta = act_delta;
          return acost;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
        {
          cp = use->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, use, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, use);
          act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
                                       cp, act_delta);

          if (compare_costs (act_cost, best_cost) < 0)
            {
              best_cost = act_cost;

              iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function tries to break
   a locally optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
                    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (compare_costs (acost, best_cost) < 0)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
        {
          *try_replace_p = false;
          /* So far the candidate selecting algorithm tends to choose fewer
             IVs so that it can handle cases in which loops have many
             variables but the best choice is often to use only one general
             biv.  One weakness is it can't handle opposite cases, in which
             different candidates should be chosen with respect to each use.
             To solve the problem, we replace candidates in a manner described
             by the comments of iv_ca_replace, thus giving the general
             algorithm a chance to break the locally optimal fixed-point in
             these cases.  */
          best_cost = iv_ca_replace (data, ivs, &best_delta);
        }

      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
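/* The overall search loop, sketched (illustrative pseudo-code only):

     set = get_initial_solution (data, originalp);
     while (try_improve_iv_set (data, set, &try_replace_p))
       ;   (each round: try add-one, then prune, then replace)

   Each successful round strictly lowers iv_ca_cost, so the iteration
   in find_optimal_iv_set_1 below terminates.  */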
/* Attempts to find the optimal set of induction variables.  We do a simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
               data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;

   Directly propagating t over to (1) will introduce an overlapping live range,
   thus increasing register pressure.  This peephole transforms it into:

   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address_1 (struct ivopts_data *data,
                       struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  If it's the
   first use of a group, rewrites sub uses in the group too.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  struct iv_use *next;

  gcc_assert (use->sub_id == 0);
  rewrite_use_address_1 (data, use, cand);
  update_stmt (use->stmt);

  for (next = use->next; next != NULL; next = next->next)
    {
      rewrite_use_address_1 (data, next, cand);
      update_stmt (next->stmt);
    }
}
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
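
/* Sketch of the two outcomes above, with hypothetical SSA names: when
   elimination succeeds, an exit test "if (i_1 < n_2)" is replaced by
   "if (ivtmp_3 < bound_4)", where bound_4 is computed once on the
   preheader edge; otherwise the compared operand is merely recomputed
   from the candidate in place and the condition code is unchanged.  */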
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
      case USE_NONLINEAR_EXPR:
	rewrite_use_nonlinear_expr (data, use, cand);
	break;

      case USE_ADDRESS:
	rewrite_use_address (data, use, cand);
	break;

      case USE_COMPARE:
	rewrite_use_compare (data, use, cand);
	break;

      default:
	gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  struct iv_use *use;
  struct iv_cand *cand;
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
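
/* Debug-info note: an iv that dies here may still be referenced from
   debug binds.  The code above re-expresses such binds through the most
   similar surviving candidate -- either directly, when the name appears
   in a single plain bind, or via an artificial DEBUG_EXPR_DECL when it
   occurs several times or inside larger expressions -- so var-tracking
   can still describe the eliminated variable.  */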
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

static bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct iv_use *pre = use, *sub = use->next;

      while (sub)
	{
	  gcc_assert (sub->related_cands == NULL);
	  gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);

	  free (sub->iv);
	  pre = sub;
	  sub = sub->next;
	  free (pre);
	}

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}
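
/* Unlike free_loop_data, which runs after every processed loop, this
   teardown runs once per invocation of the pass and releases the
   pass-global containers themselves.  */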
/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
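
/* The predicate above feeds data->body_includes_call: loops containing
   calls lose the call-clobbered registers, so the flag is passed down to
   estimate_reg_pressure_cost when pricing the size of a candidate set.  */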
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  group_address_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}