1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
71 #include "double-int.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
82 #include "hard-reg-set.h"
84 #include "dominance.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
93 #include "gimple-expr.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
115 #include "statistics.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
123 #include "emit-rtl.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
141 #include "tree-vectorizer.h"
143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
144 cost of different addressing modes. This should be moved to a TBD
145 interface between the GIMPLE and RTL worlds. */
148 /* The infinite cost. */
149 #define INFTY 10000000
151 #define AVG_LOOP_NITER(LOOP) 5
153 /* Returns the expected number of loop iterations for LOOP.
154 The average trip count is computed from profile data if it
157 static inline HOST_WIDE_INT
158 avg_loop_niter (struct loop
*loop
)
160 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
162 return AVG_LOOP_NITER (loop
);
167 /* Representation of the induction variable. */
170 tree base
; /* Initial value of the iv. */
171 tree base_object
; /* A memory object to that the induction variable points. */
172 tree step
; /* Step of the iv (constant only). */
173 tree ssa_name
; /* The ssa name with the value. */
174 bool biv_p
; /* Is it a biv? */
175 bool have_use_for
; /* Do we already have a use for it? */
176 unsigned use_id
; /* The identifier in the use if it is the case. */
179 /* Per-ssa version information (induction variable descriptions, etc.). */
182 tree name
; /* The ssa name. */
183 struct iv
*iv
; /* Induction variable description. */
184 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
185 an expression that is not an induction variable. */
186 bool preserve_biv
; /* For the original biv, whether to preserve it. */
187 unsigned inv_id
; /* Id of an invariant. */
193 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
194 USE_ADDRESS
, /* Use in an address. */
195 USE_COMPARE
/* Use is a compare. */
198 /* Cost of a computation. */
201 int cost
; /* The runtime cost. */
202 unsigned complexity
; /* The estimate of the complexity of the code for
203 the computation (in no concrete units --
204 complexity field should be larger for more
205 complex expressions and addressing modes). */
208 static const comp_cost no_cost
= {0, 0};
209 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
211 /* The candidate - cost pair. */
214 struct iv_cand
*cand
; /* The candidate. */
215 comp_cost cost
; /* The cost. */
216 bitmap depends_on
; /* The list of invariants that have to be
218 tree value
; /* For final value elimination, the expression for
219 the final value of the iv. For iv elimination,
220 the new bound to compare with. */
221 enum tree_code comp
; /* For iv elimination, the comparison. */
222 int inv_expr_id
; /* Loop invariant expression id. */
228 unsigned id
; /* The id of the use. */
229 enum use_type type
; /* Type of the use. */
230 struct iv
*iv
; /* The induction variable it is based on. */
231 gimple stmt
; /* Statement in that it occurs. */
232 tree
*op_p
; /* The place where it occurs. */
233 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
236 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
237 struct cost_pair
*cost_map
;
238 /* The costs wrto the iv candidates. */
240 struct iv_cand
*selected
;
241 /* The selected candidate. */
244 /* The position where the iv is computed. */
247 IP_NORMAL
, /* At the end, just before the exit condition. */
248 IP_END
, /* At the end of the latch block. */
249 IP_BEFORE_USE
, /* Immediately before a specific use. */
250 IP_AFTER_USE
, /* Immediately after a specific use. */
251 IP_ORIGINAL
/* The original biv. */
254 /* The induction variable candidate. */
257 unsigned id
; /* The number of the candidate. */
258 bool important
; /* Whether this is an "important" candidate, i.e. such
259 that it should be considered by all uses. */
260 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
261 gimple incremented_at
;/* For original biv, the statement where it is
263 tree var_before
; /* The variable used for it before increment. */
264 tree var_after
; /* The variable used for it after increment. */
265 struct iv
*iv
; /* The value of the candidate. NULL for
266 "pseudocandidate" used to indicate the possibility
267 to replace the final value of an iv by direct
268 computation of the value. */
269 unsigned cost
; /* Cost of the candidate. */
270 unsigned cost_step
; /* Cost of the candidate's increment operation. */
271 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
272 where it is incremented. */
273 bitmap depends_on
; /* The list of invariants that are used in step of the
277 /* Loop invariant expression hashtable entry. */
278 struct iv_inv_expr_ent
285 /* The data used by the induction variable optimizations. */
287 typedef struct iv_use
*iv_use_p
;
289 typedef struct iv_cand
*iv_cand_p
;
291 /* Hashtable helpers. */
293 struct iv_inv_expr_hasher
: typed_free_remove
<iv_inv_expr_ent
>
295 typedef iv_inv_expr_ent
*value_type
;
296 typedef iv_inv_expr_ent
*compare_type
;
297 static inline hashval_t
hash (const iv_inv_expr_ent
*);
298 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
301 /* Hash function for loop invariant expressions. */
304 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
309 /* Hash table equality function for expressions. */
312 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
313 const iv_inv_expr_ent
*expr2
)
315 return expr1
->hash
== expr2
->hash
316 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
321 /* The currently optimized loop. */
322 struct loop
*current_loop
;
323 source_location loop_loc
;
325 /* Numbers of iterations for all exits of the current loop. */
326 hash_map
<edge
, tree_niter_desc
*> *niters
;
328 /* Number of registers used in it. */
331 /* The size of version_info array allocated. */
332 unsigned version_info_size
;
334 /* The array of information for the ssa names. */
335 struct version_info
*version_info
;
337 /* The hashtable of loop invariant expressions created
339 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
341 /* Loop invariant expression id. */
344 /* The bitmap of indices in version_info whose value was changed. */
347 /* The uses of induction variables. */
348 vec
<iv_use_p
> iv_uses
;
350 /* The candidates. */
351 vec
<iv_cand_p
> iv_candidates
;
353 /* A bitmap of important candidates. */
354 bitmap important_candidates
;
356 /* Cache used by tree_to_aff_combination_expand. */
357 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
359 /* The maximum invariant id. */
362 /* Whether to consider just related and important candidates when replacing a
364 bool consider_all_candidates
;
366 /* Are we optimizing for speed? */
369 /* Whether the loop body includes any function calls. */
370 bool body_includes_call
;
372 /* Whether the loop body can only be exited via single exit. */
373 bool loop_single_exit_p
;
376 /* An assignment of iv candidates to uses. */
380 /* The number of uses covered by the assignment. */
383 /* Number of uses that cannot be expressed by the candidates in the set. */
386 /* Candidate assigned to a use, together with the related costs. */
387 struct cost_pair
**cand_for_use
;
389 /* Number of times each candidate is used. */
390 unsigned *n_cand_uses
;
392 /* The candidates used. */
395 /* The number of candidates in the set. */
398 /* Total number of registers needed. */
401 /* Total cost of expressing uses. */
402 comp_cost cand_use_cost
;
404 /* Total cost of candidates. */
407 /* Number of times each invariant is used. */
408 unsigned *n_invariant_uses
;
410 /* The array holding the number of uses of each loop
411 invariant expressions created by ivopt. */
412 unsigned *used_inv_expr
;
414 /* The number of created loop invariants. */
415 unsigned num_used_inv_expr
;
417 /* Total cost of the assignment. */
421 /* Difference of two iv candidate assignments. */
428 /* An old assignment (for rollback purposes). */
429 struct cost_pair
*old_cp
;
431 /* A new assignment. */
432 struct cost_pair
*new_cp
;
434 /* Next change in the list. */
435 struct iv_ca_delta
*next_change
;
438 /* Bound on number of candidates below that all candidates are considered. */
440 #define CONSIDER_ALL_CANDIDATES_BOUND \
441 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
443 /* If there are more iv occurrences, we just give up (it is quite unlikely that
444 optimizing such a loop would help, and it would take ages). */
446 #define MAX_CONSIDERED_USES \
447 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
449 /* If there are at most this number of ivs in the set, try removing unnecessary
450 ivs from the set always. */
452 #define ALWAYS_PRUNE_CAND_SET_BOUND \
453 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
455 /* The list of trees for that the decl_rtl field must be reset is stored
458 static vec
<tree
> decl_rtl_to_reset
;
460 static comp_cost
force_expr_to_var_cost (tree
, bool);
462 /* Number of uses recorded in DATA. */
464 static inline unsigned
465 n_iv_uses (struct ivopts_data
*data
)
467 return data
->iv_uses
.length ();
470 /* Ith use recorded in DATA. */
472 static inline struct iv_use
*
473 iv_use (struct ivopts_data
*data
, unsigned i
)
475 return data
->iv_uses
[i
];
478 /* Number of candidates recorded in DATA. */
480 static inline unsigned
481 n_iv_cands (struct ivopts_data
*data
)
483 return data
->iv_candidates
.length ();
486 /* Ith candidate recorded in DATA. */
488 static inline struct iv_cand
*
489 iv_cand (struct ivopts_data
*data
, unsigned i
)
491 return data
->iv_candidates
[i
];
494 /* The single loop exit if it dominates the latch, NULL otherwise. */
497 single_dom_exit (struct loop
*loop
)
499 edge exit
= single_exit (loop
);
504 if (!just_once_each_iteration_p (loop
, exit
->src
))
510 /* Dumps information about the induction variable IV to FILE. */
513 dump_iv (FILE *file
, struct iv
*iv
)
517 fprintf (file
, "ssa name ");
518 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
519 fprintf (file
, "\n");
522 fprintf (file
, " type ");
523 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
524 fprintf (file
, "\n");
528 fprintf (file
, " base ");
529 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
530 fprintf (file
, "\n");
532 fprintf (file
, " step ");
533 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
534 fprintf (file
, "\n");
538 fprintf (file
, " invariant ");
539 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
540 fprintf (file
, "\n");
545 fprintf (file
, " base object ");
546 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
547 fprintf (file
, "\n");
551 fprintf (file
, " is a biv\n");
554 /* Dumps information about the USE to FILE. */
557 dump_use (FILE *file
, struct iv_use
*use
)
559 fprintf (file
, "use %d\n", use
->id
);
563 case USE_NONLINEAR_EXPR
:
564 fprintf (file
, " generic\n");
568 fprintf (file
, " address\n");
572 fprintf (file
, " compare\n");
579 fprintf (file
, " in statement ");
580 print_gimple_stmt (file
, use
->stmt
, 0, 0);
581 fprintf (file
, "\n");
583 fprintf (file
, " at position ");
585 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
586 fprintf (file
, "\n");
588 dump_iv (file
, use
->iv
);
590 if (use
->related_cands
)
592 fprintf (file
, " related candidates ");
593 dump_bitmap (file
, use
->related_cands
);
597 /* Dumps information about the uses to FILE. */
600 dump_uses (FILE *file
, struct ivopts_data
*data
)
605 for (i
= 0; i
< n_iv_uses (data
); i
++)
607 use
= iv_use (data
, i
);
609 dump_use (file
, use
);
610 fprintf (file
, "\n");
614 /* Dumps information about induction variable candidate CAND to FILE. */
617 dump_cand (FILE *file
, struct iv_cand
*cand
)
619 struct iv
*iv
= cand
->iv
;
621 fprintf (file
, "candidate %d%s\n",
622 cand
->id
, cand
->important
? " (important)" : "");
624 if (cand
->depends_on
)
626 fprintf (file
, " depends on ");
627 dump_bitmap (file
, cand
->depends_on
);
632 fprintf (file
, " final value replacement\n");
636 if (cand
->var_before
)
638 fprintf (file
, " var_before ");
639 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
640 fprintf (file
, "\n");
644 fprintf (file
, " var_after ");
645 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
646 fprintf (file
, "\n");
652 fprintf (file
, " incremented before exit test\n");
656 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
660 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
664 fprintf (file
, " incremented at end\n");
668 fprintf (file
, " original biv\n");
675 /* Returns the info for ssa version VER. */
677 static inline struct version_info
*
678 ver_info (struct ivopts_data
*data
, unsigned ver
)
680 return data
->version_info
+ ver
;
683 /* Returns the info for ssa name NAME. */
685 static inline struct version_info
*
686 name_info (struct ivopts_data
*data
, tree name
)
688 return ver_info (data
, SSA_NAME_VERSION (name
));
691 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
695 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
697 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
701 if (sbb
== loop
->latch
)
707 return stmt
== last_stmt (bb
);
710 /* Returns true if STMT if after the place where the original induction
711 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
712 if the positions are identical. */
715 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
717 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
718 basic_block stmt_bb
= gimple_bb (stmt
);
720 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
723 if (stmt_bb
!= cand_bb
)
727 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
729 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
732 /* Returns true if STMT if after the place where the induction variable
733 CAND is incremented in LOOP. */
736 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
744 return stmt_after_ip_normal_pos (loop
, stmt
);
748 return stmt_after_inc_pos (cand
, stmt
, false);
751 return stmt_after_inc_pos (cand
, stmt
, true);
758 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
761 abnormal_ssa_name_p (tree exp
)
766 if (TREE_CODE (exp
) != SSA_NAME
)
769 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
772 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
773 abnormal phi node. Callback for for_each_index. */
776 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
777 void *data ATTRIBUTE_UNUSED
)
779 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
781 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
783 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
787 return !abnormal_ssa_name_p (*index
);
790 /* Returns true if EXPR contains a ssa name that occurs in an
791 abnormal phi node. */
794 contains_abnormal_ssa_name_p (tree expr
)
797 enum tree_code_class codeclass
;
802 code
= TREE_CODE (expr
);
803 codeclass
= TREE_CODE_CLASS (code
);
805 if (code
== SSA_NAME
)
806 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
808 if (code
== INTEGER_CST
809 || is_gimple_min_invariant (expr
))
812 if (code
== ADDR_EXPR
)
813 return !for_each_index (&TREE_OPERAND (expr
, 0),
814 idx_contains_abnormal_ssa_name_p
,
817 if (code
== COND_EXPR
)
818 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
819 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
826 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
831 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
843 /* Returns the structure describing number of iterations determined from
844 EXIT of DATA->current_loop, or NULL if something goes wrong. */
846 static struct tree_niter_desc
*
847 niter_for_exit (struct ivopts_data
*data
, edge exit
)
849 struct tree_niter_desc
*desc
;
850 tree_niter_desc
**slot
;
854 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
858 slot
= data
->niters
->get (exit
);
862 /* Try to determine number of iterations. We cannot safely work with ssa
863 names that appear in phi nodes on abnormal edges, so that we do not
864 create overlapping life ranges for them (PR 27283). */
865 desc
= XNEW (struct tree_niter_desc
);
866 if (!number_of_iterations_exit (data
->current_loop
,
868 || contains_abnormal_ssa_name_p (desc
->niter
))
873 data
->niters
->put (exit
, desc
);
881 /* Returns the structure describing number of iterations determined from
882 single dominating exit of DATA->current_loop, or NULL if something
885 static struct tree_niter_desc
*
886 niter_for_single_dom_exit (struct ivopts_data
*data
)
888 edge exit
= single_dom_exit (data
->current_loop
);
893 return niter_for_exit (data
, exit
);
896 /* Initializes data structures used by the iv optimization pass, stored
900 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
902 data
->version_info_size
= 2 * num_ssa_names
;
903 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
904 data
->relevant
= BITMAP_ALLOC (NULL
);
905 data
->important_candidates
= BITMAP_ALLOC (NULL
);
906 data
->max_inv_id
= 0;
908 data
->iv_uses
.create (20);
909 data
->iv_candidates
.create (20);
910 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
911 data
->inv_expr_id
= 0;
912 data
->name_expansion_cache
= NULL
;
913 decl_rtl_to_reset
.create (20);
916 /* Returns a memory object to that EXPR points. In case we are able to
917 determine that it does not point to any such object, NULL is returned. */
920 determine_base_object (tree expr
)
922 enum tree_code code
= TREE_CODE (expr
);
925 /* If this is a pointer casted to any type, we need to determine
926 the base object for the pointer; so handle conversions before
927 throwing away non-pointer expressions. */
928 if (CONVERT_EXPR_P (expr
))
929 return determine_base_object (TREE_OPERAND (expr
, 0));
931 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
940 obj
= TREE_OPERAND (expr
, 0);
941 base
= get_base_address (obj
);
946 if (TREE_CODE (base
) == MEM_REF
)
947 return determine_base_object (TREE_OPERAND (base
, 0));
949 return fold_convert (ptr_type_node
,
950 build_fold_addr_expr (base
));
952 case POINTER_PLUS_EXPR
:
953 return determine_base_object (TREE_OPERAND (expr
, 0));
957 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
961 return fold_convert (ptr_type_node
, expr
);
965 /* Return true if address expression with non-DECL_P operand appears
969 contain_complex_addr_expr (tree expr
)
974 switch (TREE_CODE (expr
))
976 case POINTER_PLUS_EXPR
:
979 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
980 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
984 return (!DECL_P (TREE_OPERAND (expr
, 0)));
993 /* Allocates an induction variable with given initial value BASE and step STEP
997 alloc_iv (tree base
, tree step
)
1000 struct iv
*iv
= XCNEW (struct iv
);
1001 gcc_assert (step
!= NULL_TREE
);
1003 /* Lower address expression in base except ones with DECL_P as operand.
1005 1) More accurate cost can be computed for address expressions;
1006 2) Duplicate candidates won't be created for bases in different
1007 forms, like &a[0] and &a. */
1009 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1010 || contain_complex_addr_expr (expr
))
1013 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1014 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1018 iv
->base_object
= determine_base_object (base
);
1021 iv
->have_use_for
= false;
1023 iv
->ssa_name
= NULL_TREE
;
1028 /* Sets STEP and BASE for induction variable IV. */
1031 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
)
1033 struct version_info
*info
= name_info (data
, iv
);
1035 gcc_assert (!info
->iv
);
1037 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1038 info
->iv
= alloc_iv (base
, step
);
1039 info
->iv
->ssa_name
= iv
;
1042 /* Finds induction variable declaration for VAR. */
1045 get_iv (struct ivopts_data
*data
, tree var
)
1048 tree type
= TREE_TYPE (var
);
1050 if (!POINTER_TYPE_P (type
)
1051 && !INTEGRAL_TYPE_P (type
))
1054 if (!name_info (data
, var
)->iv
)
1056 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1059 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1060 set_iv (data
, var
, var
, build_int_cst (type
, 0));
1063 return name_info (data
, var
)->iv
;
1066 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1067 not define a simple affine biv with nonzero step. */
1070 determine_biv_step (gphi
*phi
)
1072 struct loop
*loop
= gimple_bb (phi
)->loop_father
;
1073 tree name
= PHI_RESULT (phi
);
1076 if (virtual_operand_p (name
))
1079 if (!simple_iv (loop
, loop
, name
, &iv
, true))
1082 return integer_zerop (iv
.step
) ? NULL_TREE
: iv
.step
;
1085 /* Return the first non-invariant ssa var found in EXPR. */
1088 extract_single_var_from_expr (tree expr
)
1092 enum tree_code code
;
1094 if (!expr
|| is_gimple_min_invariant (expr
))
1097 code
= TREE_CODE (expr
);
1098 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1100 n
= TREE_OPERAND_LENGTH (expr
);
1101 for (i
= 0; i
< n
; i
++)
1103 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1109 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1112 /* Finds basic ivs. */
1115 find_bivs (struct ivopts_data
*data
)
1118 tree step
, type
, base
, stop
;
1120 struct loop
*loop
= data
->current_loop
;
1123 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1127 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1130 step
= determine_biv_step (phi
);
1134 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1135 /* Stop expanding iv base at the first ssa var referred by iv step.
1136 Ideally we should stop at any ssa var, because that's expensive
1137 and unusual to happen, we just do it on the first one.
1139 See PR64705 for the rationale. */
1140 stop
= extract_single_var_from_expr (step
);
1141 base
= expand_simple_operations (base
, stop
);
1142 if (contains_abnormal_ssa_name_p (base
)
1143 || contains_abnormal_ssa_name_p (step
))
1146 type
= TREE_TYPE (PHI_RESULT (phi
));
1147 base
= fold_convert (type
, base
);
1150 if (POINTER_TYPE_P (type
))
1151 step
= convert_to_ptrofftype (step
);
1153 step
= fold_convert (type
, step
);
1156 set_iv (data
, PHI_RESULT (phi
), base
, step
);
1163 /* Marks basic ivs. */
1166 mark_bivs (struct ivopts_data
*data
)
1171 struct iv
*iv
, *incr_iv
;
1172 struct loop
*loop
= data
->current_loop
;
1173 basic_block incr_bb
;
1176 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1180 iv
= get_iv (data
, PHI_RESULT (phi
));
1184 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1185 def
= SSA_NAME_DEF_STMT (var
);
1186 /* Don't mark iv peeled from other one as biv. */
1188 && gimple_code (def
) == GIMPLE_PHI
1189 && gimple_bb (def
) == loop
->header
)
1192 incr_iv
= get_iv (data
, var
);
1196 /* If the increment is in the subloop, ignore it. */
1197 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1198 if (incr_bb
->loop_father
!= data
->current_loop
1199 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1203 incr_iv
->biv_p
= true;
1207 /* Checks whether STMT defines a linear induction variable and stores its
1208 parameters to IV. */
1211 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1214 struct loop
*loop
= data
->current_loop
;
1216 iv
->base
= NULL_TREE
;
1217 iv
->step
= NULL_TREE
;
1219 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1222 lhs
= gimple_assign_lhs (stmt
);
1223 if (TREE_CODE (lhs
) != SSA_NAME
)
1226 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1229 /* Stop expanding iv base at the first ssa var referred by iv step.
1230 Ideally we should stop at any ssa var, because that's expensive
1231 and unusual to happen, we just do it on the first one.
1233 See PR64705 for the rationale. */
1234 stop
= extract_single_var_from_expr (iv
->step
);
1235 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1236 if (contains_abnormal_ssa_name_p (iv
->base
)
1237 || contains_abnormal_ssa_name_p (iv
->step
))
1240 /* If STMT could throw, then do not consider STMT as defining a GIV.
1241 While this will suppress optimizations, we can not safely delete this
1242 GIV and associated statements, even if it appears it is not used. */
1243 if (stmt_could_throw_p (stmt
))
1249 /* Finds general ivs in statement STMT. */
1252 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1256 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1259 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
);
1262 /* Finds general ivs in basic block BB. */
1265 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1267 gimple_stmt_iterator bsi
;
1269 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1270 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1273 /* Finds general ivs. */
1276 find_givs (struct ivopts_data
*data
)
1278 struct loop
*loop
= data
->current_loop
;
1279 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1282 for (i
= 0; i
< loop
->num_nodes
; i
++)
1283 find_givs_in_bb (data
, body
[i
]);
1287 /* For each ssa name defined in LOOP determines whether it is an induction
1288 variable and if so, its initial value and step. */
1291 find_induction_variables (struct ivopts_data
*data
)
1296 if (!find_bivs (data
))
1302 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1304 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1308 fprintf (dump_file
, " number of iterations ");
1309 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1310 if (!integer_zerop (niter
->may_be_zero
))
1312 fprintf (dump_file
, "; zero if ");
1313 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1315 fprintf (dump_file
, "\n\n");
1318 fprintf (dump_file
, "Induction variables:\n\n");
1320 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1322 if (ver_info (data
, i
)->iv
)
1323 dump_iv (dump_file
, ver_info (data
, i
)->iv
);
1330 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1332 static struct iv_use
*
1333 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1334 gimple stmt
, enum use_type use_type
)
1336 struct iv_use
*use
= XCNEW (struct iv_use
);
1338 use
->id
= n_iv_uses (data
);
1339 use
->type
= use_type
;
1343 use
->related_cands
= BITMAP_ALLOC (NULL
);
1345 /* To avoid showing ssa name in the dumps, if it was not reset by the
1347 iv
->ssa_name
= NULL_TREE
;
1349 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1350 dump_use (dump_file
, use
);
1352 data
->iv_uses
.safe_push (use
);
1357 /* Checks whether OP is a loop-level invariant and if so, records it.
1358 NONLINEAR_USE is true if the invariant is used in a way we do not
1359 handle specially. */
1362 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1365 struct version_info
*info
;
1367 if (TREE_CODE (op
) != SSA_NAME
1368 || virtual_operand_p (op
))
1371 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1373 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1376 info
= name_info (data
, op
);
1378 info
->has_nonlin_use
|= nonlinear_use
;
1380 info
->inv_id
= ++data
->max_inv_id
;
1381 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1384 /* Checks whether the use OP is interesting and if so, records it. */
1386 static struct iv_use
*
1387 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1394 if (TREE_CODE (op
) != SSA_NAME
)
1397 iv
= get_iv (data
, op
);
1401 if (iv
->have_use_for
)
1403 use
= iv_use (data
, iv
->use_id
);
1405 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1409 if (integer_zerop (iv
->step
))
1411 record_invariant (data
, op
, true);
1414 iv
->have_use_for
= true;
1416 civ
= XNEW (struct iv
);
1419 stmt
= SSA_NAME_DEF_STMT (op
);
1420 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1421 || is_gimple_assign (stmt
));
1423 use
= record_use (data
, NULL
, civ
, stmt
, USE_NONLINEAR_EXPR
);
1424 iv
->use_id
= use
->id
;
1429 /* Given a condition in statement STMT, checks whether it is a compare
1430 of an induction variable and an invariant. If this is the case,
1431 CONTROL_VAR is set to location of the iv, BOUND to the location of
1432 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1433 induction variable descriptions, and true is returned. If this is not
1434 the case, CONTROL_VAR and BOUND are set to the arguments of the
1435 condition and false is returned. */
1438 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1439 tree
**control_var
, tree
**bound
,
1440 struct iv
**iv_var
, struct iv
**iv_bound
)
1442 /* The objects returned when COND has constant operands. */
1443 static struct iv const_iv
;
1445 tree
*op0
= &zero
, *op1
= &zero
, *tmp_op
;
1446 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
, *tmp_iv
;
1449 if (gimple_code (stmt
) == GIMPLE_COND
)
1451 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1452 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1453 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1457 op0
= gimple_assign_rhs1_ptr (stmt
);
1458 op1
= gimple_assign_rhs2_ptr (stmt
);
1461 zero
= integer_zero_node
;
1462 const_iv
.step
= integer_zero_node
;
1464 if (TREE_CODE (*op0
) == SSA_NAME
)
1465 iv0
= get_iv (data
, *op0
);
1466 if (TREE_CODE (*op1
) == SSA_NAME
)
1467 iv1
= get_iv (data
, *op1
);
1469 /* Exactly one of the compared values must be an iv, and the other one must
1474 if (integer_zerop (iv0
->step
))
1476 /* Control variable may be on the other side. */
1477 tmp_op
= op0
; op0
= op1
; op1
= tmp_op
;
1478 tmp_iv
= iv0
; iv0
= iv1
; iv1
= tmp_iv
;
1480 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1495 /* Checks whether the condition in STMT is interesting and if so,
1499 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1501 tree
*var_p
, *bound_p
;
1502 struct iv
*var_iv
, *civ
;
1504 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1506 find_interesting_uses_op (data
, *var_p
);
1507 find_interesting_uses_op (data
, *bound_p
);
1511 civ
= XNEW (struct iv
);
1513 record_use (data
, NULL
, civ
, stmt
, USE_COMPARE
);
1516 /* Returns the outermost loop EXPR is obviously invariant in
1517 relative to the loop LOOP, i.e. if all its operands are defined
1518 outside of the returned loop. Returns NULL if EXPR is not
1519 even obviously invariant in LOOP. */
1522 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1527 if (is_gimple_min_invariant (expr
))
1528 return current_loops
->tree_root
;
1530 if (TREE_CODE (expr
) == SSA_NAME
)
1532 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1535 if (flow_bb_inside_loop_p (loop
, def_bb
))
1537 return superloop_at_depth (loop
,
1538 loop_depth (def_bb
->loop_father
) + 1);
1541 return current_loops
->tree_root
;
1547 unsigned maxdepth
= 0;
1548 len
= TREE_OPERAND_LENGTH (expr
);
1549 for (i
= 0; i
< len
; i
++)
1551 struct loop
*ivloop
;
1552 if (!TREE_OPERAND (expr
, i
))
1555 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1558 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1561 return superloop_at_depth (loop
, maxdepth
);
1564 /* Returns true if expression EXPR is obviously invariant in LOOP,
1565 i.e. if all its operands are defined outside of the LOOP. LOOP
1566 should not be the function body. */
1569 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1574 gcc_assert (loop_depth (loop
) > 0);
1576 if (is_gimple_min_invariant (expr
))
1579 if (TREE_CODE (expr
) == SSA_NAME
)
1581 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1583 && flow_bb_inside_loop_p (loop
, def_bb
))
1592 len
= TREE_OPERAND_LENGTH (expr
);
1593 for (i
= 0; i
< len
; i
++)
1594 if (TREE_OPERAND (expr
, i
)
1595 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1601 /* Cumulates the steps of indices into DATA and replaces their values with the
1602 initial ones. Returns false when the value of the index cannot be determined.
1603 Callback for for_each_index. */
1605 struct ifs_ivopts_data
1607 struct ivopts_data
*ivopts_data
;
1613 idx_find_step (tree base
, tree
*idx
, void *data
)
1615 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1617 tree step
, iv_base
, iv_step
, lbound
, off
;
1618 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1620 /* If base is a component ref, require that the offset of the reference
1622 if (TREE_CODE (base
) == COMPONENT_REF
)
1624 off
= component_ref_field_offset (base
);
1625 return expr_invariant_in_loop_p (loop
, off
);
1628 /* If base is array, first check whether we will be able to move the
1629 reference out of the loop (in order to take its address in strength
1630 reduction). In order for this to work we need both lower bound
1631 and step to be loop invariants. */
1632 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1634 /* Moreover, for a range, the size needs to be invariant as well. */
1635 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1636 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1639 step
= array_ref_element_size (base
);
1640 lbound
= array_ref_low_bound (base
);
1642 if (!expr_invariant_in_loop_p (loop
, step
)
1643 || !expr_invariant_in_loop_p (loop
, lbound
))
1647 if (TREE_CODE (*idx
) != SSA_NAME
)
1650 iv
= get_iv (dta
->ivopts_data
, *idx
);
1654 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1655 *&x[0], which is not folded and does not trigger the
1656 ARRAY_REF path below. */
1659 if (integer_zerop (iv
->step
))
1662 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1664 step
= array_ref_element_size (base
);
1666 /* We only handle addresses whose step is an integer constant. */
1667 if (TREE_CODE (step
) != INTEGER_CST
)
1671 /* The step for pointer arithmetics already is 1 byte. */
1672 step
= size_one_node
;
1676 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1677 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1680 /* The index might wrap. */
1684 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1685 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1690 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1691 object is passed to it in DATA. */
1694 idx_record_use (tree base
, tree
*idx
,
1697 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1698 find_interesting_uses_op (data
, *idx
);
1699 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1701 find_interesting_uses_op (data
, array_ref_element_size (base
));
1702 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1707 /* If we can prove that TOP = cst * BOT for some constant cst,
1708 store cst to MUL and return true. Otherwise return false.
1709 The returned value is always sign-extended, regardless of the
1710 signedness of TOP and BOT. */
1713 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1716 enum tree_code code
;
1717 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1718 widest_int res
, p0
, p1
;
1723 if (operand_equal_p (top
, bot
, 0))
1729 code
= TREE_CODE (top
);
1733 mby
= TREE_OPERAND (top
, 1);
1734 if (TREE_CODE (mby
) != INTEGER_CST
)
1737 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1740 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1745 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1746 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1749 if (code
== MINUS_EXPR
)
1751 *mul
= wi::sext (p0
+ p1
, precision
);
1755 if (TREE_CODE (bot
) != INTEGER_CST
)
1758 p0
= widest_int::from (top
, SIGNED
);
1759 p1
= widest_int::from (bot
, SIGNED
);
1762 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1770 /* Return true if memory reference REF with step STEP may be unaligned. */
1773 may_be_unaligned_p (tree ref
, tree step
)
1775 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1776 thus they are not misaligned. */
1777 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1780 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1781 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1782 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1784 unsigned HOST_WIDE_INT bitpos
;
1785 unsigned int ref_align
;
1786 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1787 if (ref_align
< align
1788 || (bitpos
% align
) != 0
1789 || (bitpos
% BITS_PER_UNIT
) != 0)
1792 unsigned int trailing_zeros
= tree_ctz (step
);
1793 if (trailing_zeros
< HOST_BITS_PER_INT
1794 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1800 /* Return true if EXPR may be non-addressable. */
1803 may_be_nonaddressable_p (tree expr
)
1805 switch (TREE_CODE (expr
))
1807 case TARGET_MEM_REF
:
1808 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1809 target, thus they are always addressable. */
1813 /* Likewise for MEM_REFs, modulo the storage order. */
1814 return REF_REVERSE_STORAGE_ORDER (expr
);
1817 if (REF_REVERSE_STORAGE_ORDER (expr
))
1819 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1822 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
1824 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1825 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1828 case ARRAY_RANGE_REF
:
1829 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
1831 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1833 case VIEW_CONVERT_EXPR
:
1834 /* This kind of view-conversions may wrap non-addressable objects
1835 and make them look addressable. After some processing the
1836 non-addressability may be uncovered again, causing ADDR_EXPRs
1837 of inappropriate objects to be built. */
1838 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1839 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1841 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1853 /* Finds addresses in *OP_P inside STMT. */
1856 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1858 tree base
= *op_p
, step
= size_zero_node
;
1860 struct ifs_ivopts_data ifs_ivopts_data
;
1862 /* Do not play with volatile memory references. A bit too conservative,
1863 perhaps, but safe. */
1864 if (gimple_has_volatile_ops (stmt
))
1867 /* Ignore bitfields for now. Not really something terribly complicated
1869 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1872 base
= unshare_expr (base
);
1874 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1876 tree type
= build_pointer_type (TREE_TYPE (base
));
1880 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1882 civ
= get_iv (data
, TMR_BASE (base
));
1886 TMR_BASE (base
) = civ
->base
;
1889 if (TMR_INDEX2 (base
)
1890 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1892 civ
= get_iv (data
, TMR_INDEX2 (base
));
1896 TMR_INDEX2 (base
) = civ
->base
;
1899 if (TMR_INDEX (base
)
1900 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1902 civ
= get_iv (data
, TMR_INDEX (base
));
1906 TMR_INDEX (base
) = civ
->base
;
1911 if (TMR_STEP (base
))
1912 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1914 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1918 if (integer_zerop (step
))
1920 base
= tree_mem_ref_addr (type
, base
);
1924 ifs_ivopts_data
.ivopts_data
= data
;
1925 ifs_ivopts_data
.stmt
= stmt
;
1926 ifs_ivopts_data
.step
= size_zero_node
;
1927 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1928 || integer_zerop (ifs_ivopts_data
.step
))
1930 step
= ifs_ivopts_data
.step
;
1932 /* Check that the base expression is addressable. This needs
1933 to be done after substituting bases of IVs into it. */
1934 if (may_be_nonaddressable_p (base
))
1937 /* Moreover, on strict alignment platforms, check that it is
1938 sufficiently aligned. */
1939 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
1942 base
= build_fold_addr_expr (base
);
1944 /* Substituting bases of IVs into the base expression might
1945 have caused folding opportunities. */
1946 if (TREE_CODE (base
) == ADDR_EXPR
)
1948 tree
*ref
= &TREE_OPERAND (base
, 0);
1949 while (handled_component_p (*ref
))
1950 ref
= &TREE_OPERAND (*ref
, 0);
1951 if (TREE_CODE (*ref
) == MEM_REF
)
1953 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
1954 TREE_OPERAND (*ref
, 0),
1955 TREE_OPERAND (*ref
, 1));
1962 civ
= alloc_iv (base
, step
);
1963 record_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
1967 for_each_index (op_p
, idx_record_use
, data
);
1970 /* Finds and records invariants used in STMT. */
1973 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
1976 use_operand_p use_p
;
1979 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
1981 op
= USE_FROM_PTR (use_p
);
1982 record_invariant (data
, op
, false);
1986 /* Finds interesting uses of induction variables in the statement STMT. */
1989 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
1992 tree op
, *lhs
, *rhs
;
1994 use_operand_p use_p
;
1995 enum tree_code code
;
1997 find_invariants_stmt (data
, stmt
);
1999 if (gimple_code (stmt
) == GIMPLE_COND
)
2001 find_interesting_uses_cond (data
, stmt
);
2005 if (is_gimple_assign (stmt
))
2007 lhs
= gimple_assign_lhs_ptr (stmt
);
2008 rhs
= gimple_assign_rhs1_ptr (stmt
);
2010 if (TREE_CODE (*lhs
) == SSA_NAME
)
2012 /* If the statement defines an induction variable, the uses are not
2013 interesting by themselves. */
2015 iv
= get_iv (data
, *lhs
);
2017 if (iv
&& !integer_zerop (iv
->step
))
2021 code
= gimple_assign_rhs_code (stmt
);
2022 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2023 && (REFERENCE_CLASS_P (*rhs
)
2024 || is_gimple_val (*rhs
)))
2026 if (REFERENCE_CLASS_P (*rhs
))
2027 find_interesting_uses_address (data
, stmt
, rhs
);
2029 find_interesting_uses_op (data
, *rhs
);
2031 if (REFERENCE_CLASS_P (*lhs
))
2032 find_interesting_uses_address (data
, stmt
, lhs
);
2035 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2037 find_interesting_uses_cond (data
, stmt
);
2041 /* TODO -- we should also handle address uses of type
2043 memory = call (whatever);
2050 if (gimple_code (stmt
) == GIMPLE_PHI
2051 && gimple_bb (stmt
) == data
->current_loop
->header
)
2053 iv
= get_iv (data
, PHI_RESULT (stmt
));
2055 if (iv
&& !integer_zerop (iv
->step
))
2059 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2061 op
= USE_FROM_PTR (use_p
);
2063 if (TREE_CODE (op
) != SSA_NAME
)
2066 iv
= get_iv (data
, op
);
2070 find_interesting_uses_op (data
, op
);
2074 /* Finds interesting uses of induction variables outside of loops
2075 on loop exit edge EXIT. */
2078 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2084 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2087 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2088 if (!virtual_operand_p (def
))
2089 find_interesting_uses_op (data
, def
);
2093 /* Finds uses of the induction variables that are interesting. */
2096 find_interesting_uses (struct ivopts_data
*data
)
2099 gimple_stmt_iterator bsi
;
2100 basic_block
*body
= get_loop_body (data
->current_loop
);
2102 struct version_info
*info
;
2105 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2106 fprintf (dump_file
, "Uses:\n\n");
2108 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2113 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2114 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2115 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2116 find_interesting_uses_outside (data
, e
);
2118 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2119 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2120 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2121 if (!is_gimple_debug (gsi_stmt (bsi
)))
2122 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2125 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2129 fprintf (dump_file
, "\n");
2131 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2133 info
= ver_info (data
, i
);
2136 fprintf (dump_file
, " ");
2137 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2138 fprintf (dump_file
, " is invariant (%d)%s\n",
2139 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2143 fprintf (dump_file
, "\n");
2149 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2150 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2151 we are at the top-level of the processed address. */
2154 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2155 HOST_WIDE_INT
*offset
)
2157 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2158 enum tree_code code
;
2159 tree type
, orig_type
= TREE_TYPE (expr
);
2160 HOST_WIDE_INT off0
, off1
, st
;
2161 tree orig_expr
= expr
;
2165 type
= TREE_TYPE (expr
);
2166 code
= TREE_CODE (expr
);
2172 if (!cst_and_fits_in_hwi (expr
)
2173 || integer_zerop (expr
))
2176 *offset
= int_cst_value (expr
);
2177 return build_int_cst (orig_type
, 0);
2179 case POINTER_PLUS_EXPR
:
2182 op0
= TREE_OPERAND (expr
, 0);
2183 op1
= TREE_OPERAND (expr
, 1);
2185 op0
= strip_offset_1 (op0
, false, false, &off0
);
2186 op1
= strip_offset_1 (op1
, false, false, &off1
);
2188 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2189 if (op0
== TREE_OPERAND (expr
, 0)
2190 && op1
== TREE_OPERAND (expr
, 1))
2193 if (integer_zerop (op1
))
2195 else if (integer_zerop (op0
))
2197 if (code
== MINUS_EXPR
)
2198 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2203 expr
= fold_build2 (code
, type
, op0
, op1
);
2205 return fold_convert (orig_type
, expr
);
2208 op1
= TREE_OPERAND (expr
, 1);
2209 if (!cst_and_fits_in_hwi (op1
))
2212 op0
= TREE_OPERAND (expr
, 0);
2213 op0
= strip_offset_1 (op0
, false, false, &off0
);
2214 if (op0
== TREE_OPERAND (expr
, 0))
2217 *offset
= off0
* int_cst_value (op1
);
2218 if (integer_zerop (op0
))
2221 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2223 return fold_convert (orig_type
, expr
);
2226 case ARRAY_RANGE_REF
:
2230 step
= array_ref_element_size (expr
);
2231 if (!cst_and_fits_in_hwi (step
))
2234 st
= int_cst_value (step
);
2235 op1
= TREE_OPERAND (expr
, 1);
2236 op1
= strip_offset_1 (op1
, false, false, &off1
);
2237 *offset
= off1
* st
;
2240 && integer_zerop (op1
))
2242 /* Strip the component reference completely. */
2243 op0
= TREE_OPERAND (expr
, 0);
2244 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2257 tmp
= component_ref_field_offset (expr
);
2258 field
= TREE_OPERAND (expr
, 1);
2260 && cst_and_fits_in_hwi (tmp
)
2261 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2263 HOST_WIDE_INT boffset
, abs_off
;
2265 /* Strip the component reference completely. */
2266 op0
= TREE_OPERAND (expr
, 0);
2267 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2268 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2269 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2273 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2280 op0
= TREE_OPERAND (expr
, 0);
2281 op0
= strip_offset_1 (op0
, true, true, &off0
);
2284 if (op0
== TREE_OPERAND (expr
, 0))
2287 expr
= build_fold_addr_expr (op0
);
2288 return fold_convert (orig_type
, expr
);
2291 /* ??? Offset operand? */
2292 inside_addr
= false;
2299 /* Default handling of expressions for that we want to recurse into
2300 the first operand. */
2301 op0
= TREE_OPERAND (expr
, 0);
2302 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2305 if (op0
== TREE_OPERAND (expr
, 0)
2306 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2309 expr
= copy_node (expr
);
2310 TREE_OPERAND (expr
, 0) = op0
;
2312 TREE_OPERAND (expr
, 1) = op1
;
2314 /* Inside address, we might strip the top level component references,
2315 thus changing type of the expression. Handling of ADDR_EXPR
2317 expr
= fold_convert (orig_type
, expr
);
2322 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2325 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2328 tree core
= strip_offset_1 (expr
, false, false, &off
);
2333 /* Returns variant of TYPE that can be used as base for different uses.
2334 We return unsigned type with the same precision, which avoids problems
2338 generic_type_for (tree type
)
2340 if (POINTER_TYPE_P (type
))
2341 return unsigned_type_for (type
);
2343 if (TYPE_UNSIGNED (type
))
2346 return unsigned_type_for (type
);
2349 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2350 the bitmap to that we should store it. */
2352 static struct ivopts_data
*fd_ivopts_data
;
2354 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2356 bitmap
*depends_on
= (bitmap
*) data
;
2357 struct version_info
*info
;
2359 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2361 info
= name_info (fd_ivopts_data
, *expr_p
);
2363 if (!info
->inv_id
|| info
->has_nonlin_use
)
2367 *depends_on
= BITMAP_ALLOC (NULL
);
2368 bitmap_set_bit (*depends_on
, info
->inv_id
);
2373 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2374 position to POS. If USE is not NULL, the candidate is set as related to
2375 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2376 replacement of the final value of the iv by a direct computation. */
2378 static struct iv_cand
*
2379 add_candidate_1 (struct ivopts_data
*data
,
2380 tree base
, tree step
, bool important
, enum iv_position pos
,
2381 struct iv_use
*use
, gimple incremented_at
)
2384 struct iv_cand
*cand
= NULL
;
2385 tree type
, orig_type
;
2387 /* For non-original variables, make sure their values are computed in a type
2388 that does not invoke undefined behavior on overflows (since in general,
2389 we cannot prove that these induction variables are non-wrapping). */
2390 if (pos
!= IP_ORIGINAL
)
2392 orig_type
= TREE_TYPE (base
);
2393 type
= generic_type_for (orig_type
);
2394 if (type
!= orig_type
)
2396 base
= fold_convert (type
, base
);
2397 step
= fold_convert (type
, step
);
2401 for (i
= 0; i
< n_iv_cands (data
); i
++)
2403 cand
= iv_cand (data
, i
);
2405 if (cand
->pos
!= pos
)
2408 if (cand
->incremented_at
!= incremented_at
2409 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2410 && cand
->ainc_use
!= use
))
2424 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2425 && operand_equal_p (step
, cand
->iv
->step
, 0)
2426 && (TYPE_PRECISION (TREE_TYPE (base
))
2427 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2431 if (i
== n_iv_cands (data
))
2433 cand
= XCNEW (struct iv_cand
);
2439 cand
->iv
= alloc_iv (base
, step
);
2442 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2444 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2445 cand
->var_after
= cand
->var_before
;
2447 cand
->important
= important
;
2448 cand
->incremented_at
= incremented_at
;
2449 data
->iv_candidates
.safe_push (cand
);
2452 && TREE_CODE (step
) != INTEGER_CST
)
2454 fd_ivopts_data
= data
;
2455 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2458 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2459 cand
->ainc_use
= use
;
2461 cand
->ainc_use
= NULL
;
2463 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2464 dump_cand (dump_file
, cand
);
2467 if (important
&& !cand
->important
)
2469 cand
->important
= true;
2470 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2471 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2476 bitmap_set_bit (use
->related_cands
, i
);
2477 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2478 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2485 /* Returns true if incrementing the induction variable at the end of the LOOP
2488 The purpose is to avoid splitting latch edge with a biv increment, thus
2489 creating a jump, possibly confusing other optimization passes and leaving
2490 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2491 is not available (so we do not have a better alternative), or if the latch
2492 edge is already nonempty. */
2495 allow_ip_end_pos_p (struct loop
*loop
)
2497 if (!ip_normal_pos (loop
))
2500 if (!empty_block_p (ip_end_pos (loop
)))
2506 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2507 Important field is set to IMPORTANT. */
2510 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2511 bool important
, struct iv_use
*use
)
2513 basic_block use_bb
= gimple_bb (use
->stmt
);
2514 machine_mode mem_mode
;
2515 unsigned HOST_WIDE_INT cstepi
;
2517 /* If we insert the increment in any position other than the standard
2518 ones, we must ensure that it is incremented once per iteration.
2519 It must not be in an inner nested loop, or one side of an if
2521 if (use_bb
->loop_father
!= data
->current_loop
2522 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2523 || stmt_could_throw_p (use
->stmt
)
2524 || !cst_and_fits_in_hwi (step
))
2527 cstepi
= int_cst_value (step
);
2529 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2530 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2531 || USE_STORE_PRE_INCREMENT (mem_mode
))
2532 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2533 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2534 || USE_STORE_PRE_DECREMENT (mem_mode
))
2535 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2537 enum tree_code code
= MINUS_EXPR
;
2539 tree new_step
= step
;
2541 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2543 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2544 code
= POINTER_PLUS_EXPR
;
2547 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2548 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2549 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2552 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2553 || USE_STORE_POST_INCREMENT (mem_mode
))
2554 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2555 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2556 || USE_STORE_POST_DECREMENT (mem_mode
))
2557 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2559 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2564 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2565 position to POS. If USE is not NULL, the candidate is set as related to
2566 it. The candidate computation is scheduled on all available positions. */
2569 add_candidate (struct ivopts_data
*data
,
2570 tree base
, tree step
, bool important
, struct iv_use
*use
)
2572 if (ip_normal_pos (data
->current_loop
))
2573 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2574 if (ip_end_pos (data
->current_loop
)
2575 && allow_ip_end_pos_p (data
->current_loop
))
2576 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2578 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2579 add_autoinc_candidates (data
, base
, step
, important
, use
);
2582 /* Adds standard iv candidates. */
2585 add_standard_iv_candidates (struct ivopts_data
*data
)
2587 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2589 /* The same for a double-integer type if it is still fast enough. */
2591 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2592 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2593 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2594 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2596 /* The same for a double-integer type if it is still fast enough. */
2598 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2599 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2600 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2601 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2605 /* Adds candidates bases on the old induction variable IV. */
2608 add_old_iv_candidates (struct ivopts_data
*data
, struct iv
*iv
)
2612 struct iv_cand
*cand
;
2614 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2616 /* The same, but with initial value zero. */
2617 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2618 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2620 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2621 iv
->step
, true, NULL
);
2623 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2624 if (gimple_code (phi
) == GIMPLE_PHI
)
2626 /* Additionally record the possibility of leaving the original iv
2628 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2629 /* Don't add candidate if it's from another PHI node because
2630 it's an affine iv appearing in the form of PEELED_CHREC. */
2631 phi
= SSA_NAME_DEF_STMT (def
);
2632 if (gimple_code (phi
) != GIMPLE_PHI
)
2634 cand
= add_candidate_1 (data
,
2635 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2636 SSA_NAME_DEF_STMT (def
));
2637 cand
->var_before
= iv
->ssa_name
;
2638 cand
->var_after
= def
;
2641 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2645 /* Adds candidates based on the old induction variables. */
2648 add_old_ivs_candidates (struct ivopts_data
*data
)
2654 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2656 iv
= ver_info (data
, i
)->iv
;
2657 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2658 add_old_iv_candidates (data
, iv
);
2662 /* Adds candidates based on the value of the induction variable IV and USE. */
2665 add_iv_value_candidates (struct ivopts_data
*data
,
2666 struct iv
*iv
, struct iv_use
*use
)
2668 unsigned HOST_WIDE_INT offset
;
2672 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2674 /* The same, but with initial value zero. Make such variable important,
2675 since it is generic enough so that possibly many uses may be based
2677 basetype
= TREE_TYPE (iv
->base
);
2678 if (POINTER_TYPE_P (basetype
))
2679 basetype
= sizetype
;
2680 add_candidate (data
, build_int_cst (basetype
, 0),
2681 iv
->step
, true, use
);
2683 /* Third, try removing the constant offset. Make sure to even
2684 add a candidate for &a[0] vs. (T *)&a. */
2685 base
= strip_offset (iv
->base
, &offset
);
2687 || base
!= iv
->base
)
2688 add_candidate (data
, base
, iv
->step
, false, use
);
2691 /* Adds candidates based on the uses. */
2694 add_derived_ivs_candidates (struct ivopts_data
*data
)
2698 for (i
= 0; i
< n_iv_uses (data
); i
++)
2700 struct iv_use
*use
= iv_use (data
, i
);
2707 case USE_NONLINEAR_EXPR
:
2710 /* Just add the ivs based on the value of the iv used here. */
2711 add_iv_value_candidates (data
, use
->iv
, use
);
2720 /* Record important candidates and add them to related_cands bitmaps
2724 record_important_candidates (struct ivopts_data
*data
)
2729 for (i
= 0; i
< n_iv_cands (data
); i
++)
2731 struct iv_cand
*cand
= iv_cand (data
, i
);
2733 if (cand
->important
)
2734 bitmap_set_bit (data
->important_candidates
, i
);
2737 data
->consider_all_candidates
= (n_iv_cands (data
)
2738 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2740 if (data
->consider_all_candidates
)
2742 /* We will not need "related_cands" bitmaps in this case,
2743 so release them to decrease peak memory consumption. */
2744 for (i
= 0; i
< n_iv_uses (data
); i
++)
2746 use
= iv_use (data
, i
);
2747 BITMAP_FREE (use
->related_cands
);
2752 /* Add important candidates to the related_cands bitmaps. */
2753 for (i
= 0; i
< n_iv_uses (data
); i
++)
2754 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2755 data
->important_candidates
);
2759 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2760 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2761 we allocate a simple list to every use. */
2764 alloc_use_cost_map (struct ivopts_data
*data
)
2766 unsigned i
, size
, s
;
2768 for (i
= 0; i
< n_iv_uses (data
); i
++)
2770 struct iv_use
*use
= iv_use (data
, i
);
2772 if (data
->consider_all_candidates
)
2773 size
= n_iv_cands (data
);
2776 s
= bitmap_count_bits (use
->related_cands
);
2778 /* Round up to the power of two, so that moduling by it is fast. */
2779 size
= s
? (1 << ceil_log2 (s
)) : 1;
2782 use
->n_map_members
= size
;
2783 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
2787 /* Returns description of computation cost of expression whose runtime
2788 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2791 new_cost (unsigned runtime
, unsigned complexity
)
2795 cost
.cost
= runtime
;
2796 cost
.complexity
= complexity
;
2801 /* Adds costs COST1 and COST2. */
2804 add_costs (comp_cost cost1
, comp_cost cost2
)
2806 cost1
.cost
+= cost2
.cost
;
2807 cost1
.complexity
+= cost2
.complexity
;
2811 /* Subtracts costs COST1 and COST2. */
2814 sub_costs (comp_cost cost1
, comp_cost cost2
)
2816 cost1
.cost
-= cost2
.cost
;
2817 cost1
.complexity
-= cost2
.complexity
;
2822 /* Returns a negative number if COST1 < COST2, a positive number if
2823 COST1 > COST2, and 0 if COST1 = COST2. */
2826 compare_costs (comp_cost cost1
, comp_cost cost2
)
2828 if (cost1
.cost
== cost2
.cost
)
2829 return cost1
.complexity
- cost2
.complexity
;
2831 return cost1
.cost
- cost2
.cost
;
2834 /* Returns true if COST is infinite. */
2837 infinite_cost_p (comp_cost cost
)
2839 return cost
.cost
== INFTY
;
2842 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2843 on invariants DEPENDS_ON and that the value used in expressing it
2844 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2847 set_use_iv_cost (struct ivopts_data
*data
,
2848 struct iv_use
*use
, struct iv_cand
*cand
,
2849 comp_cost cost
, bitmap depends_on
, tree value
,
2850 enum tree_code comp
, int inv_expr_id
)
2854 if (infinite_cost_p (cost
))
2856 BITMAP_FREE (depends_on
);
2860 if (data
->consider_all_candidates
)
2862 use
->cost_map
[cand
->id
].cand
= cand
;
2863 use
->cost_map
[cand
->id
].cost
= cost
;
2864 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
2865 use
->cost_map
[cand
->id
].value
= value
;
2866 use
->cost_map
[cand
->id
].comp
= comp
;
2867 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
2871 /* n_map_members is a power of two, so this computes modulo. */
2872 s
= cand
->id
& (use
->n_map_members
- 1);
2873 for (i
= s
; i
< use
->n_map_members
; i
++)
2874 if (!use
->cost_map
[i
].cand
)
2876 for (i
= 0; i
< s
; i
++)
2877 if (!use
->cost_map
[i
].cand
)
2883 use
->cost_map
[i
].cand
= cand
;
2884 use
->cost_map
[i
].cost
= cost
;
2885 use
->cost_map
[i
].depends_on
= depends_on
;
2886 use
->cost_map
[i
].value
= value
;
2887 use
->cost_map
[i
].comp
= comp
;
2888 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
2891 /* Gets cost of (USE, CANDIDATE) pair. */
2893 static struct cost_pair
*
2894 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
2895 struct iv_cand
*cand
)
2898 struct cost_pair
*ret
;
2903 if (data
->consider_all_candidates
)
2905 ret
= use
->cost_map
+ cand
->id
;
2912 /* n_map_members is a power of two, so this computes modulo. */
2913 s
= cand
->id
& (use
->n_map_members
- 1);
2914 for (i
= s
; i
< use
->n_map_members
; i
++)
2915 if (use
->cost_map
[i
].cand
== cand
)
2916 return use
->cost_map
+ i
;
2917 else if (use
->cost_map
[i
].cand
== NULL
)
2919 for (i
= 0; i
< s
; i
++)
2920 if (use
->cost_map
[i
].cand
== cand
)
2921 return use
->cost_map
+ i
;
2922 else if (use
->cost_map
[i
].cand
== NULL
)
2928 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2930 produce_memory_decl_rtl (tree obj
, int *regno
)
2932 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
2933 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
2937 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
2939 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
2940 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
2941 SET_SYMBOL_REF_DECL (x
, obj
);
2942 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2943 set_mem_addr_space (x
, as
);
2944 targetm
.encode_section_info (obj
, x
, true);
2948 x
= gen_raw_REG (address_mode
, (*regno
)++);
2949 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
2950 set_mem_addr_space (x
, as
);
2956 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2957 walk_tree. DATA contains the actual fake register number. */
2960 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
2962 tree obj
= NULL_TREE
;
2964 int *regno
= (int *) data
;
2966 switch (TREE_CODE (*expr_p
))
2969 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
2970 handled_component_p (*expr_p
);
2971 expr_p
= &TREE_OPERAND (*expr_p
, 0))
2974 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
2975 x
= produce_memory_decl_rtl (obj
, regno
);
2980 obj
= SSA_NAME_VAR (*expr_p
);
2981 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2984 if (!DECL_RTL_SET_P (obj
))
2985 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
2994 if (DECL_RTL_SET_P (obj
))
2997 if (DECL_MODE (obj
) == BLKmode
)
2998 x
= produce_memory_decl_rtl (obj
, regno
);
3000 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3010 decl_rtl_to_reset
.safe_push (obj
);
3011 SET_DECL_RTL (obj
, x
);
3017 /* Determines cost of the computation of EXPR. */
3020 computation_cost (tree expr
, bool speed
)
3024 tree type
= TREE_TYPE (expr
);
3026 /* Avoid using hard regs in ways which may be unsupported. */
3027 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3028 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3029 enum node_frequency real_frequency
= node
->frequency
;
3031 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3032 crtl
->maybe_hot_insn_p
= speed
;
3033 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3035 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3038 default_rtl_profile ();
3039 node
->frequency
= real_frequency
;
3041 cost
= seq_cost (seq
, speed
);
3043 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3044 TYPE_ADDR_SPACE (type
), speed
);
3045 else if (!REG_P (rslt
))
3046 cost
+= set_src_cost (rslt
, speed
);
3051 /* Returns variable containing the value of candidate CAND at statement AT. */
3054 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3056 if (stmt_after_increment (loop
, cand
, stmt
))
3057 return cand
->var_after
;
3059 return cand
->var_before
;
3062 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3063 same precision that is at least as wide as the precision of TYPE, stores
3064 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3068 determine_common_wider_type (tree
*a
, tree
*b
)
3070 tree wider_type
= NULL
;
3072 tree atype
= TREE_TYPE (*a
);
3074 if (CONVERT_EXPR_P (*a
))
3076 suba
= TREE_OPERAND (*a
, 0);
3077 wider_type
= TREE_TYPE (suba
);
3078 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3084 if (CONVERT_EXPR_P (*b
))
3086 subb
= TREE_OPERAND (*b
, 0);
3087 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3098 /* Determines the expression by that USE is expressed from induction variable
3099 CAND at statement AT in LOOP. The expression is stored in a decomposed
3100 form into AFF. Returns false if USE cannot be expressed using CAND. */
3103 get_computation_aff (struct loop
*loop
,
3104 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3105 struct aff_tree
*aff
)
3107 tree ubase
= use
->iv
->base
;
3108 tree ustep
= use
->iv
->step
;
3109 tree cbase
= cand
->iv
->base
;
3110 tree cstep
= cand
->iv
->step
, cstep_common
;
3111 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3112 tree common_type
, var
;
3114 aff_tree cbase_aff
, var_aff
;
3117 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3119 /* We do not have a precision to express the values of use. */
3123 var
= var_at_stmt (loop
, cand
, at
);
3124 uutype
= unsigned_type_for (utype
);
3126 /* If the conversion is not noop, perform it. */
3127 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3129 cstep
= fold_convert (uutype
, cstep
);
3130 cbase
= fold_convert (uutype
, cbase
);
3131 var
= fold_convert (uutype
, var
);
3134 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3137 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3138 type, we achieve better folding by computing their difference in this
3139 wider type, and cast the result to UUTYPE. We do not need to worry about
3140 overflows, as all the arithmetics will in the end be performed in UUTYPE
3142 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3144 /* use = ubase - ratio * cbase + ratio * var. */
3145 tree_to_aff_combination (ubase
, common_type
, aff
);
3146 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3147 tree_to_aff_combination (var
, uutype
, &var_aff
);
3149 /* We need to shift the value if we are after the increment. */
3150 if (stmt_after_increment (loop
, cand
, at
))
3154 if (common_type
!= uutype
)
3155 cstep_common
= fold_convert (common_type
, cstep
);
3157 cstep_common
= cstep
;
3159 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3160 aff_combination_add (&cbase_aff
, &cstep_aff
);
3163 aff_combination_scale (&cbase_aff
, -rat
);
3164 aff_combination_add (aff
, &cbase_aff
);
3165 if (common_type
!= uutype
)
3166 aff_combination_convert (aff
, uutype
);
3168 aff_combination_scale (&var_aff
, rat
);
3169 aff_combination_add (aff
, &var_aff
);
3174 /* Return the type of USE. */
3177 get_use_type (struct iv_use
*use
)
3179 tree base_type
= TREE_TYPE (use
->iv
->base
);
3182 if (use
->type
== USE_ADDRESS
)
3184 /* The base_type may be a void pointer. Create a pointer type based on
3185 the mem_ref instead. */
3186 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3187 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3188 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3196 /* Determines the expression by that USE is expressed from induction variable
3197 CAND at statement AT in LOOP. The computation is unshared. */
3200 get_computation_at (struct loop
*loop
,
3201 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3204 tree type
= get_use_type (use
);
3206 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3208 unshare_aff_combination (&aff
);
3209 return fold_convert (type
, aff_combination_to_tree (&aff
));
3212 /* Determines the expression by that USE is expressed from induction variable
3213 CAND in LOOP. The computation is unshared. */
3216 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3218 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3221 /* Adjust the cost COST for being in loop setup rather than loop body.
3222 If we're optimizing for space, the loop setup overhead is constant;
3223 if we're optimizing for speed, amortize it over the per-iteration cost. */
3225 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3229 else if (optimize_loop_for_speed_p (data
->current_loop
))
3230 return cost
/ avg_loop_niter (data
->current_loop
);
3235 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3236 validity for a memory reference accessing memory of mode MODE in
3237 address space AS. */
3241 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3244 #define MAX_RATIO 128
3245 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3246 static vec
<sbitmap
> valid_mult_list
;
3249 if (data_index
>= valid_mult_list
.length ())
3250 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3252 valid_mult
= valid_mult_list
[data_index
];
3255 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3256 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3257 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3261 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3262 bitmap_clear (valid_mult
);
3263 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3264 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3265 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3267 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3268 if (memory_address_addr_space_p (mode
, addr
, as
)
3269 || memory_address_addr_space_p (mode
, scaled
, as
))
3270 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3273 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3275 fprintf (dump_file
, " allowed multipliers:");
3276 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3277 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3278 fprintf (dump_file
, " %d", (int) i
);
3279 fprintf (dump_file
, "\n");
3280 fprintf (dump_file
, "\n");
3283 valid_mult_list
[data_index
] = valid_mult
;
3286 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3289 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3292 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3293 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3294 variable is omitted. Compute the cost for a memory reference that accesses
3295 a memory location of mode MEM_MODE in address space AS.
3297 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3298 size of MEM_MODE / RATIO) is available. To make this determination, we
3299 look at the size of the increment to be made, which is given in CSTEP.
3300 CSTEP may be zero if the step is unknown.
3301 STMT_AFTER_INC is true iff the statement we're looking at is after the
3302 increment of the original biv.
3304 TODO -- there must be some better way. This all is quite crude. */
3308 AINC_PRE_INC
, /* Pre increment. */
3309 AINC_PRE_DEC
, /* Pre decrement. */
3310 AINC_POST_INC
, /* Post increment. */
3311 AINC_POST_DEC
, /* Post decrement. */
3312 AINC_NONE
/* Also the number of auto increment types. */
3315 typedef struct address_cost_data_s
3317 HOST_WIDE_INT min_offset
, max_offset
;
3318 unsigned costs
[2][2][2][2];
3319 unsigned ainc_costs
[AINC_NONE
];
3320 } *address_cost_data
;
3324 get_address_cost (bool symbol_present
, bool var_present
,
3325 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3326 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3327 addr_space_t as
, bool speed
,
3328 bool stmt_after_inc
, bool *may_autoinc
)
3330 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3331 static vec
<address_cost_data
> address_cost_data_list
;
3332 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3333 address_cost_data data
;
3334 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3335 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3336 unsigned cost
, acost
, complexity
;
3337 enum ainc_type autoinc_type
;
3338 bool offset_p
, ratio_p
, autoinc
;
3339 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3340 unsigned HOST_WIDE_INT mask
;
3343 if (data_index
>= address_cost_data_list
.length ())
3344 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3346 data
= address_cost_data_list
[data_index
];
3350 HOST_WIDE_INT rat
, off
= 0;
3351 int old_cse_not_expected
, width
;
3352 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3357 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3359 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3361 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3362 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3363 width
= HOST_BITS_PER_WIDE_INT
- 1;
3364 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3366 for (i
= width
; i
>= 0; i
--)
3368 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3369 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3370 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3373 data
->min_offset
= (i
== -1? 0 : off
);
3375 for (i
= width
; i
>= 0; i
--)
3377 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3378 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3379 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3381 /* For some strict-alignment targets, the offset must be naturally
3382 aligned. Try an aligned offset if mem_mode is not QImode. */
3383 off
= mem_mode
!= QImode
3384 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3385 - GET_MODE_SIZE (mem_mode
)
3389 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3390 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3396 data
->max_offset
= off
;
3398 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3400 fprintf (dump_file
, "get_address_cost:\n");
3401 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3402 GET_MODE_NAME (mem_mode
),
3404 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3405 GET_MODE_NAME (mem_mode
),
3410 for (i
= 2; i
<= MAX_RATIO
; i
++)
3411 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3417 /* Compute the cost of various addressing modes. */
3419 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3420 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3422 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3423 || USE_STORE_PRE_DECREMENT (mem_mode
))
3425 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3426 has_predec
[mem_mode
]
3427 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3429 if (has_predec
[mem_mode
])
3430 data
->ainc_costs
[AINC_PRE_DEC
]
3431 = address_cost (addr
, mem_mode
, as
, speed
);
3433 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3434 || USE_STORE_POST_DECREMENT (mem_mode
))
3436 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3437 has_postdec
[mem_mode
]
3438 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3440 if (has_postdec
[mem_mode
])
3441 data
->ainc_costs
[AINC_POST_DEC
]
3442 = address_cost (addr
, mem_mode
, as
, speed
);
3444 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3445 || USE_STORE_PRE_DECREMENT (mem_mode
))
3447 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3448 has_preinc
[mem_mode
]
3449 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3451 if (has_preinc
[mem_mode
])
3452 data
->ainc_costs
[AINC_PRE_INC
]
3453 = address_cost (addr
, mem_mode
, as
, speed
);
3455 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3456 || USE_STORE_POST_INCREMENT (mem_mode
))
3458 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3459 has_postinc
[mem_mode
]
3460 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3462 if (has_postinc
[mem_mode
])
3463 data
->ainc_costs
[AINC_POST_INC
]
3464 = address_cost (addr
, mem_mode
, as
, speed
);
3466 for (i
= 0; i
< 16; i
++)
3469 var_p
= (i
>> 1) & 1;
3470 off_p
= (i
>> 2) & 1;
3471 rat_p
= (i
>> 3) & 1;
3475 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3476 gen_int_mode (rat
, address_mode
));
3479 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3483 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3484 /* ??? We can run into trouble with some backends by presenting
3485 it with symbols which haven't been properly passed through
3486 targetm.encode_section_info. By setting the local bit, we
3487 enhance the probability of things working. */
3488 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3491 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3493 (PLUS
, address_mode
, base
,
3494 gen_int_mode (off
, address_mode
)));
3497 base
= gen_int_mode (off
, address_mode
);
3502 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3505 /* To avoid splitting addressing modes, pretend that no cse will
3507 old_cse_not_expected
= cse_not_expected
;
3508 cse_not_expected
= true;
3509 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3510 cse_not_expected
= old_cse_not_expected
;
3514 acost
= seq_cost (seq
, speed
);
3515 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3519 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3522 /* On some targets, it is quite expensive to load symbol to a register,
3523 which makes addresses that contain symbols look much more expensive.
3524 However, the symbol will have to be loaded in any case before the
3525 loop (and quite likely we have it in register already), so it does not
3526 make much sense to penalize them too heavily. So make some final
3527 tweaks for the SYMBOL_PRESENT modes:
3529 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3530 var is cheaper, use this mode with small penalty.
3531 If VAR_PRESENT is true, try whether the mode with
3532 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3533 if this is the case, use it. */
3534 add_c
= add_cost (speed
, address_mode
);
3535 for (i
= 0; i
< 8; i
++)
3538 off_p
= (i
>> 1) & 1;
3539 rat_p
= (i
>> 2) & 1;
3541 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3545 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3546 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3549 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3551 fprintf (dump_file
, "Address costs:\n");
3553 for (i
= 0; i
< 16; i
++)
3556 var_p
= (i
>> 1) & 1;
3557 off_p
= (i
>> 2) & 1;
3558 rat_p
= (i
>> 3) & 1;
3560 fprintf (dump_file
, " ");
3562 fprintf (dump_file
, "sym + ");
3564 fprintf (dump_file
, "var + ");
3566 fprintf (dump_file
, "cst + ");
3568 fprintf (dump_file
, "rat * ");
3570 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3571 fprintf (dump_file
, "index costs %d\n", acost
);
3573 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3574 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3575 fprintf (dump_file
, " May include autoinc/dec\n");
3576 fprintf (dump_file
, "\n");
3579 address_cost_data_list
[data_index
] = data
;
3582 bits
= GET_MODE_BITSIZE (address_mode
);
3583 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3585 if ((offset
>> (bits
- 1) & 1))
3590 autoinc_type
= AINC_NONE
;
3591 msize
= GET_MODE_SIZE (mem_mode
);
3592 autoinc_offset
= offset
;
3594 autoinc_offset
+= ratio
* cstep
;
3595 if (symbol_present
|| var_present
|| ratio
!= 1)
3599 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3601 autoinc_type
= AINC_POST_INC
;
3602 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3604 autoinc_type
= AINC_POST_DEC
;
3605 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3607 autoinc_type
= AINC_PRE_INC
;
3608 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3610 autoinc_type
= AINC_PRE_DEC
;
3612 if (autoinc_type
!= AINC_NONE
)
3617 offset_p
= (s_offset
!= 0
3618 && data
->min_offset
<= s_offset
3619 && s_offset
<= data
->max_offset
);
3620 ratio_p
= (ratio
!= 1
3621 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3623 if (ratio
!= 1 && !ratio_p
)
3624 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3626 if (s_offset
&& !offset_p
&& !symbol_present
)
3627 cost
+= add_cost (speed
, address_mode
);
3630 *may_autoinc
= autoinc
;
3632 acost
= data
->ainc_costs
[autoinc_type
];
3634 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3635 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3636 return new_cost (cost
+ acost
, complexity
);
3639 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3640 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3641 calculating the operands of EXPR. Returns true if successful, and returns
3642 the cost in COST. */
3645 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3646 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3649 tree op1
= TREE_OPERAND (expr
, 1);
3650 tree cst
= TREE_OPERAND (mult
, 1);
3651 tree multop
= TREE_OPERAND (mult
, 0);
3652 int m
= exact_log2 (int_cst_value (cst
));
3653 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3654 int as_cost
, sa_cost
;
3657 if (!(m
>= 0 && m
< maxm
))
3660 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
3662 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3664 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3665 use that in preference to a shift insn followed by an add insn. */
3666 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3667 ? shiftadd_cost (speed
, mode
, m
)
3669 ? shiftsub1_cost (speed
, mode
, m
)
3670 : shiftsub0_cost (speed
, mode
, m
)));
3672 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
3673 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
3675 STRIP_NOPS (multop
);
3676 if (!is_gimple_val (multop
))
3677 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3683 /* Estimates cost of forcing expression EXPR into a variable. */
3686 force_expr_to_var_cost (tree expr
, bool speed
)
3688 static bool costs_initialized
= false;
3689 static unsigned integer_cost
[2];
3690 static unsigned symbol_cost
[2];
3691 static unsigned address_cost
[2];
3693 comp_cost cost0
, cost1
, cost
;
3696 if (!costs_initialized
)
3698 tree type
= build_pointer_type (integer_type_node
);
3703 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3704 TREE_STATIC (var
) = 1;
3705 x
= produce_memory_decl_rtl (var
, NULL
);
3706 SET_DECL_RTL (var
, x
);
3708 addr
= build1 (ADDR_EXPR
, type
, var
);
3711 for (i
= 0; i
< 2; i
++)
3713 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3716 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3719 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3720 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3722 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3723 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3724 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3725 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3726 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3727 fprintf (dump_file
, "\n");
3731 costs_initialized
= true;
3736 if (SSA_VAR_P (expr
))
3739 if (is_gimple_min_invariant (expr
))
3741 if (TREE_CODE (expr
) == INTEGER_CST
)
3742 return new_cost (integer_cost
[speed
], 0);
3744 if (TREE_CODE (expr
) == ADDR_EXPR
)
3746 tree obj
= TREE_OPERAND (expr
, 0);
3748 if (TREE_CODE (obj
) == VAR_DECL
3749 || TREE_CODE (obj
) == PARM_DECL
3750 || TREE_CODE (obj
) == RESULT_DECL
)
3751 return new_cost (symbol_cost
[speed
], 0);
3754 return new_cost (address_cost
[speed
], 0);
3757 switch (TREE_CODE (expr
))
3759 case POINTER_PLUS_EXPR
:
3763 op0
= TREE_OPERAND (expr
, 0);
3764 op1
= TREE_OPERAND (expr
, 1);
3771 op0
= TREE_OPERAND (expr
, 0);
3777 /* Just an arbitrary value, FIXME. */
3778 return new_cost (target_spill_cost
[speed
], 0);
3781 if (op0
== NULL_TREE
3782 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
3785 cost0
= force_expr_to_var_cost (op0
, speed
);
3787 if (op1
== NULL_TREE
3788 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
3791 cost1
= force_expr_to_var_cost (op1
, speed
);
3793 mode
= TYPE_MODE (TREE_TYPE (expr
));
3794 switch (TREE_CODE (expr
))
3796 case POINTER_PLUS_EXPR
:
3800 cost
= new_cost (add_cost (speed
, mode
), 0);
3801 if (TREE_CODE (expr
) != NEGATE_EXPR
)
3803 tree mult
= NULL_TREE
;
3805 if (TREE_CODE (op1
) == MULT_EXPR
)
3807 else if (TREE_CODE (op0
) == MULT_EXPR
)
3810 if (mult
!= NULL_TREE
3811 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
3812 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
3820 tree inner_mode
, outer_mode
;
3821 outer_mode
= TREE_TYPE (expr
);
3822 inner_mode
= TREE_TYPE (op0
);
3823 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
3824 TYPE_MODE (inner_mode
), speed
), 0);
3829 if (cst_and_fits_in_hwi (op0
))
3830 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
3832 else if (cst_and_fits_in_hwi (op1
))
3833 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
3836 return new_cost (target_spill_cost
[speed
], 0);
3843 cost
= add_costs (cost
, cost0
);
3844 cost
= add_costs (cost
, cost1
);
3846 /* Bound the cost by target_spill_cost. The parts of complicated
3847 computations often are either loop invariant or at least can
3848 be shared between several iv uses, so letting this grow without
3849 limits would not give reasonable results. */
3850 if (cost
.cost
> (int) target_spill_cost
[speed
])
3851 cost
.cost
= target_spill_cost
[speed
];
3856 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3857 invariants the computation depends on. */
3860 force_var_cost (struct ivopts_data
*data
,
3861 tree expr
, bitmap
*depends_on
)
3865 fd_ivopts_data
= data
;
3866 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
3869 return force_expr_to_var_cost (expr
, data
->speed
);
3872 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3873 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3874 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3875 invariants the computation depends on. */
3878 split_address_cost (struct ivopts_data
*data
,
3879 tree addr
, bool *symbol_present
, bool *var_present
,
3880 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3883 HOST_WIDE_INT bitsize
;
3884 HOST_WIDE_INT bitpos
;
3887 int unsignedp
, reversep
, volatilep
;
3889 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
3890 &unsignedp
, &reversep
, &volatilep
, false);
3893 || bitpos
% BITS_PER_UNIT
!= 0
3895 || TREE_CODE (core
) != VAR_DECL
)
3897 *symbol_present
= false;
3898 *var_present
= true;
3899 fd_ivopts_data
= data
;
3900 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
3901 return new_cost (target_spill_cost
[data
->speed
], 0);
3904 *offset
+= bitpos
/ BITS_PER_UNIT
;
3905 if (TREE_STATIC (core
)
3906 || DECL_EXTERNAL (core
))
3908 *symbol_present
= true;
3909 *var_present
= false;
3913 *symbol_present
= false;
3914 *var_present
= true;
3918 /* Estimates cost of expressing difference of addresses E1 - E2 as
3919 var + symbol + offset. The value of offset is added to OFFSET,
3920 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3921 part is missing. DEPENDS_ON is a set of the invariants the computation
3925 ptr_difference_cost (struct ivopts_data
*data
,
3926 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3927 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3929 HOST_WIDE_INT diff
= 0;
3930 aff_tree aff_e1
, aff_e2
;
3933 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
3935 if (ptr_difference_const (e1
, e2
, &diff
))
3938 *symbol_present
= false;
3939 *var_present
= false;
3943 if (integer_zerop (e2
))
3944 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
3945 symbol_present
, var_present
, offset
, depends_on
);
3947 *symbol_present
= false;
3948 *var_present
= true;
3950 type
= signed_type_for (TREE_TYPE (e1
));
3951 tree_to_aff_combination (e1
, type
, &aff_e1
);
3952 tree_to_aff_combination (e2
, type
, &aff_e2
);
3953 aff_combination_scale (&aff_e2
, -1);
3954 aff_combination_add (&aff_e1
, &aff_e2
);
3956 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
3959 /* Estimates cost of expressing difference E1 - E2 as
3960 var + symbol + offset. The value of offset is added to OFFSET,
3961 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3962 part is missing. DEPENDS_ON is a set of the invariants the computation
3966 difference_cost (struct ivopts_data
*data
,
3967 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
3968 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
3970 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
3971 unsigned HOST_WIDE_INT off1
, off2
;
3972 aff_tree aff_e1
, aff_e2
;
3975 e1
= strip_offset (e1
, &off1
);
3976 e2
= strip_offset (e2
, &off2
);
3977 *offset
+= off1
- off2
;
3982 if (TREE_CODE (e1
) == ADDR_EXPR
)
3983 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
3984 offset
, depends_on
);
3985 *symbol_present
= false;
3987 if (operand_equal_p (e1
, e2
, 0))
3989 *var_present
= false;
3993 *var_present
= true;
3995 if (integer_zerop (e2
))
3996 return force_var_cost (data
, e1
, depends_on
);
3998 if (integer_zerop (e1
))
4000 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4001 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4005 type
= signed_type_for (TREE_TYPE (e1
));
4006 tree_to_aff_combination (e1
, type
, &aff_e1
);
4007 tree_to_aff_combination (e2
, type
, &aff_e2
);
4008 aff_combination_scale (&aff_e2
, -1);
4009 aff_combination_add (&aff_e1
, &aff_e2
);
4011 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4014 /* Returns true if AFF1 and AFF2 are identical. */
4017 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4021 if (aff1
->n
!= aff2
->n
)
4024 for (i
= 0; i
< aff1
->n
; i
++)
4026 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4029 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4035 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4038 get_expr_id (struct ivopts_data
*data
, tree expr
)
4040 struct iv_inv_expr_ent ent
;
4041 struct iv_inv_expr_ent
**slot
;
4044 ent
.hash
= iterative_hash_expr (expr
, 0);
4045 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4049 *slot
= XNEW (struct iv_inv_expr_ent
);
4050 (*slot
)->expr
= expr
;
4051 (*slot
)->hash
= ent
.hash
;
4052 (*slot
)->id
= data
->inv_expr_id
++;
4056 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4057 requires a new compiler generated temporary. Returns -1 otherwise.
4058 ADDRESS_P is a flag indicating if the expression is for address
4062 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4063 tree cbase
, HOST_WIDE_INT ratio
,
4066 aff_tree ubase_aff
, cbase_aff
;
4074 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4075 && (TREE_CODE (cbase
) == INTEGER_CST
))
4078 /* Strips the constant part. */
4079 if (TREE_CODE (ubase
) == PLUS_EXPR
4080 || TREE_CODE (ubase
) == MINUS_EXPR
4081 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4083 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4084 ubase
= TREE_OPERAND (ubase
, 0);
4087 /* Strips the constant part. */
4088 if (TREE_CODE (cbase
) == PLUS_EXPR
4089 || TREE_CODE (cbase
) == MINUS_EXPR
4090 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4092 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4093 cbase
= TREE_OPERAND (cbase
, 0);
4098 if (((TREE_CODE (ubase
) == SSA_NAME
)
4099 || (TREE_CODE (ubase
) == ADDR_EXPR
4100 && is_gimple_min_invariant (ubase
)))
4101 && (TREE_CODE (cbase
) == INTEGER_CST
))
4104 if (((TREE_CODE (cbase
) == SSA_NAME
)
4105 || (TREE_CODE (cbase
) == ADDR_EXPR
4106 && is_gimple_min_invariant (cbase
)))
4107 && (TREE_CODE (ubase
) == INTEGER_CST
))
4113 if (operand_equal_p (ubase
, cbase
, 0))
4116 if (TREE_CODE (ubase
) == ADDR_EXPR
4117 && TREE_CODE (cbase
) == ADDR_EXPR
)
4121 usym
= TREE_OPERAND (ubase
, 0);
4122 csym
= TREE_OPERAND (cbase
, 0);
4123 if (TREE_CODE (usym
) == ARRAY_REF
)
4125 tree ind
= TREE_OPERAND (usym
, 1);
4126 if (TREE_CODE (ind
) == INTEGER_CST
4127 && tree_fits_shwi_p (ind
)
4128 && tree_to_shwi (ind
) == 0)
4129 usym
= TREE_OPERAND (usym
, 0);
4131 if (TREE_CODE (csym
) == ARRAY_REF
)
4133 tree ind
= TREE_OPERAND (csym
, 1);
4134 if (TREE_CODE (ind
) == INTEGER_CST
4135 && tree_fits_shwi_p (ind
)
4136 && tree_to_shwi (ind
) == 0)
4137 csym
= TREE_OPERAND (csym
, 0);
4139 if (operand_equal_p (usym
, csym
, 0))
4142 /* Now do more complex comparison */
4143 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4144 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4145 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4149 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4150 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4152 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4153 aff_combination_add (&ubase_aff
, &cbase_aff
);
4154 expr
= aff_combination_to_tree (&ubase_aff
);
4155 return get_expr_id (data
, expr
);
4160 /* Determines the cost of the computation by that USE is expressed
4161 from induction variable CAND. If ADDRESS_P is true, we just need
4162 to create an address from it, otherwise we want to get it into
4163 register. A set of invariants we depend on is stored in
4164 DEPENDS_ON. AT is the statement at that the value is computed.
4165 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4166 addressing is likely. */
4169 get_computation_cost_at (struct ivopts_data
*data
,
4170 struct iv_use
*use
, struct iv_cand
*cand
,
4171 bool address_p
, bitmap
*depends_on
, gimple at
,
4175 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4177 tree utype
= TREE_TYPE (ubase
), ctype
;
4178 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4179 HOST_WIDE_INT ratio
, aratio
;
4180 bool var_present
, symbol_present
, stmt_is_after_inc
;
4183 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4184 machine_mode mem_mode
= (address_p
4185 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4190 /* Only consider real candidates. */
4192 return infinite_cost
;
4194 cbase
= cand
->iv
->base
;
4195 cstep
= cand
->iv
->step
;
4196 ctype
= TREE_TYPE (cbase
);
4198 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4200 /* We do not have a precision to express the values of use. */
4201 return infinite_cost
;
4205 || (use
->iv
->base_object
4206 && cand
->iv
->base_object
4207 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4208 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4210 /* Do not try to express address of an object with computation based
4211 on address of a different object. This may cause problems in rtl
4212 level alias analysis (that does not expect this to be happening,
4213 as this is illegal in C), and would be unlikely to be useful
4215 if (use
->iv
->base_object
4216 && cand
->iv
->base_object
4217 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4218 return infinite_cost
;
4221 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4223 /* TODO -- add direct handling of this case. */
4227 /* CSTEPI is removed from the offset in case statement is after the
4228 increment. If the step is not constant, we use zero instead.
4229 This is a bit imprecise (there is the extra addition), but
4230 redundancy elimination is likely to transform the code so that
4231 it uses value of the variable before increment anyway,
4232 so it is not that much unrealistic. */
4233 if (cst_and_fits_in_hwi (cstep
))
4234 cstepi
= int_cst_value (cstep
);
4238 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4239 return infinite_cost
;
4241 if (wi::fits_shwi_p (rat
))
4242 ratio
= rat
.to_shwi ();
4244 return infinite_cost
;
4247 ctype
= TREE_TYPE (cbase
);
4249 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4251 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4252 or ratio == 1, it is better to handle this like
4254 ubase - ratio * cbase + ratio * var
4256 (also holds in the case ratio == -1, TODO. */
4258 if (cst_and_fits_in_hwi (cbase
))
4260 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4261 cost
= difference_cost (data
,
4262 ubase
, build_int_cst (utype
, 0),
4263 &symbol_present
, &var_present
, &offset
,
4265 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4267 else if (ratio
== 1)
4269 tree real_cbase
= cbase
;
4271 /* Check to see if any adjustment is needed. */
4272 if (cstepi
== 0 && stmt_is_after_inc
)
4274 aff_tree real_cbase_aff
;
4277 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4279 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4281 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4282 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4285 cost
= difference_cost (data
,
4287 &symbol_present
, &var_present
, &offset
,
4289 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4292 && !POINTER_TYPE_P (ctype
)
4293 && multiplier_allowed_in_address_p
4295 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4298 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4299 cost
= difference_cost (data
,
4301 &symbol_present
, &var_present
, &offset
,
4303 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4307 cost
= force_var_cost (data
, cbase
, depends_on
);
4308 cost
= add_costs (cost
,
4309 difference_cost (data
,
4310 ubase
, build_int_cst (utype
, 0),
4311 &symbol_present
, &var_present
,
4312 &offset
, depends_on
));
4313 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4314 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4320 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4321 /* Clear depends on. */
4322 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4323 bitmap_clear (*depends_on
);
4326 /* If we are after the increment, the value of the candidate is higher by
4328 if (stmt_is_after_inc
)
4329 offset
-= ratio
* cstepi
;
4331 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4332 (symbol/var1/const parts may be omitted). If we are looking for an
4333 address, find the cost of addressing this. */
4335 return add_costs (cost
,
4336 get_address_cost (symbol_present
, var_present
,
4337 offset
, ratio
, cstepi
,
4339 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4340 speed
, stmt_is_after_inc
,
4343 /* Otherwise estimate the costs for computing the expression. */
4344 if (!symbol_present
&& !var_present
&& !offset
)
4347 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4351 /* Symbol + offset should be compile-time computable so consider that they
4352 are added once to the variable, if present. */
4353 if (var_present
&& (symbol_present
|| offset
))
4354 cost
.cost
+= adjust_setup_cost (data
,
4355 add_cost (speed
, TYPE_MODE (ctype
)));
4357 /* Having offset does not affect runtime cost in case it is added to
4358 symbol, but it increases complexity. */
4362 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4364 aratio
= ratio
> 0 ? ratio
: -ratio
;
4366 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4371 *can_autoinc
= false;
4374 /* Just get the expression, expand it and measure the cost. */
4375 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4378 return infinite_cost
;
4381 comp
= build_simple_mem_ref (comp
);
4383 return new_cost (computation_cost (comp
, speed
), 0);
4387 /* Determines the cost of the computation by that USE is expressed
4388 from induction variable CAND. If ADDRESS_P is true, we just need
4389 to create an address from it, otherwise we want to get it into
4390 register. A set of invariants we depend on is stored in
4391 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4392 autoinc addressing is likely. */
4395 get_computation_cost (struct ivopts_data
*data
,
4396 struct iv_use
*use
, struct iv_cand
*cand
,
4397 bool address_p
, bitmap
*depends_on
,
4398 bool *can_autoinc
, int *inv_expr_id
)
4400 return get_computation_cost_at (data
,
4401 use
, cand
, address_p
, depends_on
, use
->stmt
,
4402 can_autoinc
, inv_expr_id
);
4405 /* Determines cost of basing replacement of USE on CAND in a generic
4409 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4410 struct iv_use
*use
, struct iv_cand
*cand
)
4414 int inv_expr_id
= -1;
4416 /* The simple case first -- if we need to express value of the preserved
4417 original biv, the cost is 0. This also prevents us from counting the
4418 cost of increment twice -- once at this use and once in the cost of
4420 if (cand
->pos
== IP_ORIGINAL
4421 && cand
->incremented_at
== use
->stmt
)
4423 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4428 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4429 NULL
, &inv_expr_id
);
4431 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4434 return !infinite_cost_p (cost
);
4437 /* Determines cost of basing replacement of USE on CAND in an address. */
4440 determine_use_iv_cost_address (struct ivopts_data
*data
,
4441 struct iv_use
*use
, struct iv_cand
*cand
)
4445 int inv_expr_id
= -1;
4446 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4447 &can_autoinc
, &inv_expr_id
);
4449 if (cand
->ainc_use
== use
)
4452 cost
.cost
-= cand
->cost_step
;
4453 /* If we generated the candidate solely for exploiting autoincrement
4454 opportunities, and it turns out it can't be used, set the cost to
4455 infinity to make sure we ignore it. */
4456 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4457 cost
= infinite_cost
;
4459 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4462 return !infinite_cost_p (cost
);
4465 /* Computes value of candidate CAND at position AT in iteration NITER, and
4466 stores it to VAL. */
4469 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4472 aff_tree step
, delta
, nit
;
4473 struct iv
*iv
= cand
->iv
;
4474 tree type
= TREE_TYPE (iv
->base
);
4475 tree steptype
= type
;
4476 if (POINTER_TYPE_P (type
))
4477 steptype
= sizetype
;
4478 steptype
= unsigned_type_for (type
);
4480 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4481 aff_combination_convert (&step
, steptype
);
4482 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4483 aff_combination_convert (&nit
, steptype
);
4484 aff_combination_mult (&nit
, &step
, &delta
);
4485 if (stmt_after_increment (loop
, cand
, at
))
4486 aff_combination_add (&delta
, &step
);
4488 tree_to_aff_combination (iv
->base
, type
, val
);
4489 if (!POINTER_TYPE_P (type
))
4490 aff_combination_convert (val
, steptype
);
4491 aff_combination_add (val
, &delta
);
4494 /* Returns period of induction variable iv. */
4497 iv_period (struct iv
*iv
)
4499 tree step
= iv
->step
, period
, type
;
4502 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4504 type
= unsigned_type_for (TREE_TYPE (step
));
4505 /* Period of the iv is lcm (step, type_range)/step -1,
4506 i.e., N*type_range/step - 1. Since type range is power
4507 of two, N == (step >> num_of_ending_zeros_binary (step),
4508 so the final result is
4510 (type_range >> num_of_ending_zeros_binary (step)) - 1
4513 pow2div
= num_ending_zeros (step
);
4515 period
= build_low_bits_mask (type
,
4516 (TYPE_PRECISION (type
)
4517 - tree_to_uhwi (pow2div
)));
4522 /* Returns the comparison operator used when eliminating the iv USE. */
4524 static enum tree_code
4525 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4527 struct loop
*loop
= data
->current_loop
;
4531 ex_bb
= gimple_bb (use
->stmt
);
4532 exit
= EDGE_SUCC (ex_bb
, 0);
4533 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4534 exit
= EDGE_SUCC (ex_bb
, 1);
4536 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4539 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4540 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4541 calculation is performed in non-wrapping type.
4543 TODO: More generally, we could test for the situation that
4544 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4545 This would require knowing the sign of OFFSET. */
4548 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4550 enum tree_code code
;
4552 aff_tree aff_e1
, aff_e2
, aff_offset
;
4554 if (!nowrap_type_p (TREE_TYPE (base
)))
4557 base
= expand_simple_operations (base
);
4559 if (TREE_CODE (base
) == SSA_NAME
)
4561 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4563 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4566 code
= gimple_assign_rhs_code (stmt
);
4567 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4570 e1
= gimple_assign_rhs1 (stmt
);
4571 e2
= gimple_assign_rhs2 (stmt
);
4575 code
= TREE_CODE (base
);
4576 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4578 e1
= TREE_OPERAND (base
, 0);
4579 e2
= TREE_OPERAND (base
, 1);
4582 /* Use affine expansion as deeper inspection to prove the equality. */
4583 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4584 &aff_e2
, &data
->name_expansion_cache
);
4585 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4586 &aff_offset
, &data
->name_expansion_cache
);
4587 aff_combination_scale (&aff_offset
, -1);
4591 aff_combination_add (&aff_e2
, &aff_offset
);
4592 if (aff_combination_zero_p (&aff_e2
))
4595 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4596 &aff_e1
, &data
->name_expansion_cache
);
4597 aff_combination_add (&aff_e1
, &aff_offset
);
4598 return aff_combination_zero_p (&aff_e1
);
4600 case POINTER_PLUS_EXPR
:
4601 aff_combination_add (&aff_e2
, &aff_offset
);
4602 return aff_combination_zero_p (&aff_e2
);
4609 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4610 comparison with CAND. NITER describes the number of iterations of
4611 the loops. If successful, the comparison in COMP_P is altered accordingly.
4613 We aim to handle the following situation:
4629 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4630 We aim to optimize this to
4638 while (p < p_0 - a + b);
4640 This preserves the correctness, since the pointer arithmetics does not
4641 overflow. More precisely:
4643 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4644 overflow in computing it or the values of p.
4645 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4646 overflow. To prove this, we use the fact that p_0 = base + a. */
4649 iv_elimination_compare_lt (struct ivopts_data
*data
,
4650 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4651 struct tree_niter_desc
*niter
)
4653 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4654 struct aff_tree nit
, tmpa
, tmpb
;
4655 enum tree_code comp
;
4658 /* We need to know that the candidate induction variable does not overflow.
4659 While more complex analysis may be used to prove this, for now just
4660 check that the variable appears in the original program and that it
4661 is computed in a type that guarantees no overflows. */
4662 cand_type
= TREE_TYPE (cand
->iv
->base
);
4663 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4666 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4667 the calculation of the BOUND could overflow, making the comparison
4669 if (!data
->loop_single_exit_p
)
4672 /* We need to be able to decide whether candidate is increasing or decreasing
4673 in order to choose the right comparison operator. */
4674 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4676 step
= int_cst_value (cand
->iv
->step
);
4678 /* Check that the number of iterations matches the expected pattern:
4679 a + 1 > b ? 0 : b - a - 1. */
4680 mbz
= niter
->may_be_zero
;
4681 if (TREE_CODE (mbz
) == GT_EXPR
)
4683 /* Handle a + 1 > b. */
4684 tree op0
= TREE_OPERAND (mbz
, 0);
4685 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4687 a
= TREE_OPERAND (op0
, 0);
4688 b
= TREE_OPERAND (mbz
, 1);
4693 else if (TREE_CODE (mbz
) == LT_EXPR
)
4695 tree op1
= TREE_OPERAND (mbz
, 1);
4697 /* Handle b < a + 1. */
4698 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4700 a
= TREE_OPERAND (op1
, 0);
4701 b
= TREE_OPERAND (mbz
, 0);
4709 /* Expected number of iterations is B - A - 1. Check that it matches
4710 the actual number, i.e., that B - A - NITER = 1. */
4711 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4712 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4713 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4714 aff_combination_scale (&nit
, -1);
4715 aff_combination_scale (&tmpa
, -1);
4716 aff_combination_add (&tmpb
, &tmpa
);
4717 aff_combination_add (&tmpb
, &nit
);
4718 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4721 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4723 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4725 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4726 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4729 /* Determine the new comparison operator. */
4730 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4731 if (*comp_p
== NE_EXPR
)
4733 else if (*comp_p
== EQ_EXPR
)
4734 *comp_p
= invert_tree_comparison (comp
, false);
4741 /* Check whether it is possible to express the condition in USE by comparison
4742 of candidate CAND. If so, store the value compared with to BOUND, and the
4743 comparison operator to COMP. */
4746 may_eliminate_iv (struct ivopts_data
*data
,
4747 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4748 enum tree_code
*comp
)
4753 struct loop
*loop
= data
->current_loop
;
4755 struct tree_niter_desc
*desc
= NULL
;
4757 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
4760 /* For now works only for exits that dominate the loop latch.
4761 TODO: extend to other conditions inside loop body. */
4762 ex_bb
= gimple_bb (use
->stmt
);
4763 if (use
->stmt
!= last_stmt (ex_bb
)
4764 || gimple_code (use
->stmt
) != GIMPLE_COND
4765 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
4768 exit
= EDGE_SUCC (ex_bb
, 0);
4769 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4770 exit
= EDGE_SUCC (ex_bb
, 1);
4771 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4774 desc
= niter_for_exit (data
, exit
);
4778 /* Determine whether we can use the variable to test the exit condition.
4779 This is the case iff the period of the induction variable is greater
4780 than the number of iterations for which the exit condition is true. */
4781 period
= iv_period (cand
->iv
);
4783 /* If the number of iterations is constant, compare against it directly. */
4784 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
4786 /* See cand_value_at. */
4787 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4789 if (!tree_int_cst_lt (desc
->niter
, period
))
4794 if (tree_int_cst_lt (period
, desc
->niter
))
4799 /* If not, and if this is the only possible exit of the loop, see whether
4800 we can get a conservative estimate on the number of iterations of the
4801 entire loop and compare against that instead. */
4804 widest_int period_value
, max_niter
;
4806 max_niter
= desc
->max
;
4807 if (stmt_after_increment (loop
, cand
, use
->stmt
))
4809 period_value
= wi::to_widest (period
);
4810 if (wi::gtu_p (max_niter
, period_value
))
4812 /* See if we can take advantage of inferred loop bound information. */
4813 if (data
->loop_single_exit_p
)
4815 if (!max_loop_iterations (loop
, &max_niter
))
4817 /* The loop bound is already adjusted by adding 1. */
4818 if (wi::gtu_p (max_niter
, period_value
))
4826 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
4828 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
4829 aff_combination_to_tree (&bnd
));
4830 *comp
= iv_elimination_compare (data
, use
);
4832 /* It is unlikely that computing the number of iterations using division
4833 would be more profitable than keeping the original induction variable. */
4834 if (expression_expensive_p (*bound
))
4837 /* Sometimes, it is possible to handle the situation that the number of
4838 iterations may be zero unless additional assumtions by using <
4839 instead of != in the exit condition.
4841 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4842 base the exit condition on it. However, that is often too
4844 if (!integer_zerop (desc
->may_be_zero
))
4845 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
4850 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4851 be copied, if is is used in the loop body and DATA->body_includes_call. */
4854 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
4856 tree sbound
= bound
;
4857 STRIP_NOPS (sbound
);
4859 if (TREE_CODE (sbound
) == SSA_NAME
4860 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
4861 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
4862 && data
->body_includes_call
)
4863 return COSTS_N_INSNS (1);
4868 /* Determines cost of basing replacement of USE on CAND in a condition. */
4871 determine_use_iv_cost_condition (struct ivopts_data
*data
,
4872 struct iv_use
*use
, struct iv_cand
*cand
)
4874 tree bound
= NULL_TREE
;
4876 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
4877 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
4879 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
4880 tree
*control_var
, *bound_cst
;
4881 enum tree_code comp
= ERROR_MARK
;
4883 /* Only consider real candidates. */
4886 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
4891 /* Try iv elimination. */
4892 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
4894 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
4895 if (elim_cost
.cost
== 0)
4896 elim_cost
.cost
= parm_decl_cost (data
, bound
);
4897 else if (TREE_CODE (bound
) == INTEGER_CST
)
4899 /* If we replace a loop condition 'i < n' with 'p < base + n',
4900 depends_on_elim will have 'base' and 'n' set, which implies
4901 that both 'base' and 'n' will be live during the loop. More likely,
4902 'base + n' will be loop invariant, resulting in only one live value
4903 during the loop. So in that case we clear depends_on_elim and set
4904 elim_inv_expr_id instead. */
4905 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
4907 elim_inv_expr_id
= get_expr_id (data
, bound
);
4908 bitmap_clear (depends_on_elim
);
4910 /* The bound is a loop invariant, so it will be only computed
4912 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
4915 elim_cost
= infinite_cost
;
4917 /* Try expressing the original giv. If it is compared with an invariant,
4918 note that we cannot get rid of it. */
4919 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
4923 /* When the condition is a comparison of the candidate IV against
4924 zero, prefer this IV.
4926 TODO: The constant that we're subtracting from the cost should
4927 be target-dependent. This information should be added to the
4928 target costs for each backend. */
4929 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
4930 && integer_zerop (*bound_cst
)
4931 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
4932 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
4933 elim_cost
.cost
-= 1;
4935 express_cost
= get_computation_cost (data
, use
, cand
, false,
4936 &depends_on_express
, NULL
,
4937 &express_inv_expr_id
);
4938 fd_ivopts_data
= data
;
4939 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
4941 /* Count the cost of the original bound as well. */
4942 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
4943 if (bound_cost
.cost
== 0)
4944 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
4945 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
4946 bound_cost
.cost
= 0;
4947 express_cost
.cost
+= bound_cost
.cost
;
4949 /* Choose the better approach, preferring the eliminated IV. */
4950 if (compare_costs (elim_cost
, express_cost
) <= 0)
4953 depends_on
= depends_on_elim
;
4954 depends_on_elim
= NULL
;
4955 inv_expr_id
= elim_inv_expr_id
;
4959 cost
= express_cost
;
4960 depends_on
= depends_on_express
;
4961 depends_on_express
= NULL
;
4964 inv_expr_id
= express_inv_expr_id
;
4967 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
4969 if (depends_on_elim
)
4970 BITMAP_FREE (depends_on_elim
);
4971 if (depends_on_express
)
4972 BITMAP_FREE (depends_on_express
);
4974 return !infinite_cost_p (cost
);
4977 /* Determines cost of basing replacement of USE on CAND. Returns false
4978 if USE cannot be based on CAND. */
4981 determine_use_iv_cost (struct ivopts_data
*data
,
4982 struct iv_use
*use
, struct iv_cand
*cand
)
4986 case USE_NONLINEAR_EXPR
:
4987 return determine_use_iv_cost_generic (data
, use
, cand
);
4990 return determine_use_iv_cost_address (data
, use
, cand
);
4993 return determine_use_iv_cost_condition (data
, use
, cand
);
5000 /* Return true if get_computation_cost indicates that autoincrement is
5001 a possibility for the pair of USE and CAND, false otherwise. */
5004 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5005 struct iv_cand
*cand
)
5011 if (use
->type
!= USE_ADDRESS
)
5014 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5015 &can_autoinc
, NULL
);
5017 BITMAP_FREE (depends_on
);
5019 return !infinite_cost_p (cost
) && can_autoinc
;
5022 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5023 use that allows autoincrement, and set their AINC_USE if possible. */
5026 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5030 for (i
= 0; i
< n_iv_cands (data
); i
++)
5032 struct iv_cand
*cand
= iv_cand (data
, i
);
5033 struct iv_use
*closest_before
= NULL
;
5034 struct iv_use
*closest_after
= NULL
;
5035 if (cand
->pos
!= IP_ORIGINAL
)
5038 for (j
= 0; j
< n_iv_uses (data
); j
++)
5040 struct iv_use
*use
= iv_use (data
, j
);
5041 unsigned uid
= gimple_uid (use
->stmt
);
5043 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5046 if (uid
< gimple_uid (cand
->incremented_at
)
5047 && (closest_before
== NULL
5048 || uid
> gimple_uid (closest_before
->stmt
)))
5049 closest_before
= use
;
5051 if (uid
> gimple_uid (cand
->incremented_at
)
5052 && (closest_after
== NULL
5053 || uid
< gimple_uid (closest_after
->stmt
)))
5054 closest_after
= use
;
5057 if (closest_before
!= NULL
5058 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5059 cand
->ainc_use
= closest_before
;
5060 else if (closest_after
!= NULL
5061 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5062 cand
->ainc_use
= closest_after
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
5086 /* Determines costs of basing the use of the iv on an iv candidate. */
5089 determine_use_iv_costs (struct ivopts_data
*data
)
5093 struct iv_cand
*cand
;
5094 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5096 alloc_use_cost_map (data
);
5098 for (i
= 0; i
< n_iv_uses (data
); i
++)
5100 use
= iv_use (data
, i
);
5102 if (data
->consider_all_candidates
)
5104 for (j
= 0; j
< n_iv_cands (data
); j
++)
5106 cand
= iv_cand (data
, j
);
5107 determine_use_iv_cost (data
, use
, cand
);
5114 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5116 cand
= iv_cand (data
, j
);
5117 if (!determine_use_iv_cost (data
, use
, cand
))
5118 bitmap_set_bit (to_clear
, j
);
5121 /* Remove the candidates for that the cost is infinite from
5122 the list of related candidates. */
5123 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5124 bitmap_clear (to_clear
);
5128 BITMAP_FREE (to_clear
);
5130 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5132 fprintf (dump_file
, "Use-candidate costs:\n");
5134 for (i
= 0; i
< n_iv_uses (data
); i
++)
5136 use
= iv_use (data
, i
);
5138 fprintf (dump_file
, "Use %d:\n", i
);
5139 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5140 for (j
= 0; j
< use
->n_map_members
; j
++)
5142 if (!use
->cost_map
[j
].cand
5143 || infinite_cost_p (use
->cost_map
[j
].cost
))
5146 fprintf (dump_file
, " %d\t%d\t%d\t",
5147 use
->cost_map
[j
].cand
->id
,
5148 use
->cost_map
[j
].cost
.cost
,
5149 use
->cost_map
[j
].cost
.complexity
);
5150 if (use
->cost_map
[j
].depends_on
)
5151 bitmap_print (dump_file
,
5152 use
->cost_map
[j
].depends_on
, "","");
5153 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5154 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5155 fprintf (dump_file
, "\n");
5158 fprintf (dump_file
, "\n");
5160 fprintf (dump_file
, "\n");
5164 /* Determines cost of the candidate CAND. */
5167 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5169 comp_cost cost_base
;
5170 unsigned cost
, cost_step
;
5179 /* There are two costs associated with the candidate -- its increment
5180 and its initialization. The second is almost negligible for any loop
5181 that rolls enough, so we take it just very little into account. */
5183 base
= cand
->iv
->base
;
5184 cost_base
= force_var_cost (data
, base
, NULL
);
5185 /* It will be exceptional that the iv register happens to be initialized with
5186 the proper value at no cost. In general, there will at least be a regcopy
5188 if (cost_base
.cost
== 0)
5189 cost_base
.cost
= COSTS_N_INSNS (1);
5190 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5192 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5194 /* Prefer the original ivs unless we may gain something by replacing it.
5195 The reason is to make debugging simpler; so this is not relevant for
5196 artificial ivs created by other optimization passes. */
5197 if (cand
->pos
!= IP_ORIGINAL
5198 || !SSA_NAME_VAR (cand
->var_before
)
5199 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5202 /* Prefer not to insert statements into latch unless there are some
5203 already (so that we do not create unnecessary jumps). */
5204 if (cand
->pos
== IP_END
5205 && empty_block_p (ip_end_pos (data
->current_loop
)))
5209 cand
->cost_step
= cost_step
;
5212 /* Determines costs of computation of the candidates. */
5215 determine_iv_costs (struct ivopts_data
*data
)
5219 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5221 fprintf (dump_file
, "Candidate costs:\n");
5222 fprintf (dump_file
, " cand\tcost\n");
5225 for (i
= 0; i
< n_iv_cands (data
); i
++)
5227 struct iv_cand
*cand
= iv_cand (data
, i
);
5229 determine_iv_cost (data
, cand
);
5231 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5232 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5235 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5236 fprintf (dump_file
, "\n");
5239 /* Calculates cost for having SIZE induction variables. */
5242 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5244 /* We add size to the cost, so that we prefer eliminating ivs
5246 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5247 data
->body_includes_call
);
5250 /* For each size of the induction variable set determine the penalty. */
5253 determine_set_costs (struct ivopts_data
*data
)
5259 struct loop
*loop
= data
->current_loop
;
5262 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5264 fprintf (dump_file
, "Global costs:\n");
5265 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5266 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5267 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5268 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5272 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5275 op
= PHI_RESULT (phi
);
5277 if (virtual_operand_p (op
))
5280 if (get_iv (data
, op
))
5286 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5288 struct version_info
*info
= ver_info (data
, j
);
5290 if (info
->inv_id
&& info
->has_nonlin_use
)
5294 data
->regs_used
= n
;
5295 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5296 fprintf (dump_file
, " regs_used %d\n", n
);
5298 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5300 fprintf (dump_file
, " cost for size:\n");
5301 fprintf (dump_file
, " ivs\tcost\n");
5302 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5303 fprintf (dump_file
, " %d\t%d\n", j
,
5304 ivopts_global_cost_for_size (data
, j
));
5305 fprintf (dump_file
, "\n");
5309 /* Returns true if A is a cheaper cost pair than B. */
5312 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5322 cmp
= compare_costs (a
->cost
, b
->cost
);
5329 /* In case the costs are the same, prefer the cheaper candidate. */
5330 if (a
->cand
->cost
< b
->cand
->cost
)
5337 /* Returns candidate by that USE is expressed in IVS. */
5339 static struct cost_pair
*
5340 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5342 return ivs
->cand_for_use
[use
->id
];
5345 /* Computes the cost field of IVS structure. */
5348 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5350 comp_cost cost
= ivs
->cand_use_cost
;
5352 cost
.cost
+= ivs
->cand_cost
;
5354 cost
.cost
+= ivopts_global_cost_for_size (data
,
5355 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5360 /* Remove invariants in set INVS to set IVS. */
5363 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5371 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5373 ivs
->n_invariant_uses
[iid
]--;
5374 if (ivs
->n_invariant_uses
[iid
] == 0)
5379 /* Set USE not to be expressed by any candidate in IVS. */
5382 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5385 unsigned uid
= use
->id
, cid
;
5386 struct cost_pair
*cp
;
5388 cp
= ivs
->cand_for_use
[uid
];
5394 ivs
->cand_for_use
[uid
] = NULL
;
5395 ivs
->n_cand_uses
[cid
]--;
5397 if (ivs
->n_cand_uses
[cid
] == 0)
5399 bitmap_clear_bit (ivs
->cands
, cid
);
5400 /* Do not count the pseudocandidates. */
5404 ivs
->cand_cost
-= cp
->cand
->cost
;
5406 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5409 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5411 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5413 if (cp
->inv_expr_id
!= -1)
5415 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5416 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5417 ivs
->num_used_inv_expr
--;
5419 iv_ca_recount_cost (data
, ivs
);
5422 /* Add invariants in set INVS to set IVS. */
5425 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5433 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5435 ivs
->n_invariant_uses
[iid
]++;
5436 if (ivs
->n_invariant_uses
[iid
] == 1)
5441 /* Set cost pair for USE in set IVS to CP. */
5444 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5445 struct iv_use
*use
, struct cost_pair
*cp
)
5447 unsigned uid
= use
->id
, cid
;
5449 if (ivs
->cand_for_use
[uid
] == cp
)
5452 if (ivs
->cand_for_use
[uid
])
5453 iv_ca_set_no_cp (data
, ivs
, use
);
5460 ivs
->cand_for_use
[uid
] = cp
;
5461 ivs
->n_cand_uses
[cid
]++;
5462 if (ivs
->n_cand_uses
[cid
] == 1)
5464 bitmap_set_bit (ivs
->cands
, cid
);
5465 /* Do not count the pseudocandidates. */
5469 ivs
->cand_cost
+= cp
->cand
->cost
;
5471 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5474 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5475 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5477 if (cp
->inv_expr_id
!= -1)
5479 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5480 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5481 ivs
->num_used_inv_expr
++;
5483 iv_ca_recount_cost (data
, ivs
);
5487 /* Extend set IVS by expressing USE by some of the candidates in it
5488 if possible. Consider all important candidates if candidates in
5489 set IVS don't give any result. */
5492 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5495 struct cost_pair
*best_cp
= NULL
, *cp
;
5498 struct iv_cand
*cand
;
5500 gcc_assert (ivs
->upto
>= use
->id
);
5504 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5506 cand
= iv_cand (data
, i
);
5507 cp
= get_use_iv_cost (data
, use
, cand
);
5508 if (cheaper_cost_pair (cp
, best_cp
))
5512 if (best_cp
== NULL
)
5514 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5516 cand
= iv_cand (data
, i
);
5517 cp
= get_use_iv_cost (data
, use
, cand
);
5518 if (cheaper_cost_pair (cp
, best_cp
))
5523 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5526 /* Get cost for assignment IVS. */
5529 iv_ca_cost (struct iv_ca
*ivs
)
5531 /* This was a conditional expression but it triggered a bug in
5534 return infinite_cost
;
5539 /* Returns true if all dependences of CP are among invariants in IVS. */
5542 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5547 if (!cp
->depends_on
)
5550 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5552 if (ivs
->n_invariant_uses
[i
] == 0)
5559 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5560 it before NEXT_CHANGE. */
5562 static struct iv_ca_delta
*
5563 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5564 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5566 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5569 change
->old_cp
= old_cp
;
5570 change
->new_cp
= new_cp
;
5571 change
->next_change
= next_change
;
5576 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5579 static struct iv_ca_delta
*
5580 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5582 struct iv_ca_delta
*last
;
5590 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5592 last
->next_change
= l2
;
5597 /* Reverse the list of changes DELTA, forming the inverse to it. */
5599 static struct iv_ca_delta
*
5600 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5602 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5603 struct cost_pair
*tmp
;
5605 for (act
= delta
; act
; act
= next
)
5607 next
= act
->next_change
;
5608 act
->next_change
= prev
;
5612 act
->old_cp
= act
->new_cp
;
5619 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5620 reverted instead. */
5623 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5624 struct iv_ca_delta
*delta
, bool forward
)
5626 struct cost_pair
*from
, *to
;
5627 struct iv_ca_delta
*act
;
5630 delta
= iv_ca_delta_reverse (delta
);
5632 for (act
= delta
; act
; act
= act
->next_change
)
5636 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5637 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5641 iv_ca_delta_reverse (delta
);
5644 /* Returns true if CAND is used in IVS. */
5647 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5649 return ivs
->n_cand_uses
[cand
->id
] > 0;
5652 /* Returns number of induction variable candidates in the set IVS. */
5655 iv_ca_n_cands (struct iv_ca
*ivs
)
5657 return ivs
->n_cands
;
5660 /* Free the list of changes DELTA. */
5663 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5665 struct iv_ca_delta
*act
, *next
;
5667 for (act
= *delta
; act
; act
= next
)
5669 next
= act
->next_change
;
5676 /* Allocates new iv candidates assignment. */
5678 static struct iv_ca
*
5679 iv_ca_new (struct ivopts_data
*data
)
5681 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5685 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5686 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5687 nw
->cands
= BITMAP_ALLOC (NULL
);
5690 nw
->cand_use_cost
= no_cost
;
5692 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5694 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5695 nw
->num_used_inv_expr
= 0;
5700 /* Free memory occupied by the set IVS. */
5703 iv_ca_free (struct iv_ca
**ivs
)
5705 free ((*ivs
)->cand_for_use
);
5706 free ((*ivs
)->n_cand_uses
);
5707 BITMAP_FREE ((*ivs
)->cands
);
5708 free ((*ivs
)->n_invariant_uses
);
5709 free ((*ivs
)->used_inv_expr
);
5714 /* Dumps IVS to FILE. */
5717 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5719 const char *pref
= " invariants ";
5721 comp_cost cost
= iv_ca_cost (ivs
);
5723 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5724 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5725 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5726 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5728 for (i
= 0; i
< ivs
->upto
; i
++)
5730 struct iv_use
*use
= iv_use (data
, i
);
5731 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5733 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5734 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5736 fprintf (file
, " use:%d --> ??\n", use
->id
);
5739 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5740 if (ivs
->n_invariant_uses
[i
])
5742 fprintf (file
, "%s%d", pref
, i
);
5745 fprintf (file
, "\n\n");
5748 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5749 new set, and store differences in DELTA. Number of induction variables
5750 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5751 the function will try to find a solution with mimimal iv candidates. */
5754 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5755 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
5756 unsigned *n_ivs
, bool min_ncand
)
5761 struct cost_pair
*old_cp
, *new_cp
;
5764 for (i
= 0; i
< ivs
->upto
; i
++)
5766 use
= iv_use (data
, i
);
5767 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5770 && old_cp
->cand
== cand
)
5773 new_cp
= get_use_iv_cost (data
, use
, cand
);
5777 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
5780 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
5783 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5786 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5787 cost
= iv_ca_cost (ivs
);
5789 *n_ivs
= iv_ca_n_cands (ivs
);
5790 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5795 /* Try narrowing set IVS by removing CAND. Return the cost of
5796 the new set and store the differences in DELTA. START is
5797 the candidate with which we start narrowing. */
5800 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5801 struct iv_cand
*cand
, struct iv_cand
*start
,
5802 struct iv_ca_delta
**delta
)
5806 struct cost_pair
*old_cp
, *new_cp
, *cp
;
5808 struct iv_cand
*cnd
;
5809 comp_cost cost
, best_cost
, acost
;
5812 for (i
= 0; i
< n_iv_uses (data
); i
++)
5814 use
= iv_use (data
, i
);
5816 old_cp
= iv_ca_cand_for_use (ivs
, use
);
5817 if (old_cp
->cand
!= cand
)
5820 best_cost
= iv_ca_cost (ivs
);
5821 /* Start narrowing with START. */
5822 new_cp
= get_use_iv_cost (data
, use
, start
);
5824 if (data
->consider_all_candidates
)
5826 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
5828 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5831 cnd
= iv_cand (data
, ci
);
5833 cp
= get_use_iv_cost (data
, use
, cnd
);
5837 iv_ca_set_cp (data
, ivs
, use
, cp
);
5838 acost
= iv_ca_cost (ivs
);
5840 if (compare_costs (acost
, best_cost
) < 0)
5849 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
5851 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
5854 cnd
= iv_cand (data
, ci
);
5856 cp
= get_use_iv_cost (data
, use
, cnd
);
5860 iv_ca_set_cp (data
, ivs
, use
, cp
);
5861 acost
= iv_ca_cost (ivs
);
5863 if (compare_costs (acost
, best_cost
) < 0)
5870 /* Restore to old cp for use. */
5871 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
5875 iv_ca_delta_free (delta
);
5876 return infinite_cost
;
5879 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
5882 iv_ca_delta_commit (data
, ivs
, *delta
, true);
5883 cost
= iv_ca_cost (ivs
);
5884 iv_ca_delta_commit (data
, ivs
, *delta
, false);
5889 /* Try optimizing the set of candidates IVS by removing candidates different
5890 from to EXCEPT_CAND from it. Return cost of the new set, and store
5891 differences in DELTA. */
5894 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5895 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
5898 struct iv_ca_delta
*act_delta
, *best_delta
;
5900 comp_cost best_cost
, acost
;
5901 struct iv_cand
*cand
;
5904 best_cost
= iv_ca_cost (ivs
);
5906 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5908 cand
= iv_cand (data
, i
);
5910 if (cand
== except_cand
)
5913 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
5915 if (compare_costs (acost
, best_cost
) < 0)
5918 iv_ca_delta_free (&best_delta
);
5919 best_delta
= act_delta
;
5922 iv_ca_delta_free (&act_delta
);
5931 /* Recurse to possibly remove other unnecessary ivs. */
5932 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
5933 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
5934 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
5935 *delta
= iv_ca_delta_join (best_delta
, *delta
);
5939 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5940 cheaper local cost for USE than BEST_CP. Return pointer to
5941 the corresponding cost_pair, otherwise just return BEST_CP. */
5943 static struct cost_pair
*
5944 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
5945 unsigned int cand_idx
, struct iv_cand
*old_cand
,
5946 struct cost_pair
*best_cp
)
5948 struct iv_cand
*cand
;
5949 struct cost_pair
*cp
;
5951 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
5952 if (cand_idx
== old_cand
->id
)
5955 cand
= iv_cand (data
, cand_idx
);
5956 cp
= get_use_iv_cost (data
, use
, cand
);
5957 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
5963 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5964 which are used by more than one iv uses. For each of those candidates,
5965 this function tries to represent iv uses under that candidate using
5966 other ones with lower local cost, then tries to prune the new set.
5967 If the new set has lower cost, It returns the new cost after recording
5968 candidate replacement in list DELTA. */
5971 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5972 struct iv_ca_delta
**delta
)
5974 bitmap_iterator bi
, bj
;
5975 unsigned int i
, j
, k
;
5977 struct iv_cand
*cand
;
5978 comp_cost orig_cost
, acost
;
5979 struct iv_ca_delta
*act_delta
, *tmp_delta
;
5980 struct cost_pair
*old_cp
, *best_cp
= NULL
;
5983 orig_cost
= iv_ca_cost (ivs
);
5985 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5987 if (ivs
->n_cand_uses
[i
] == 1
5988 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
5991 cand
= iv_cand (data
, i
);
5994 /* Represent uses under current candidate using other ones with
5995 lower local cost. */
5996 for (j
= 0; j
< ivs
->upto
; j
++)
5998 use
= iv_use (data
, j
);
5999 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6001 if (old_cp
->cand
!= cand
)
6005 if (data
->consider_all_candidates
)
6006 for (k
= 0; k
< n_iv_cands (data
); k
++)
6007 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6008 old_cp
->cand
, best_cp
);
6010 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6011 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6012 old_cp
->cand
, best_cp
);
6014 if (best_cp
== old_cp
)
6017 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6019 /* No need for further prune. */
6023 /* Prune the new candidate set. */
6024 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6025 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6026 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6027 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6029 if (compare_costs (acost
, orig_cost
) < 0)
6035 iv_ca_delta_free (&act_delta
);
6041 /* Tries to extend the sets IVS in the best possible way in order
6042 to express the USE. If ORIGINALP is true, prefer candidates from
6043 the original set of IVs, otherwise favor important candidates not
6044 based on any memory object. */
6047 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6048 struct iv_use
*use
, bool originalp
)
6050 comp_cost best_cost
, act_cost
;
6053 struct iv_cand
*cand
;
6054 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6055 struct cost_pair
*cp
;
6057 iv_ca_add_use (data
, ivs
, use
);
6058 best_cost
= iv_ca_cost (ivs
);
6059 cp
= iv_ca_cand_for_use (ivs
, use
);
6062 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6063 iv_ca_set_no_cp (data
, ivs
, use
);
6066 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6067 first try important candidates not based on any memory object. Only if
6068 this fails, try the specific ones. Rationale -- in loops with many
6069 variables the best choice often is to use just one generic biv. If we
6070 added here many ivs specific to the uses, the optimization algorithm later
6071 would be likely to get stuck in a local minimum, thus causing us to create
6072 too many ivs. The approach from few ivs to more seems more likely to be
6073 successful -- starting from few ivs, replacing an expensive use by a
6074 specific iv should always be a win. */
6075 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6077 cand
= iv_cand (data
, i
);
6079 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6082 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6085 if (iv_ca_cand_used_p (ivs
, cand
))
6088 cp
= get_use_iv_cost (data
, use
, cand
);
6092 iv_ca_set_cp (data
, ivs
, use
, cp
);
6093 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6095 iv_ca_set_no_cp (data
, ivs
, use
);
6096 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6098 if (compare_costs (act_cost
, best_cost
) < 0)
6100 best_cost
= act_cost
;
6102 iv_ca_delta_free (&best_delta
);
6103 best_delta
= act_delta
;
6106 iv_ca_delta_free (&act_delta
);
6109 if (infinite_cost_p (best_cost
))
6111 for (i
= 0; i
< use
->n_map_members
; i
++)
6113 cp
= use
->cost_map
+ i
;
6118 /* Already tried this. */
6119 if (cand
->important
)
6121 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6123 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6127 if (iv_ca_cand_used_p (ivs
, cand
))
6131 iv_ca_set_cp (data
, ivs
, use
, cp
);
6132 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6133 iv_ca_set_no_cp (data
, ivs
, use
);
6134 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6137 if (compare_costs (act_cost
, best_cost
) < 0)
6139 best_cost
= act_cost
;
6142 iv_ca_delta_free (&best_delta
);
6143 best_delta
= act_delta
;
6146 iv_ca_delta_free (&act_delta
);
6150 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6151 iv_ca_delta_free (&best_delta
);
6153 return !infinite_cost_p (best_cost
);
6156 /* Finds an initial assignment of candidates to uses. */
6158 static struct iv_ca
*
6159 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6161 struct iv_ca
*ivs
= iv_ca_new (data
);
6164 for (i
= 0; i
< n_iv_uses (data
); i
++)
6165 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6174 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6175 points to a bool variable, this function tries to break local
6176 optimal fixed-point by replacing candidates in IVS if it's true. */
6179 try_improve_iv_set (struct ivopts_data
*data
,
6180 struct iv_ca
*ivs
, bool *try_replace_p
)
6183 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6184 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6185 struct iv_cand
*cand
;
6187 /* Try extending the set of induction variables by one. */
6188 for (i
= 0; i
< n_iv_cands (data
); i
++)
6190 cand
= iv_cand (data
, i
);
6192 if (iv_ca_cand_used_p (ivs
, cand
))
6195 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6199 /* If we successfully added the candidate and the set is small enough,
6200 try optimizing it by removing other candidates. */
6201 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6203 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6204 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6205 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6206 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6209 if (compare_costs (acost
, best_cost
) < 0)
6212 iv_ca_delta_free (&best_delta
);
6213 best_delta
= act_delta
;
6216 iv_ca_delta_free (&act_delta
);
6221 /* Try removing the candidates from the set instead. */
6222 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6224 if (!best_delta
&& *try_replace_p
)
6226 *try_replace_p
= false;
6227 /* So far candidate selecting algorithm tends to choose fewer IVs
6228 so that it can handle cases in which loops have many variables
6229 but the best choice is often to use only one general biv. One
6230 weakness is it can't handle opposite cases, in which different
6231 candidates should be chosen with respect to each use. To solve
6232 the problem, we replace candidates in a manner described by the
6233 comments of iv_ca_replace, thus give general algorithm a chance
6234 to break local optimal fixed-point in these cases. */
6235 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6242 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6243 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6244 iv_ca_delta_free (&best_delta
);
6248 /* Attempts to find the optimal set of induction variables. We do simple
6249 greedy heuristic -- we try to replace at most one candidate in the selected
6250 solution and remove the unused ivs while this improves the cost. */
6252 static struct iv_ca
*
6253 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6256 bool try_replace_p
= true;
6258 /* Get the initial solution. */
6259 set
= get_initial_solution (data
, originalp
);
6262 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6263 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6267 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6269 fprintf (dump_file
, "Initial set of candidates:\n");
6270 iv_ca_dump (data
, dump_file
, set
);
6273 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6275 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6277 fprintf (dump_file
, "Improved to:\n");
6278 iv_ca_dump (data
, dump_file
, set
);
6285 static struct iv_ca
*
6286 find_optimal_iv_set (struct ivopts_data
*data
)
6289 struct iv_ca
*set
, *origset
;
6291 comp_cost cost
, origcost
;
6293 /* Determine the cost based on a strategy that starts with original IVs,
6294 and try again using a strategy that prefers candidates not based
6296 origset
= find_optimal_iv_set_1 (data
, true);
6297 set
= find_optimal_iv_set_1 (data
, false);
6299 if (!origset
&& !set
)
6302 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6303 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6305 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6307 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6308 origcost
.cost
, origcost
.complexity
);
6309 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6310 cost
.cost
, cost
.complexity
);
6313 /* Choose the one with the best cost. */
6314 if (compare_costs (origcost
, cost
) <= 0)
6321 iv_ca_free (&origset
);
6323 for (i
= 0; i
< n_iv_uses (data
); i
++)
6325 use
= iv_use (data
, i
);
6326 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6332 /* Creates a new induction variable corresponding to CAND. */
6335 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6337 gimple_stmt_iterator incr_pos
;
6347 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6351 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6359 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6363 /* Mark that the iv is preserved. */
6364 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6365 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6367 /* Rewrite the increment so that it uses var_before directly. */
6368 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6372 gimple_add_tmp_var (cand
->var_before
);
6374 base
= unshare_expr (cand
->iv
->base
);
6376 create_iv (base
, unshare_expr (cand
->iv
->step
),
6377 cand
->var_before
, data
->current_loop
,
6378 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6381 /* Creates new induction variables described in SET. */
6384 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6387 struct iv_cand
*cand
;
6390 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6392 cand
= iv_cand (data
, i
);
6393 create_new_iv (data
, cand
);
6396 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6398 fprintf (dump_file
, "Selected IV set for loop %d",
6399 data
->current_loop
->num
);
6400 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6401 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6402 LOCATION_LINE (data
->loop_loc
));
6403 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6404 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6406 cand
= iv_cand (data
, i
);
6407 dump_cand (dump_file
, cand
);
6409 fprintf (dump_file
, "\n");
6413 /* Rewrites USE (definition of iv used in a nonlinear expression)
6414 using candidate CAND. */
6417 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6418 struct iv_use
*use
, struct iv_cand
*cand
)
6423 gimple_stmt_iterator bsi
;
6425 /* An important special case -- if we are asked to express value of
6426 the original iv by itself, just exit; there is no need to
6427 introduce a new computation (that might also need casting the
6428 variable to unsigned and back). */
6429 if (cand
->pos
== IP_ORIGINAL
6430 && cand
->incremented_at
== use
->stmt
)
6432 enum tree_code stmt_code
;
6434 gcc_assert (is_gimple_assign (use
->stmt
));
6435 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6437 /* Check whether we may leave the computation unchanged.
6438 This is the case only if it does not rely on other
6439 computations in the loop -- otherwise, the computation
6440 we rely upon may be removed in remove_unused_ivs,
6441 thus leading to ICE. */
6442 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6443 if (stmt_code
== PLUS_EXPR
6444 || stmt_code
== MINUS_EXPR
6445 || stmt_code
== POINTER_PLUS_EXPR
)
6447 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6448 op
= gimple_assign_rhs2 (use
->stmt
);
6449 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6450 op
= gimple_assign_rhs1 (use
->stmt
);
6457 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6461 comp
= get_computation (data
->current_loop
, use
, cand
);
6462 gcc_assert (comp
!= NULL_TREE
);
6464 switch (gimple_code (use
->stmt
))
6467 tgt
= PHI_RESULT (use
->stmt
);
6469 /* If we should keep the biv, do not replace it. */
6470 if (name_info (data
, tgt
)->preserve_biv
)
6473 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6477 tgt
= gimple_assign_lhs (use
->stmt
);
6478 bsi
= gsi_for_stmt (use
->stmt
);
6485 if (!valid_gimple_rhs_p (comp
)
6486 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6487 /* We can't allow re-allocating the stmt as it might be pointed
6489 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6490 >= gimple_num_ops (gsi_stmt (bsi
)))))
6492 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6493 true, GSI_SAME_STMT
);
6494 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6496 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6497 /* As this isn't a plain copy we have to reset alignment
6499 if (SSA_NAME_PTR_INFO (comp
))
6500 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6504 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6506 ass
= gimple_build_assign (tgt
, comp
);
6507 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6509 bsi
= gsi_for_stmt (use
->stmt
);
6510 remove_phi_node (&bsi
, false);
6514 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6515 use
->stmt
= gsi_stmt (bsi
);
6519 /* Performs a peephole optimization to reorder the iv update statement with
6520 a mem ref to enable instruction combining in later phases. The mem ref uses
6521 the iv value before the update, so the reordering transformation requires
6522 adjustment of the offset. CAND is the selected IV_CAND.
6526 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6534 directly propagating t over to (1) will introduce overlapping live range
6535 thus increase register pressure. This peephole transform it into:
6539 t = MEM_REF (base, iv2, 8, 8);
6546 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6549 gimple iv_update
, stmt
;
6551 gimple_stmt_iterator gsi
, gsi_iv
;
6553 if (cand
->pos
!= IP_NORMAL
)
6556 var_after
= cand
->var_after
;
6557 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6559 bb
= gimple_bb (iv_update
);
6560 gsi
= gsi_last_nondebug_bb (bb
);
6561 stmt
= gsi_stmt (gsi
);
6563 /* Only handle conditional statement for now. */
6564 if (gimple_code (stmt
) != GIMPLE_COND
)
6567 gsi_prev_nondebug (&gsi
);
6568 stmt
= gsi_stmt (gsi
);
6569 if (stmt
!= iv_update
)
6572 gsi_prev_nondebug (&gsi
);
6573 if (gsi_end_p (gsi
))
6576 stmt
= gsi_stmt (gsi
);
6577 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6580 if (stmt
!= use
->stmt
)
6583 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6586 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6588 fprintf (dump_file
, "Reordering \n");
6589 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6590 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6591 fprintf (dump_file
, "\n");
6594 gsi
= gsi_for_stmt (use
->stmt
);
6595 gsi_iv
= gsi_for_stmt (iv_update
);
6596 gsi_move_before (&gsi_iv
, &gsi
);
6598 cand
->pos
= IP_BEFORE_USE
;
6599 cand
->incremented_at
= use
->stmt
;
6602 /* Rewrites USE (address that is an iv) using candidate CAND. */
6605 rewrite_use_address (struct ivopts_data
*data
,
6606 struct iv_use
*use
, struct iv_cand
*cand
)
6609 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6610 tree base_hint
= NULL_TREE
;
6614 adjust_iv_update_pos (cand
, use
);
6615 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6617 unshare_aff_combination (&aff
);
6619 /* To avoid undefined overflow problems, all IV candidates use unsigned
6620 integer types. The drawback is that this makes it impossible for
6621 create_mem_ref to distinguish an IV that is based on a memory object
6622 from one that represents simply an offset.
6624 To work around this problem, we pass a hint to create_mem_ref that
6625 indicates which variable (if any) in aff is an IV based on a memory
6626 object. Note that we only consider the candidate. If this is not
6627 based on an object, the base of the reference is in some subexpression
6628 of the use -- but these will use pointer types, so they are recognized
6629 by the create_mem_ref heuristics anyway. */
6630 if (cand
->iv
->base_object
)
6631 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6633 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6634 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6635 reference_alias_ptr_type (*use
->op_p
),
6636 iv
, base_hint
, data
->speed
);
6637 copy_ref_info (ref
, *use
->op_p
);
6641 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6645 rewrite_use_compare (struct ivopts_data
*data
,
6646 struct iv_use
*use
, struct iv_cand
*cand
)
6648 tree comp
, *var_p
, op
, bound
;
6649 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6650 enum tree_code compare
;
6651 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6657 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6658 tree var_type
= TREE_TYPE (var
);
6661 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6663 fprintf (dump_file
, "Replacing exit test: ");
6664 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6667 bound
= unshare_expr (fold_convert (var_type
, bound
));
6668 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6670 gsi_insert_seq_on_edge_immediate (
6671 loop_preheader_edge (data
->current_loop
),
6674 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6675 gimple_cond_set_lhs (cond_stmt
, var
);
6676 gimple_cond_set_code (cond_stmt
, compare
);
6677 gimple_cond_set_rhs (cond_stmt
, op
);
6681 /* The induction variable elimination failed; just express the original
6683 comp
= get_computation (data
->current_loop
, use
, cand
);
6684 gcc_assert (comp
!= NULL_TREE
);
6686 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6689 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6690 true, GSI_SAME_STMT
);
6693 /* Rewrites USE using candidate CAND. */
6696 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6700 case USE_NONLINEAR_EXPR
:
6701 rewrite_use_nonlinear_expr (data
, use
, cand
);
6705 rewrite_use_address (data
, use
, cand
);
6709 rewrite_use_compare (data
, use
, cand
);
6716 update_stmt (use
->stmt
);
6719 /* Rewrite the uses using the selected induction variables. */
6722 rewrite_uses (struct ivopts_data
*data
)
6725 struct iv_cand
*cand
;
6728 for (i
= 0; i
< n_iv_uses (data
); i
++)
6730 use
= iv_use (data
, i
);
6731 cand
= use
->selected
;
6734 rewrite_use (data
, use
, cand
);
6738 /* Removes the ivs that are not used after rewriting. */
6741 remove_unused_ivs (struct ivopts_data
*data
)
6745 bitmap toremove
= BITMAP_ALLOC (NULL
);
6747 /* Figure out an order in which to release SSA DEFs so that we don't
6748 release something that we'd have to propagate into a debug stmt
6750 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
6752 struct version_info
*info
;
6754 info
= ver_info (data
, j
);
6756 && !integer_zerop (info
->iv
->step
)
6758 && !info
->iv
->have_use_for
6759 && !info
->preserve_biv
)
6761 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
6763 tree def
= info
->iv
->ssa_name
;
6765 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
6767 imm_use_iterator imm_iter
;
6768 use_operand_p use_p
;
6772 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6774 if (!gimple_debug_bind_p (stmt
))
6777 /* We just want to determine whether to do nothing
6778 (count == 0), to substitute the computed
6779 expression into a single use of the SSA DEF by
6780 itself (count == 1), or to use a debug temp
6781 because the SSA DEF is used multiple times or as
6782 part of a larger expression (count > 1). */
6784 if (gimple_debug_bind_get_value (stmt
) != def
)
6788 BREAK_FROM_IMM_USE_STMT (imm_iter
);
6794 struct iv_use dummy_use
;
6795 struct iv_cand
*best_cand
= NULL
, *cand
;
6796 unsigned i
, best_pref
= 0, cand_pref
;
6798 memset (&dummy_use
, 0, sizeof (dummy_use
));
6799 dummy_use
.iv
= info
->iv
;
6800 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
6802 cand
= iv_use (data
, i
)->selected
;
6803 if (cand
== best_cand
)
6805 cand_pref
= operand_equal_p (cand
->iv
->step
,
6809 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
6810 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
6813 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
6815 if (best_cand
== NULL
|| best_pref
< cand_pref
)
6818 best_pref
= cand_pref
;
6825 tree comp
= get_computation_at (data
->current_loop
,
6826 &dummy_use
, best_cand
,
6827 SSA_NAME_DEF_STMT (def
));
6833 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
6834 DECL_ARTIFICIAL (vexpr
) = 1;
6835 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
6836 if (SSA_NAME_VAR (def
))
6837 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
6839 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
6841 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
6842 gimple_stmt_iterator gsi
;
6844 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
6845 gsi
= gsi_after_labels (gimple_bb
6846 (SSA_NAME_DEF_STMT (def
)));
6848 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
6850 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
6854 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
6856 if (!gimple_debug_bind_p (stmt
))
6859 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
6860 SET_USE (use_p
, comp
);
6868 release_defs_bitset (toremove
);
6870 BITMAP_FREE (toremove
);
6873 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6874 for hash_map::traverse. */
6877 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
6883 /* Frees data allocated by the optimization of a single loop. */
6886 free_loop_data (struct ivopts_data
*data
)
6894 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
6895 delete data
->niters
;
6896 data
->niters
= NULL
;
6899 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
6901 struct version_info
*info
;
6903 info
= ver_info (data
, i
);
6906 info
->has_nonlin_use
= false;
6907 info
->preserve_biv
= false;
6910 bitmap_clear (data
->relevant
);
6911 bitmap_clear (data
->important_candidates
);
6913 for (i
= 0; i
< n_iv_uses (data
); i
++)
6915 struct iv_use
*use
= iv_use (data
, i
);
6918 BITMAP_FREE (use
->related_cands
);
6919 for (j
= 0; j
< use
->n_map_members
; j
++)
6920 if (use
->cost_map
[j
].depends_on
)
6921 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
6922 free (use
->cost_map
);
6925 data
->iv_uses
.truncate (0);
6927 for (i
= 0; i
< n_iv_cands (data
); i
++)
6929 struct iv_cand
*cand
= iv_cand (data
, i
);
6932 if (cand
->depends_on
)
6933 BITMAP_FREE (cand
->depends_on
);
6936 data
->iv_candidates
.truncate (0);
6938 if (data
->version_info_size
< num_ssa_names
)
6940 data
->version_info_size
= 2 * num_ssa_names
;
6941 free (data
->version_info
);
6942 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
6945 data
->max_inv_id
= 0;
6947 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
6948 SET_DECL_RTL (obj
, NULL_RTX
);
6950 decl_rtl_to_reset
.truncate (0);
6952 data
->inv_expr_tab
->empty ();
6953 data
->inv_expr_id
= 0;
6956 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6960 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
6962 free_loop_data (data
);
6963 free (data
->version_info
);
6964 BITMAP_FREE (data
->relevant
);
6965 BITMAP_FREE (data
->important_candidates
);
6967 decl_rtl_to_reset
.release ();
6968 data
->iv_uses
.release ();
6969 data
->iv_candidates
.release ();
6970 delete data
->inv_expr_tab
;
6971 data
->inv_expr_tab
= NULL
;
6972 free_affine_expand_cache (&data
->name_expansion_cache
);
6975 /* Returns true if the loop body BODY includes any function calls. */
6978 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
6980 gimple_stmt_iterator gsi
;
6983 for (i
= 0; i
< num_nodes
; i
++)
6984 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
6986 gimple stmt
= gsi_stmt (gsi
);
6987 if (is_gimple_call (stmt
)
6988 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
6994 /* Optimizes the LOOP. Returns true if anything changed. */
6997 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
6999 bool changed
= false;
7000 struct iv_ca
*iv_ca
;
7001 edge exit
= single_dom_exit (loop
);
7004 gcc_assert (!data
->niters
);
7005 data
->current_loop
= loop
;
7006 data
->loop_loc
= find_loop_location (loop
);
7007 data
->speed
= optimize_loop_for_speed_p (loop
);
7009 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7011 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7012 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7013 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7014 LOCATION_LINE (data
->loop_loc
));
7015 fprintf (dump_file
, "\n");
7019 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7020 exit
->src
->index
, exit
->dest
->index
);
7021 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7022 fprintf (dump_file
, "\n");
7025 fprintf (dump_file
, "\n");
7028 body
= get_loop_body (loop
);
7029 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7030 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7033 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7035 /* For each ssa name determines whether it behaves as an induction variable
7037 if (!find_induction_variables (data
))
7040 /* Finds interesting uses (item 1). */
7041 find_interesting_uses (data
);
7042 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7045 /* Finds candidates for the induction variables (item 2). */
7046 find_iv_candidates (data
);
7048 /* Calculates the costs (item 3, part 1). */
7049 determine_iv_costs (data
);
7050 determine_use_iv_costs (data
);
7051 determine_set_costs (data
);
7053 /* Find the optimal set of induction variables (item 3, part 2). */
7054 iv_ca
= find_optimal_iv_set (data
);
7059 /* Create the new induction variables (item 4, part 1). */
7060 create_new_ivs (data
, iv_ca
);
7061 iv_ca_free (&iv_ca
);
7063 /* Rewrite the uses (item 4, part 2). */
7064 rewrite_uses (data
);
7066 /* Remove the ivs that are unused after rewriting. */
7067 remove_unused_ivs (data
);
7069 /* We have changed the structure of induction variables; it might happen
7070 that definitions in the scev database refer to some of them that were
7075 free_loop_data (data
);
7080 /* Main entry point. Optimizes induction variables in loops. */
7083 tree_ssa_iv_optimize (void)
7086 struct ivopts_data data
;
7088 tree_ssa_iv_optimize_init (&data
);
7090 /* Optimize the loops starting with the innermost ones. */
7091 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7093 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7094 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7096 tree_ssa_iv_optimize_loop (&data
, loop
);
7099 tree_ssa_iv_optimize_finalize (&data
);