/* gcc/tree-ssa-loop-ivopts.c */
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
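/* As an illustrative sketch (not verbatim output of this pass; the actual
   result depends on target addressing modes and costs), strength reduction
   can rewrite an indexed loop into a pointer-based one:

     before IVOPTS:                 one possible outcome after IVOPTS:

       for (i = 0; i < n; i++)        p = &a[0]; end = &a[n];
         a[i] = 0;                    for (; p != end; p++)
                                        *p = 0;

   Here the index iv `i' is eliminated in favor of the pointer iv `p',
   and the exit test is rewritten to compare against a precomputed bound.  */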
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "hash-set.h"
69 #include "machmode.h"
70 #include "vec.h"
71 #include "double-int.h"
72 #include "input.h"
73 #include "alias.h"
74 #include "symtab.h"
75 #include "wide-int.h"
76 #include "inchash.h"
77 #include "tree.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
80 #include "tm_p.h"
81 #include "predict.h"
82 #include "hard-reg-set.h"
83 #include "function.h"
84 #include "dominance.h"
85 #include "cfg.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
88 #include "hash-map.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
92 #include "tree-eh.h"
93 #include "gimple-expr.h"
94 #include "is-a.h"
95 #include "gimple.h"
96 #include "gimplify.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
101 #include "ipa-ref.h"
102 #include "cgraph.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
112 #include "hashtab.h"
113 #include "rtl.h"
114 #include "flags.h"
115 #include "statistics.h"
116 #include "real.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
119 #include "expmed.h"
120 #include "dojump.h"
121 #include "explow.h"
122 #include "calls.h"
123 #include "emit-rtl.h"
124 #include "varasm.h"
125 #include "stmt.h"
126 #include "expr.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
129 #include "cfgloop.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
133 #include "params.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
136 #include "target.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
141 #include "tree-vectorizer.h"
143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
144 cost of different addressing modes. This should be moved to a TBD
145 interface between the GIMPLE and RTL worlds. */
146 #include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
                           const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
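/* For instance (an illustrative sketch, not an exhaustive list of cases):

     determine_base_object (&a[i])    returns a pointer to `a'
     determine_base_object (p_1 + 4)  recurses on p_1 (POINTER_PLUS_EXPR)
     determine_base_object (cst)      returns NULL_TREE (no object)

   so two address ivs walking the same array share one base object, which
   later lets the pass reason about them together.  */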
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
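/* Illustrative examples:

     contain_complex_addr_expr (&a)    -> false (operand is a DECL)
     contain_complex_addr_expr (&a[0]) -> true  (operand is an ARRAY_REF)
     contain_complex_addr_expr (&s.f)  -> true  (operand is a COMPONENT_REF)

   alloc_iv below uses this to decide whether the base needs to be lowered
   through the affine combination machinery.  */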
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
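/* For a canonical counted loop the biv looks like (illustrative gimple):

     <bb header>:
       i_1 = PHI <0(preheader), i_2(latch)>
       ...
       i_2 = i_1 + 4;

   determine_biv_step returns 4 here, and NULL_TREE for e.g. a phi whose
   value does not evolve linearly (or does not evolve at all).  */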
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by the iv
         step.  Ideally we should stop at any ssa var, but since that is
         expensive and unusual to happen, we just do it on the first one.

         See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by the iv
     step.  Ideally we should stop at any ssa var, but since that is
     expensive and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
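/* E.g. for the exit test (illustrative):

     if (i_1 < n_2)   where i_1 is an iv with nonzero step and n_2 is
                      loop invariant,

   the function returns true with *CONTROL_VAR pointing at i_1 and *BOUND
   at n_2; for `n_2 < i_1' the operands are swapped first, and for a
   compare of two ivs (or two invariants) it returns false.  */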
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
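/* For example (illustrative): with nested loops L1 containing L2, the
   expression n_1 + m_2 where n_1 is defined before L1 and m_2 inside L1
   but outside L2 is invariant in L2 at best, so the function called with
   LOOP == L2 returns L2; called on an expression using an ssa name
   defined inside L2 it returns NULL.  */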
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
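/* As a worked example (illustrative): for a use a[i] where a has 4-byte
   elements and i is an iv with step 1, the element size (4) is multiplied
   by the converted iv step (1), so 4 is accumulated into DTA->step and
   *IDX is replaced by the initial value of i.  For a nested reference like
   a[i][j] the callback runs once per index, summing the byte steps.  */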
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
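/* Illustrative cases:

     constant_multiple_of (i_1 * 8, i_1, &m)  -> true,  m = 8
     constant_multiple_of (12, 4, &m)         -> true,  m = 3
     constant_multiple_of (13, 4, &m)         -> false (nonzero remainder)  */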
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
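/* Worked example (illustrative): for a 4-byte int reference (ALIGN == 32
   bits) and STEP == 2, tree_ctz (step) == 1, so (1U << 1) * BITS_PER_UNIT
   == 16 < 32 and the reference may become unaligned after some iterations;
   with STEP == 4 that check passes, since 32 is not less than 32.  */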
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
        return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
        return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
        return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
                               tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2149 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2150 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2151 we are at the top-level of the processed address. */
2153 static tree
2154 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2155 HOST_WIDE_INT *offset)
2157 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2158 enum tree_code code;
2159 tree type, orig_type = TREE_TYPE (expr);
2160 HOST_WIDE_INT off0, off1, st;
2161 tree orig_expr = expr;
2163 STRIP_NOPS (expr);
2165 type = TREE_TYPE (expr);
2166 code = TREE_CODE (expr);
2167 *offset = 0;
2169 switch (code)
2171 case INTEGER_CST:
2172 if (!cst_and_fits_in_hwi (expr)
2173 || integer_zerop (expr))
2174 return orig_expr;
2176 *offset = int_cst_value (expr);
2177 return build_int_cst (orig_type, 0);
2179 case POINTER_PLUS_EXPR:
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 op0 = TREE_OPERAND (expr, 0);
2183 op1 = TREE_OPERAND (expr, 1);
2185 op0 = strip_offset_1 (op0, false, false, &off0);
2186 op1 = strip_offset_1 (op1, false, false, &off1);
2188 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2189 if (op0 == TREE_OPERAND (expr, 0)
2190 && op1 == TREE_OPERAND (expr, 1))
2191 return orig_expr;
2193 if (integer_zerop (op1))
2194 expr = op0;
2195 else if (integer_zerop (op0))
2197 if (code == MINUS_EXPR)
2198 expr = fold_build1 (NEGATE_EXPR, type, op1);
2199 else
2200 expr = op1;
2202 else
2203 expr = fold_build2 (code, type, op0, op1);
2205 return fold_convert (orig_type, expr);
2207 case MULT_EXPR:
2208 op1 = TREE_OPERAND (expr, 1);
2209 if (!cst_and_fits_in_hwi (op1))
2210 return orig_expr;
2212 op0 = TREE_OPERAND (expr, 0);
2213 op0 = strip_offset_1 (op0, false, false, &off0);
2214 if (op0 == TREE_OPERAND (expr, 0))
2215 return orig_expr;
2217 *offset = off0 * int_cst_value (op1);
2218 if (integer_zerop (op0))
2219 expr = op0;
2220 else
2221 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2223 return fold_convert (orig_type, expr);
2225 case ARRAY_REF:
2226 case ARRAY_RANGE_REF:
2227 if (!inside_addr)
2228 return orig_expr;
2230 step = array_ref_element_size (expr);
2231 if (!cst_and_fits_in_hwi (step))
2232 break;
2234 st = int_cst_value (step);
2235 op1 = TREE_OPERAND (expr, 1);
2236 op1 = strip_offset_1 (op1, false, false, &off1);
2237 *offset = off1 * st;
2239 if (top_compref
2240 && integer_zerop (op1))
2242 /* Strip the component reference completely. */
2243 op0 = TREE_OPERAND (expr, 0);
2244 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2245 *offset += off0;
2246 return op0;
2248 break;
2250 case COMPONENT_REF:
2252 tree field;
2254 if (!inside_addr)
2255 return orig_expr;
2257 tmp = component_ref_field_offset (expr);
2258 field = TREE_OPERAND (expr, 1);
2259 if (top_compref
2260 && cst_and_fits_in_hwi (tmp)
2261 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2263 HOST_WIDE_INT boffset, abs_off;
2265 /* Strip the component reference completely. */
2266 op0 = TREE_OPERAND (expr, 0);
2267 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2268 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2269 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2270 if (boffset < 0)
2271 abs_off = -abs_off;
2273 *offset = off0 + int_cst_value (tmp) + abs_off;
2274 return op0;
2277 break;
2279 case ADDR_EXPR:
2280 op0 = TREE_OPERAND (expr, 0);
2281 op0 = strip_offset_1 (op0, true, true, &off0);
2282 *offset += off0;
2284 if (op0 == TREE_OPERAND (expr, 0))
2285 return orig_expr;
2287 expr = build_fold_addr_expr (op0);
2288 return fold_convert (orig_type, expr);
2290 case MEM_REF:
2291 /* ??? Offset operand? */
2292 inside_addr = false;
2293 break;
2295 default:
2296 return orig_expr;
2299 /* Default handling of expressions for which we want to recurse into
2300 the first operand. */
2301 op0 = TREE_OPERAND (expr, 0);
2302 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2303 *offset += off0;
2305 if (op0 == TREE_OPERAND (expr, 0)
2306 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2307 return orig_expr;
2309 expr = copy_node (expr);
2310 TREE_OPERAND (expr, 0) = op0;
2311 if (op1)
2312 TREE_OPERAND (expr, 1) = op1;
2314 /* Inside an address, we might strip the top-level component references,
2315 thus changing the type of the expression. Handling of ADDR_EXPR
2316 will fix that. */
2317 expr = fold_convert (orig_type, expr);
2319 return expr;
2322 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2324 static tree
2325 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2327 HOST_WIDE_INT off;
2328 tree core = strip_offset_1 (expr, false, false, &off);
2329 *offset = off;
2330 return core;
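/* A worked example (editor's sketch, assuming a hypothetical
   "int a[10];" on a target with 4-byte int): strip_offset (&a[i + 3],
   &off) recurses through the ADDR_EXPR into the ARRAY_REF, splits the
   index into i + 3, multiplies the constant part by the element size,
   and returns &a[i] with off == 12.  For a fully constant index such
   as &a[3], the array reference is stripped completely and the result
   is (int *) &a, again with off == 12.  */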
2333 /* Returns a variant of TYPE that can be used as a base for different uses.
2334 We return an unsigned type with the same precision, which avoids problems
2335 with overflows. */
2337 static tree
2338 generic_type_for (tree type)
2340 if (POINTER_TYPE_P (type))
2341 return unsigned_type_for (type);
2343 if (TYPE_UNSIGNED (type))
2344 return type;
2346 return unsigned_type_for (type);
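/* For instance, generic_type_for (int) is unsigned int, and for a
   pointer type it is the unsigned integer type of the same precision,
   so candidate arithmetic never triggers the undefined behavior of
   signed or pointer overflow.  */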
2349 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2350 the bitmap in which we should store them. */
2352 static struct ivopts_data *fd_ivopts_data;
2353 static tree
2354 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2356 bitmap *depends_on = (bitmap *) data;
2357 struct version_info *info;
2359 if (TREE_CODE (*expr_p) != SSA_NAME)
2360 return NULL_TREE;
2361 info = name_info (fd_ivopts_data, *expr_p);
2363 if (!info->inv_id || info->has_nonlin_use)
2364 return NULL_TREE;
2366 if (!*depends_on)
2367 *depends_on = BITMAP_ALLOC (NULL);
2368 bitmap_set_bit (*depends_on, info->inv_id);
2370 return NULL_TREE;
2373 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2374 position to POS. If USE is not NULL, the candidate is set as related to
2375 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2376 replacement of the final value of the iv by a direct computation. */
2378 static struct iv_cand *
2379 add_candidate_1 (struct ivopts_data *data,
2380 tree base, tree step, bool important, enum iv_position pos,
2381 struct iv_use *use, gimple incremented_at)
2383 unsigned i;
2384 struct iv_cand *cand = NULL;
2385 tree type, orig_type;
2387 /* For non-original variables, make sure their values are computed in a type
2388 that does not invoke undefined behavior on overflows (since in general,
2389 we cannot prove that these induction variables are non-wrapping). */
2390 if (pos != IP_ORIGINAL)
2392 orig_type = TREE_TYPE (base);
2393 type = generic_type_for (orig_type);
2394 if (type != orig_type)
2396 base = fold_convert (type, base);
2397 step = fold_convert (type, step);
2401 for (i = 0; i < n_iv_cands (data); i++)
2403 cand = iv_cand (data, i);
2405 if (cand->pos != pos)
2406 continue;
2408 if (cand->incremented_at != incremented_at
2409 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2410 && cand->ainc_use != use))
2411 continue;
2413 if (!cand->iv)
2415 if (!base && !step)
2416 break;
2418 continue;
2421 if (!base && !step)
2422 continue;
2424 if (operand_equal_p (base, cand->iv->base, 0)
2425 && operand_equal_p (step, cand->iv->step, 0)
2426 && (TYPE_PRECISION (TREE_TYPE (base))
2427 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2428 break;
2431 if (i == n_iv_cands (data))
2433 cand = XCNEW (struct iv_cand);
2434 cand->id = i;
2436 if (!base && !step)
2437 cand->iv = NULL;
2438 else
2439 cand->iv = alloc_iv (base, step);
2441 cand->pos = pos;
2442 if (pos != IP_ORIGINAL && cand->iv)
2444 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2445 cand->var_after = cand->var_before;
2447 cand->important = important;
2448 cand->incremented_at = incremented_at;
2449 data->iv_candidates.safe_push (cand);
2451 if (step
2452 && TREE_CODE (step) != INTEGER_CST)
2454 fd_ivopts_data = data;
2455 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2458 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2459 cand->ainc_use = use;
2460 else
2461 cand->ainc_use = NULL;
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 dump_cand (dump_file, cand);
2467 if (important && !cand->important)
2469 cand->important = true;
2470 if (dump_file && (dump_flags & TDF_DETAILS))
2471 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2474 if (use)
2476 bitmap_set_bit (use->related_cands, i);
2477 if (dump_file && (dump_flags & TDF_DETAILS))
2478 fprintf (dump_file, "Candidate %d is related to use %d\n",
2479 cand->id, use->id);
2482 return cand;
2485 /* Returns true if incrementing the induction variable at the end of the LOOP
2486 is allowed.
2488 The purpose is to avoid splitting the latch edge with a biv increment, thus
2489 creating a jump, possibly confusing other optimization passes and leaving
2490 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2491 is not available (so we do not have a better alternative), or if the latch
2492 edge is already nonempty. */
2494 static bool
2495 allow_ip_end_pos_p (struct loop *loop)
2497 if (!ip_normal_pos (loop))
2498 return true;
2500 if (!empty_block_p (ip_end_pos (loop)))
2501 return true;
2503 return false;
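/* For example, in a loop whose latch block contains only the
   back-edge jump, IP_NORMAL_POS is usually available and the latch is
   empty, so this returns false and the increment is not placed at
   IP_END, which would have forced splitting the latch edge.  */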
2506 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2507 Important field is set to IMPORTANT. */
2509 static void
2510 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2511 bool important, struct iv_use *use)
2513 basic_block use_bb = gimple_bb (use->stmt);
2514 machine_mode mem_mode;
2515 unsigned HOST_WIDE_INT cstepi;
2517 /* If we insert the increment in any position other than the standard
2518 ones, we must ensure that it is incremented once per iteration.
2519 It must not be in an inner nested loop, or one side of an if
2520 statement. */
2521 if (use_bb->loop_father != data->current_loop
2522 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2523 || stmt_could_throw_p (use->stmt)
2524 || !cst_and_fits_in_hwi (step))
2525 return;
2527 cstepi = int_cst_value (step);
2529 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2530 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2531 || USE_STORE_PRE_INCREMENT (mem_mode))
2532 && GET_MODE_SIZE (mem_mode) == cstepi)
2533 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2534 || USE_STORE_PRE_DECREMENT (mem_mode))
2535 && GET_MODE_SIZE (mem_mode) == -cstepi))
2537 enum tree_code code = MINUS_EXPR;
2538 tree new_base;
2539 tree new_step = step;
2541 if (POINTER_TYPE_P (TREE_TYPE (base)))
2543 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2544 code = POINTER_PLUS_EXPR;
2546 else
2547 new_step = fold_convert (TREE_TYPE (base), new_step);
2548 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2549 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2550 use->stmt);
2552 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2553 || USE_STORE_POST_INCREMENT (mem_mode))
2554 && GET_MODE_SIZE (mem_mode) == cstepi)
2555 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2556 || USE_STORE_POST_DECREMENT (mem_mode))
2557 && GET_MODE_SIZE (mem_mode) == -cstepi))
2559 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2560 use->stmt);
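/* Illustration (hedged, target-dependent): on a machine with
   post-increment addressing, e.g. an ARM load like "ldr r0, [r1], #4",
   a USE_ADDRESS use with SImode accesses and cstepi == 4 satisfies
   GET_MODE_SIZE (mem_mode) == cstepi, so an IP_AFTER_USE candidate is
   added whose increment can be folded into the memory access.  */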
2564 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2565 position to POS. If USE is not NULL, the candidate is set as related to
2566 it. The candidate computation is scheduled on all available positions. */
2568 static void
2569 add_candidate (struct ivopts_data *data,
2570 tree base, tree step, bool important, struct iv_use *use)
2572 if (ip_normal_pos (data->current_loop))
2573 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2574 if (ip_end_pos (data->current_loop)
2575 && allow_ip_end_pos_p (data->current_loop))
2576 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2578 if (use != NULL && use->type == USE_ADDRESS)
2579 add_autoinc_candidates (data, base, step, important, use);
2582 /* Adds standard iv candidates. */
2584 static void
2585 add_standard_iv_candidates (struct ivopts_data *data)
2587 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2589 /* The same for the `long' type if it is still fast enough. */
2590 if (TYPE_PRECISION
2591 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2592 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2593 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2594 build_int_cst (long_integer_type_node, 1), true, NULL);
2596 /* And likewise for `long long' if it is still fast enough. */
2597 if (TYPE_PRECISION
2598 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2599 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2600 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2601 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2605 /* Adds candidates based on the old induction variable IV. */
2607 static void
2608 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2610 gimple phi;
2611 tree def;
2612 struct iv_cand *cand;
2614 add_candidate (data, iv->base, iv->step, true, NULL);
2616 /* The same, but with initial value zero. */
2617 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2618 add_candidate (data, size_int (0), iv->step, true, NULL);
2619 else
2620 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2621 iv->step, true, NULL);
2623 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2624 if (gimple_code (phi) == GIMPLE_PHI)
2626 /* Additionally record the possibility of leaving the original iv
2627 untouched. */
2628 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2629 /* Don't add the candidate if it's from another PHI node, because
2630 then it's an affine iv appearing in the form of PEELED_CHREC. */
2631 phi = SSA_NAME_DEF_STMT (def);
2632 if (gimple_code (phi) != GIMPLE_PHI)
2634 cand = add_candidate_1 (data,
2635 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2636 SSA_NAME_DEF_STMT (def));
2637 cand->var_before = iv->ssa_name;
2638 cand->var_after = def;
2640 else
2641 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2645 /* Adds candidates based on the old induction variables. */
2647 static void
2648 add_old_ivs_candidates (struct ivopts_data *data)
2650 unsigned i;
2651 struct iv *iv;
2652 bitmap_iterator bi;
2654 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2656 iv = ver_info (data, i)->iv;
2657 if (iv && iv->biv_p && !integer_zerop (iv->step))
2658 add_old_iv_candidates (data, iv);
2662 /* Adds candidates based on the value of the induction variable IV and USE. */
2664 static void
2665 add_iv_value_candidates (struct ivopts_data *data,
2666 struct iv *iv, struct iv_use *use)
2668 unsigned HOST_WIDE_INT offset;
2669 tree base;
2670 tree basetype;
2672 add_candidate (data, iv->base, iv->step, false, use);
2674 /* The same, but with initial value zero. Make such a variable important,
2675 since it is generic enough that possibly many uses may be based
2676 on it. */
2677 basetype = TREE_TYPE (iv->base);
2678 if (POINTER_TYPE_P (basetype))
2679 basetype = sizetype;
2680 add_candidate (data, build_int_cst (basetype, 0),
2681 iv->step, true, use);
2683 /* Third, try removing the constant offset. Make sure to even
2684 add a candidate for &a[0] vs. (T *)&a. */
2685 base = strip_offset (iv->base, &offset);
2686 if (offset
2687 || base != iv->base)
2688 add_candidate (data, base, iv->step, false, use);
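/* Worked example (editor's illustration, 4-byte elements): for a use
   whose iv is {&a[3], +, 4} this adds up to three candidates:
   base &a[3] with step 4 (the use's own value), base 0 with step 4
   (important, shareable by many uses), and base &a[0] with step 4
   (the constant offset 12 stripped).  */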
2691 /* Adds candidates based on the uses. */
2693 static void
2694 add_derived_ivs_candidates (struct ivopts_data *data)
2696 unsigned i;
2698 for (i = 0; i < n_iv_uses (data); i++)
2700 struct iv_use *use = iv_use (data, i);
2702 if (!use)
2703 continue;
2705 switch (use->type)
2707 case USE_NONLINEAR_EXPR:
2708 case USE_COMPARE:
2709 case USE_ADDRESS:
2710 /* Just add the ivs based on the value of the iv used here. */
2711 add_iv_value_candidates (data, use->iv, use);
2712 break;
2714 default:
2715 gcc_unreachable ();
2720 /* Record important candidates and add them to related_cands bitmaps
2721 if needed. */
2723 static void
2724 record_important_candidates (struct ivopts_data *data)
2726 unsigned i;
2727 struct iv_use *use;
2729 for (i = 0; i < n_iv_cands (data); i++)
2731 struct iv_cand *cand = iv_cand (data, i);
2733 if (cand->important)
2734 bitmap_set_bit (data->important_candidates, i);
2737 data->consider_all_candidates = (n_iv_cands (data)
2738 <= CONSIDER_ALL_CANDIDATES_BOUND);
2740 if (data->consider_all_candidates)
2742 /* We will not need "related_cands" bitmaps in this case,
2743 so release them to decrease peak memory consumption. */
2744 for (i = 0; i < n_iv_uses (data); i++)
2746 use = iv_use (data, i);
2747 BITMAP_FREE (use->related_cands);
2750 else
2752 /* Add important candidates to the related_cands bitmaps. */
2753 for (i = 0; i < n_iv_uses (data); i++)
2754 bitmap_ior_into (iv_use (data, i)->related_cands,
2755 data->important_candidates);
2759 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2760 If consider_all_candidates is true, we use an array indexed by candidate id;
2761 otherwise we allocate a small hash table for every use. */
2763 static void
2764 alloc_use_cost_map (struct ivopts_data *data)
2766 unsigned i, size, s;
2768 for (i = 0; i < n_iv_uses (data); i++)
2770 struct iv_use *use = iv_use (data, i);
2772 if (data->consider_all_candidates)
2773 size = n_iv_cands (data);
2774 else
2776 s = bitmap_count_bits (use->related_cands);
2778 /* Round up to the nearest power of two, so that taking the modulo is fast. */
2779 size = s ? (1 << ceil_log2 (s)) : 1;
2782 use->n_map_members = size;
2783 use->cost_map = XCNEWVEC (struct cost_pair, size);
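/* For example, a use related to 5 candidates gets n_map_members == 8,
   the next power of two, so set_use_iv_cost and get_use_iv_cost below
   can reduce a candidate id with "id & 7" instead of a division.  */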
2787 /* Returns a description of the computation cost of an expression whose
2788 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2790 static comp_cost
2791 new_cost (unsigned runtime, unsigned complexity)
2793 comp_cost cost;
2795 cost.cost = runtime;
2796 cost.complexity = complexity;
2798 return cost;
2801 /* Adds costs COST1 and COST2. */
2803 static comp_cost
2804 add_costs (comp_cost cost1, comp_cost cost2)
2806 cost1.cost += cost2.cost;
2807 cost1.complexity += cost2.complexity;
2809 return cost1;
2811 /* Subtracts cost COST2 from COST1. */
2813 static comp_cost
2814 sub_costs (comp_cost cost1, comp_cost cost2)
2816 cost1.cost -= cost2.cost;
2817 cost1.complexity -= cost2.complexity;
2819 return cost1;
2822 /* Returns a negative number if COST1 < COST2, a positive number if
2823 COST1 > COST2, and 0 if COST1 = COST2. */
2825 static int
2826 compare_costs (comp_cost cost1, comp_cost cost2)
2828 if (cost1.cost == cost2.cost)
2829 return cost1.complexity - cost2.complexity;
2831 return cost1.cost - cost2.cost;
2834 /* Returns true if COST is infinite. */
2836 static bool
2837 infinite_cost_p (comp_cost cost)
2839 return cost.cost == INFTY;
2842 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2843 depends on the invariants DEPENDS_ON, that the value used in expressing it
2844 is VALUE, and, for iv elimination, that the comparison operator is COMP. */
2846 static void
2847 set_use_iv_cost (struct ivopts_data *data,
2848 struct iv_use *use, struct iv_cand *cand,
2849 comp_cost cost, bitmap depends_on, tree value,
2850 enum tree_code comp, int inv_expr_id)
2852 unsigned i, s;
2854 if (infinite_cost_p (cost))
2856 BITMAP_FREE (depends_on);
2857 return;
2860 if (data->consider_all_candidates)
2862 use->cost_map[cand->id].cand = cand;
2863 use->cost_map[cand->id].cost = cost;
2864 use->cost_map[cand->id].depends_on = depends_on;
2865 use->cost_map[cand->id].value = value;
2866 use->cost_map[cand->id].comp = comp;
2867 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2868 return;
2871 /* n_map_members is a power of two, so this computes modulo. */
2872 s = cand->id & (use->n_map_members - 1);
2873 for (i = s; i < use->n_map_members; i++)
2874 if (!use->cost_map[i].cand)
2875 goto found;
2876 for (i = 0; i < s; i++)
2877 if (!use->cost_map[i].cand)
2878 goto found;
2880 gcc_unreachable ();
2882 found:
2883 use->cost_map[i].cand = cand;
2884 use->cost_map[i].cost = cost;
2885 use->cost_map[i].depends_on = depends_on;
2886 use->cost_map[i].value = value;
2887 use->cost_map[i].comp = comp;
2888 use->cost_map[i].inv_expr_id = inv_expr_id;
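/* Sketch of the open addressing above: with n_map_members == 8,
   candidate id 11 starts at slot 11 & 7 == 3; if that slot is taken,
   slots 4, 5, 6, 7 are probed and then the search wraps to 0.  Since
   only finite-cost pairs for related candidates are stored, a free
   slot should always be found and gcc_unreachable () is never hit.  */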
2891 /* Gets cost of (USE, CANDIDATE) pair. */
2893 static struct cost_pair *
2894 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2895 struct iv_cand *cand)
2897 unsigned i, s;
2898 struct cost_pair *ret;
2900 if (!cand)
2901 return NULL;
2903 if (data->consider_all_candidates)
2905 ret = use->cost_map + cand->id;
2906 if (!ret->cand)
2907 return NULL;
2909 return ret;
2912 /* n_map_members is a power of two, so this computes modulo. */
2913 s = cand->id & (use->n_map_members - 1);
2914 for (i = s; i < use->n_map_members; i++)
2915 if (use->cost_map[i].cand == cand)
2916 return use->cost_map + i;
2917 else if (use->cost_map[i].cand == NULL)
2918 return NULL;
2919 for (i = 0; i < s; i++)
2920 if (use->cost_map[i].cand == cand)
2921 return use->cost_map + i;
2922 else if (use->cost_map[i].cand == NULL)
2923 return NULL;
2925 return NULL;
2928 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2929 static rtx
2930 produce_memory_decl_rtl (tree obj, int *regno)
2932 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2933 machine_mode address_mode = targetm.addr_space.address_mode (as);
2934 rtx x;
2936 gcc_assert (obj);
2937 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2939 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2940 x = gen_rtx_SYMBOL_REF (address_mode, name);
2941 SET_SYMBOL_REF_DECL (x, obj);
2942 x = gen_rtx_MEM (DECL_MODE (obj), x);
2943 set_mem_addr_space (x, as);
2944 targetm.encode_section_info (obj, x, true);
2946 else
2948 x = gen_raw_REG (address_mode, (*regno)++);
2949 x = gen_rtx_MEM (DECL_MODE (obj), x);
2950 set_mem_addr_space (x, as);
2953 return x;
2956 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2957 walk_tree. DATA contains the actual fake register number. */
2959 static tree
2960 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2962 tree obj = NULL_TREE;
2963 rtx x = NULL_RTX;
2964 int *regno = (int *) data;
2966 switch (TREE_CODE (*expr_p))
2968 case ADDR_EXPR:
2969 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2970 handled_component_p (*expr_p);
2971 expr_p = &TREE_OPERAND (*expr_p, 0))
2972 continue;
2973 obj = *expr_p;
2974 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2975 x = produce_memory_decl_rtl (obj, regno);
2976 break;
2978 case SSA_NAME:
2979 *ws = 0;
2980 obj = SSA_NAME_VAR (*expr_p);
2981 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2982 if (!obj)
2983 return NULL_TREE;
2984 if (!DECL_RTL_SET_P (obj))
2985 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2986 break;
2988 case VAR_DECL:
2989 case PARM_DECL:
2990 case RESULT_DECL:
2991 *ws = 0;
2992 obj = *expr_p;
2994 if (DECL_RTL_SET_P (obj))
2995 break;
2997 if (DECL_MODE (obj) == BLKmode)
2998 x = produce_memory_decl_rtl (obj, regno);
2999 else
3000 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3002 break;
3004 default:
3005 break;
3008 if (x)
3010 decl_rtl_to_reset.safe_push (obj);
3011 SET_DECL_RTL (obj, x);
3014 return NULL_TREE;
3017 /* Determines cost of the computation of EXPR. */
3019 static unsigned
3020 computation_cost (tree expr, bool speed)
3022 rtx_insn *seq;
3023 rtx rslt;
3024 tree type = TREE_TYPE (expr);
3025 unsigned cost;
3026 /* Avoid using hard regs in ways which may be unsupported. */
3027 int regno = LAST_VIRTUAL_REGISTER + 1;
3028 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3029 enum node_frequency real_frequency = node->frequency;
3031 node->frequency = NODE_FREQUENCY_NORMAL;
3032 crtl->maybe_hot_insn_p = speed;
3033 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3034 start_sequence ();
3035 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3036 seq = get_insns ();
3037 end_sequence ();
3038 default_rtl_profile ();
3039 node->frequency = real_frequency;
3041 cost = seq_cost (seq, speed);
3042 if (MEM_P (rslt))
3043 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3044 TYPE_ADDR_SPACE (type), speed);
3045 else if (!REG_P (rslt))
3046 cost += set_src_cost (rslt, speed);
3048 return cost;
3051 /* Returns variable containing the value of candidate CAND at statement AT. */
3053 static tree
3054 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3056 if (stmt_after_increment (loop, cand, stmt))
3057 return cand->var_after;
3058 else
3059 return cand->var_before;
3062 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3063 same precision, which is at least as wide as the precision of TYPE, stores
3064 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3065 type of A. */
3067 static tree
3068 determine_common_wider_type (tree *a, tree *b)
3070 tree wider_type = NULL;
3071 tree suba, subb;
3072 tree atype = TREE_TYPE (*a);
3074 if (CONVERT_EXPR_P (*a))
3076 suba = TREE_OPERAND (*a, 0);
3077 wider_type = TREE_TYPE (suba);
3078 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3079 return atype;
3081 else
3082 return atype;
3084 if (CONVERT_EXPR_P (*b))
3086 subb = TREE_OPERAND (*b, 0);
3087 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3088 return atype;
3090 else
3091 return atype;
3093 *a = suba;
3094 *b = subb;
3095 return wider_type;
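/* Example (assuming long is wider than int): if *a is
   (unsigned int) x and *b is (unsigned int) y with x and y of type
   long, both casts are stripped, *a = x, *b = y, and long is
   returned, so the caller can fold x - y in the wide type before
   narrowing.  If the inner precisions disagree, *a and *b are left
   untouched and the type of *a is returned.  */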
3098 /* Determines the expression by which USE is expressed from induction variable
3099 CAND at statement AT in LOOP. The expression is stored in a decomposed
3100 form into AFF. Returns false if USE cannot be expressed using CAND. */
3102 static bool
3103 get_computation_aff (struct loop *loop,
3104 struct iv_use *use, struct iv_cand *cand, gimple at,
3105 struct aff_tree *aff)
3107 tree ubase = use->iv->base;
3108 tree ustep = use->iv->step;
3109 tree cbase = cand->iv->base;
3110 tree cstep = cand->iv->step, cstep_common;
3111 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3112 tree common_type, var;
3113 tree uutype;
3114 aff_tree cbase_aff, var_aff;
3115 widest_int rat;
3117 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3119 /* We do not have a precision to express the values of use. */
3120 return false;
3123 var = var_at_stmt (loop, cand, at);
3124 uutype = unsigned_type_for (utype);
3126 /* If the conversion is not noop, perform it. */
3127 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3129 cstep = fold_convert (uutype, cstep);
3130 cbase = fold_convert (uutype, cbase);
3131 var = fold_convert (uutype, var);
3134 if (!constant_multiple_of (ustep, cstep, &rat))
3135 return false;
3137 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3138 type, we achieve better folding by computing their difference in this
3139 wider type, and cast the result to UUTYPE. We do not need to worry about
3140 overflows, as all the arithmetic will in the end be performed in UUTYPE
3141 anyway. */
3142 common_type = determine_common_wider_type (&ubase, &cbase);
3144 /* use = ubase - ratio * cbase + ratio * var. */
3145 tree_to_aff_combination (ubase, common_type, aff);
3146 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3147 tree_to_aff_combination (var, uutype, &var_aff);
3149 /* We need to shift the value if we are after the increment. */
3150 if (stmt_after_increment (loop, cand, at))
3152 aff_tree cstep_aff;
3154 if (common_type != uutype)
3155 cstep_common = fold_convert (common_type, cstep);
3156 else
3157 cstep_common = cstep;
3159 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3160 aff_combination_add (&cbase_aff, &cstep_aff);
3163 aff_combination_scale (&cbase_aff, -rat);
3164 aff_combination_add (aff, &cbase_aff);
3165 if (common_type != uutype)
3166 aff_combination_convert (aff, uutype);
3168 aff_combination_scale (&var_aff, rat);
3169 aff_combination_add (aff, &var_aff);
3171 return true;
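/* A small instance of the decomposition above: for a use {ub, +, 4}
   and a candidate {cb, +, 2}, constant_multiple_of yields rat == 2
   and AFF represents ub - 2 * cb + 2 * var, with cb first shifted by
   the step when AT lies after the candidate's increment.  */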
3174 /* Return the type of USE. */
3176 static tree
3177 get_use_type (struct iv_use *use)
3179 tree base_type = TREE_TYPE (use->iv->base);
3180 tree type;
3182 if (use->type == USE_ADDRESS)
3184 /* The base_type may be a void pointer. Create a pointer type based on
3185 the mem_ref instead. */
3186 type = build_pointer_type (TREE_TYPE (*use->op_p));
3187 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3188 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3190 else
3191 type = base_type;
3193 return type;
3196 /* Determines the expression by which USE is expressed from induction variable
3197 CAND at statement AT in LOOP. The computation is unshared. */
3199 static tree
3200 get_computation_at (struct loop *loop,
3201 struct iv_use *use, struct iv_cand *cand, gimple at)
3203 aff_tree aff;
3204 tree type = get_use_type (use);
3206 if (!get_computation_aff (loop, use, cand, at, &aff))
3207 return NULL_TREE;
3208 unshare_aff_combination (&aff);
3209 return fold_convert (type, aff_combination_to_tree (&aff));
3212 /* Determines the expression by which USE is expressed from induction variable
3213 CAND in LOOP. The computation is unshared. */
3215 static tree
3216 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3218 return get_computation_at (loop, use, cand, use->stmt);
3221 /* Adjust the cost COST for being in loop setup rather than loop body.
3222 If we're optimizing for space, the loop setup overhead is constant;
3223 if we're optimizing for speed, amortize it over the per-iteration cost. */
3224 static unsigned
3225 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3227 if (cost == INFTY)
3228 return cost;
3229 else if (optimize_loop_for_speed_p (data->current_loop))
3230 return cost / avg_loop_niter (data->current_loop);
3231 else
3232 return cost;
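/* E.g. with an average of 10 iterations, a one-time setup cost of 20
   is charged as 20 / 10 == 2 per iteration when optimizing the loop
   for speed; when optimizing for size the full 20 is kept, since the
   setup code occupies space regardless of the trip count.  */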
3235 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3236 validity for a memory reference accessing memory of mode MODE in
3237 address space AS. */
3240 bool
3241 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3242 addr_space_t as)
3244 #define MAX_RATIO 128
3245 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3246 static vec<sbitmap> valid_mult_list;
3247 sbitmap valid_mult;
3249 if (data_index >= valid_mult_list.length ())
3250 valid_mult_list.safe_grow_cleared (data_index + 1);
3252 valid_mult = valid_mult_list[data_index];
3253 if (!valid_mult)
3255 machine_mode address_mode = targetm.addr_space.address_mode (as);
3256 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3257 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3258 rtx addr, scaled;
3259 HOST_WIDE_INT i;
3261 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3262 bitmap_clear (valid_mult);
3263 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3264 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3265 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3267 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3268 if (memory_address_addr_space_p (mode, addr, as)
3269 || memory_address_addr_space_p (mode, scaled, as))
3270 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3273 if (dump_file && (dump_flags & TDF_DETAILS))
3275 fprintf (dump_file, " allowed multipliers:");
3276 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3277 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3278 fprintf (dump_file, " %d", (int) i);
3279 fprintf (dump_file, "\n");
3280 fprintf (dump_file, "\n");
3283 valid_mult_list[data_index] = valid_mult;
3286 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3287 return false;
3289 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
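/* Example (target-dependent): on x86, base + index * scale addresses
   accept scales 1, 2, 4 and 8, so querying ratio 4 for SImode in the
   generic address space returns true while ratio 3 returns false.
   The answers are memoized per (address space, mode) in an sbitmap
   indexed by ratio + MAX_RATIO.  */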
3292 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3293 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3294 variable is omitted. Compute the cost for a memory reference that accesses
3295 a memory location of mode MEM_MODE in address space AS.
3297 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3298 size of MEM_MODE / RATIO) is available. To make this determination, we
3299 look at the size of the increment to be made, which is given in CSTEP.
3300 CSTEP may be zero if the step is unknown.
3301 STMT_AFTER_INC is true iff the statement we're looking at is after the
3302 increment of the original biv.
3304 TODO -- there must be some better way. This all is quite crude. */
3306 enum ainc_type
3308 AINC_PRE_INC, /* Pre increment. */
3309 AINC_PRE_DEC, /* Pre decrement. */
3310 AINC_POST_INC, /* Post increment. */
3311 AINC_POST_DEC, /* Post decrement. */
3312 AINC_NONE /* Also the number of auto increment types. */
3315 typedef struct address_cost_data_s
3317 HOST_WIDE_INT min_offset, max_offset;
3318 unsigned costs[2][2][2][2];
3319 unsigned ainc_costs[AINC_NONE];
3320 } *address_cost_data;
3323 static comp_cost
3324 get_address_cost (bool symbol_present, bool var_present,
3325 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3326 HOST_WIDE_INT cstep, machine_mode mem_mode,
3327 addr_space_t as, bool speed,
3328 bool stmt_after_inc, bool *may_autoinc)
3330 machine_mode address_mode = targetm.addr_space.address_mode (as);
3331 static vec<address_cost_data> address_cost_data_list;
3332 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3333 address_cost_data data;
3334 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3335 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3336 unsigned cost, acost, complexity;
3337 enum ainc_type autoinc_type;
3338 bool offset_p, ratio_p, autoinc;
3339 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3340 unsigned HOST_WIDE_INT mask;
3341 unsigned bits;
3343 if (data_index >= address_cost_data_list.length ())
3344 address_cost_data_list.safe_grow_cleared (data_index + 1);
3346 data = address_cost_data_list[data_index];
3347 if (!data)
3349 HOST_WIDE_INT i;
3350 HOST_WIDE_INT rat, off = 0;
3351 int old_cse_not_expected, width;
3352 unsigned sym_p, var_p, off_p, rat_p, add_c;
3353 rtx_insn *seq;
3354 rtx addr, base;
3355 rtx reg0, reg1;
3357 data = (address_cost_data) xcalloc (1, sizeof (*data));
3359 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3361 width = GET_MODE_BITSIZE (address_mode) - 1;
3362 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3363 width = HOST_BITS_PER_WIDE_INT - 1;
3364 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3366 for (i = width; i >= 0; i--)
3368 off = -((unsigned HOST_WIDE_INT) 1 << i);
3369 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3370 if (memory_address_addr_space_p (mem_mode, addr, as))
3371 break;
3373 data->min_offset = (i == -1? 0 : off);
3375 for (i = width; i >= 0; i--)
3377 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3378 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3379 if (memory_address_addr_space_p (mem_mode, addr, as))
3380 break;
3381 /* For some strict-alignment targets, the offset must be naturally
3382 aligned. Try an aligned offset if mem_mode is not QImode. */
3383 off = mem_mode != QImode
3384 ? ((unsigned HOST_WIDE_INT) 1 << i)
3385 - GET_MODE_SIZE (mem_mode)
3386 : 0;
3387 if (off > 0)
3389 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3390 if (memory_address_addr_space_p (mem_mode, addr, as))
3391 break;
3394 if (i == -1)
3395 off = 0;
3396 data->max_offset = off;
3398 if (dump_file && (dump_flags & TDF_DETAILS))
3400 fprintf (dump_file, "get_address_cost:\n");
3401 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3402 GET_MODE_NAME (mem_mode),
3403 data->min_offset);
3404 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3405 GET_MODE_NAME (mem_mode),
3406 data->max_offset);
3409 rat = 1;
3410 for (i = 2; i <= MAX_RATIO; i++)
3411 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3413 rat = i;
3414 break;
3417 /* Compute the cost of various addressing modes. */
3418 acost = 0;
3419 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3420 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3422 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3423 || USE_STORE_PRE_DECREMENT (mem_mode))
3425 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3426 has_predec[mem_mode]
3427 = memory_address_addr_space_p (mem_mode, addr, as);
3429 if (has_predec[mem_mode])
3430 data->ainc_costs[AINC_PRE_DEC]
3431 = address_cost (addr, mem_mode, as, speed);
3433 if (USE_LOAD_POST_DECREMENT (mem_mode)
3434 || USE_STORE_POST_DECREMENT (mem_mode))
3436 addr = gen_rtx_POST_DEC (address_mode, reg0);
3437 has_postdec[mem_mode]
3438 = memory_address_addr_space_p (mem_mode, addr, as);
3440 if (has_postdec[mem_mode])
3441 data->ainc_costs[AINC_POST_DEC]
3442 = address_cost (addr, mem_mode, as, speed);
3444 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3445 || USE_STORE_PRE_INCREMENT (mem_mode))
3447 addr = gen_rtx_PRE_INC (address_mode, reg0);
3448 has_preinc[mem_mode]
3449 = memory_address_addr_space_p (mem_mode, addr, as);
3451 if (has_preinc[mem_mode])
3452 data->ainc_costs[AINC_PRE_INC]
3453 = address_cost (addr, mem_mode, as, speed);
3455 if (USE_LOAD_POST_INCREMENT (mem_mode)
3456 || USE_STORE_POST_INCREMENT (mem_mode))
3458 addr = gen_rtx_POST_INC (address_mode, reg0);
3459 has_postinc[mem_mode]
3460 = memory_address_addr_space_p (mem_mode, addr, as);
3462 if (has_postinc[mem_mode])
3463 data->ainc_costs[AINC_POST_INC]
3464 = address_cost (addr, mem_mode, as, speed);
3466 for (i = 0; i < 16; i++)
3468 sym_p = i & 1;
3469 var_p = (i >> 1) & 1;
3470 off_p = (i >> 2) & 1;
3471 rat_p = (i >> 3) & 1;
3473 addr = reg0;
3474 if (rat_p)
3475 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3476 gen_int_mode (rat, address_mode));
3478 if (var_p)
3479 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3481 if (sym_p)
3483 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3484 /* ??? We can run into trouble with some backends by presenting
3485 it with symbols which haven't been properly passed through
3486 targetm.encode_section_info. By setting the local bit, we
3487 enhance the probability of things working. */
3488 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3490 if (off_p)
3491 base = gen_rtx_fmt_e (CONST, address_mode,
3492 gen_rtx_fmt_ee
3493 (PLUS, address_mode, base,
3494 gen_int_mode (off, address_mode)));
3496 else if (off_p)
3497 base = gen_int_mode (off, address_mode);
3498 else
3499 base = NULL_RTX;
3501 if (base)
3502 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3504 start_sequence ();
3505 /* To avoid splitting addressing modes, pretend that no cse will
3506 follow. */
3507 old_cse_not_expected = cse_not_expected;
3508 cse_not_expected = true;
3509 addr = memory_address_addr_space (mem_mode, addr, as);
3510 cse_not_expected = old_cse_not_expected;
3511 seq = get_insns ();
3512 end_sequence ();
3514 acost = seq_cost (seq, speed);
3515 acost += address_cost (addr, mem_mode, as, speed);
3517 if (!acost)
3518 acost = 1;
3519 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3522 /* On some targets, it is quite expensive to load a symbol into a register,
3523 which makes addresses that contain symbols look much more expensive.
3524 However, the symbol will have to be loaded in any case before the
3525 loop (and quite likely we have it in register already), so it does not
3526 make much sense to penalize them too heavily. So make some final
3527 tweaks for the SYMBOL_PRESENT modes:
3529 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3530 var is cheaper, use this mode with small penalty.
3531 If VAR_PRESENT is true, try whether the mode with
3532 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3533 if this is the case, use it. */
3534 add_c = add_cost (speed, address_mode);
3535 for (i = 0; i < 8; i++)
3537 var_p = i & 1;
3538 off_p = (i >> 1) & 1;
3539 rat_p = (i >> 2) & 1;
3541 acost = data->costs[0][1][off_p][rat_p] + 1;
3542 if (var_p)
3543 acost += add_c;
3545 if (acost < data->costs[1][var_p][off_p][rat_p])
3546 data->costs[1][var_p][off_p][rat_p] = acost;
3549 if (dump_file && (dump_flags & TDF_DETAILS))
3551 fprintf (dump_file, "Address costs:\n");
3553 for (i = 0; i < 16; i++)
3555 sym_p = i & 1;
3556 var_p = (i >> 1) & 1;
3557 off_p = (i >> 2) & 1;
3558 rat_p = (i >> 3) & 1;
3560 fprintf (dump_file, " ");
3561 if (sym_p)
3562 fprintf (dump_file, "sym + ");
3563 if (var_p)
3564 fprintf (dump_file, "var + ");
3565 if (off_p)
3566 fprintf (dump_file, "cst + ");
3567 if (rat_p)
3568 fprintf (dump_file, "rat * ");
3570 acost = data->costs[sym_p][var_p][off_p][rat_p];
3571 fprintf (dump_file, "index costs %d\n", acost);
3573 if (has_predec[mem_mode] || has_postdec[mem_mode]
3574 || has_preinc[mem_mode] || has_postinc[mem_mode])
3575 fprintf (dump_file, " May include autoinc/dec\n");
3576 fprintf (dump_file, "\n");
3579 address_cost_data_list[data_index] = data;
3582 bits = GET_MODE_BITSIZE (address_mode);
3583 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3584 offset &= mask;
3585 if ((offset >> (bits - 1) & 1))
3586 offset |= ~mask;
3587 s_offset = offset;
3589 autoinc = false;
3590 autoinc_type = AINC_NONE;
3591 msize = GET_MODE_SIZE (mem_mode);
3592 autoinc_offset = offset;
3593 if (stmt_after_inc)
3594 autoinc_offset += ratio * cstep;
3595 if (symbol_present || var_present || ratio != 1)
3596 autoinc = false;
3597 else
3599 if (has_postinc[mem_mode] && autoinc_offset == 0
3600 && msize == cstep)
3601 autoinc_type = AINC_POST_INC;
3602 else if (has_postdec[mem_mode] && autoinc_offset == 0
3603 && msize == -cstep)
3604 autoinc_type = AINC_POST_DEC;
3605 else if (has_preinc[mem_mode] && autoinc_offset == msize
3606 && msize == cstep)
3607 autoinc_type = AINC_PRE_INC;
3608 else if (has_predec[mem_mode] && autoinc_offset == -msize
3609 && msize == -cstep)
3610 autoinc_type = AINC_PRE_DEC;
3612 if (autoinc_type != AINC_NONE)
3613 autoinc = true;
3616 cost = 0;
3617 offset_p = (s_offset != 0
3618 && data->min_offset <= s_offset
3619 && s_offset <= data->max_offset);
3620 ratio_p = (ratio != 1
3621 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3623 if (ratio != 1 && !ratio_p)
3624 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3626 if (s_offset && !offset_p && !symbol_present)
3627 cost += add_cost (speed, address_mode);
3629 if (may_autoinc)
3630 *may_autoinc = autoinc;
3631 if (autoinc)
3632 acost = data->ainc_costs[autoinc_type];
3633 else
3634 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3635 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3636 return new_cost (cost + acost, complexity);
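/* Sketch of the final lookup above: an address of the shape
   sym + reg1 + 16 + reg0 * 4 has the symbol, variable, offset and
   ratio parts all present, so barring autoincrement its base cost is
   data->costs[1][1][1][1] with complexity 4, whereas a plain reg0
   costs data->costs[0][0][0][0] with complexity 0.  */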
3639 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3640 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3641 calculating the operands of EXPR. Returns true if successful, and returns
3642 the cost in COST. */
3644 static bool
3645 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3646 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3648 comp_cost res;
3649 tree op1 = TREE_OPERAND (expr, 1);
3650 tree cst = TREE_OPERAND (mult, 1);
3651 tree multop = TREE_OPERAND (mult, 0);
3652 int m = exact_log2 (int_cst_value (cst));
3653 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3654 int as_cost, sa_cost;
3655 bool mult_in_op1;
3657 if (!(m >= 0 && m < maxm))
3658 return false;
3660 mult_in_op1 = operand_equal_p (op1, mult, 0);
3662 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3664 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3665 use that in preference to a shift insn followed by an add insn. */
3666 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3667 ? shiftadd_cost (speed, mode, m)
3668 : (mult_in_op1
3669 ? shiftsub1_cost (speed, mode, m)
3670 : shiftsub0_cost (speed, mode, m)));
3672 res = new_cost (MIN (as_cost, sa_cost), 0);
3673 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3675 STRIP_NOPS (multop);
3676 if (!is_gimple_val (multop))
3677 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3679 *cost = res;
3680 return true;
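/* Worked example: for expr == a + b * 4, MULT is b * 4 and
   m == exact_log2 (4) == 2, so we compare the cost of a shift by 2
   followed by an add (as_cost) against a single shift-and-add
   instruction (sa_cost; e.g. an x86 lea computing a + b * 4), and
   charge the cheaper alternative plus the cost of the operand that
   is not multiplied.  */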
3683 /* Estimates cost of forcing expression EXPR into a variable. */
3685 static comp_cost
3686 force_expr_to_var_cost (tree expr, bool speed)
3688 static bool costs_initialized = false;
3689 static unsigned integer_cost [2];
3690 static unsigned symbol_cost [2];
3691 static unsigned address_cost [2];
3692 tree op0, op1;
3693 comp_cost cost0, cost1, cost;
3694 machine_mode mode;
3696 if (!costs_initialized)
3698 tree type = build_pointer_type (integer_type_node);
3699 tree var, addr;
3700 rtx x;
3701 int i;
3703 var = create_tmp_var_raw (integer_type_node, "test_var");
3704 TREE_STATIC (var) = 1;
3705 x = produce_memory_decl_rtl (var, NULL);
3706 SET_DECL_RTL (var, x);
3708 addr = build1 (ADDR_EXPR, type, var);
3711 for (i = 0; i < 2; i++)
3713 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3714 2000), i);
3716 symbol_cost[i] = computation_cost (addr, i) + 1;
3718 address_cost[i]
3719 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3720 if (dump_file && (dump_flags & TDF_DETAILS))
3722 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3723 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3724 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3725 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3726 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3727 fprintf (dump_file, "\n");
3731 costs_initialized = true;
3734 STRIP_NOPS (expr);
3736 if (SSA_VAR_P (expr))
3737 return no_cost;
3739 if (is_gimple_min_invariant (expr))
3741 if (TREE_CODE (expr) == INTEGER_CST)
3742 return new_cost (integer_cost [speed], 0);
3744 if (TREE_CODE (expr) == ADDR_EXPR)
3746 tree obj = TREE_OPERAND (expr, 0);
3748 if (TREE_CODE (obj) == VAR_DECL
3749 || TREE_CODE (obj) == PARM_DECL
3750 || TREE_CODE (obj) == RESULT_DECL)
3751 return new_cost (symbol_cost [speed], 0);
3754 return new_cost (address_cost [speed], 0);
3757 switch (TREE_CODE (expr))
3759 case POINTER_PLUS_EXPR:
3760 case PLUS_EXPR:
3761 case MINUS_EXPR:
3762 case MULT_EXPR:
3763 op0 = TREE_OPERAND (expr, 0);
3764 op1 = TREE_OPERAND (expr, 1);
3765 STRIP_NOPS (op0);
3766 STRIP_NOPS (op1);
3767 break;
3769 CASE_CONVERT:
3770 case NEGATE_EXPR:
3771 op0 = TREE_OPERAND (expr, 0);
3772 STRIP_NOPS (op0);
3773 op1 = NULL_TREE;
3774 break;
3776 default:
3777 /* Just an arbitrary value, FIXME. */
3778 return new_cost (target_spill_cost[speed], 0);
3781 if (op0 == NULL_TREE
3782 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3783 cost0 = no_cost;
3784 else
3785 cost0 = force_expr_to_var_cost (op0, speed);
3787 if (op1 == NULL_TREE
3788 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3789 cost1 = no_cost;
3790 else
3791 cost1 = force_expr_to_var_cost (op1, speed);
3793 mode = TYPE_MODE (TREE_TYPE (expr));
3794 switch (TREE_CODE (expr))
3796 case POINTER_PLUS_EXPR:
3797 case PLUS_EXPR:
3798 case MINUS_EXPR:
3799 case NEGATE_EXPR:
3800 cost = new_cost (add_cost (speed, mode), 0);
3801 if (TREE_CODE (expr) != NEGATE_EXPR)
3803 tree mult = NULL_TREE;
3804 comp_cost sa_cost;
3805 if (TREE_CODE (op1) == MULT_EXPR)
3806 mult = op1;
3807 else if (TREE_CODE (op0) == MULT_EXPR)
3808 mult = op0;
3810 if (mult != NULL_TREE
3811 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3812 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3813 speed, &sa_cost))
3814 return sa_cost;
3816 break;
3818 CASE_CONVERT:
3820 tree inner_mode, outer_mode;
3821 outer_mode = TREE_TYPE (expr);
3822 inner_mode = TREE_TYPE (op0);
3823 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3824 TYPE_MODE (inner_mode), speed), 0);
3826 break;
3828 case MULT_EXPR:
3829 if (cst_and_fits_in_hwi (op0))
3830 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3831 mode, speed), 0);
3832 else if (cst_and_fits_in_hwi (op1))
3833 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3834 mode, speed), 0);
3835 else
3836 return new_cost (target_spill_cost [speed], 0);
3837 break;
3839 default:
3840 gcc_unreachable ();
3843 cost = add_costs (cost, cost0);
3844 cost = add_costs (cost, cost1);
3846 /* Bound the cost by target_spill_cost. The parts of complicated
3847 computations often are either loop invariant or at least can
3848 be shared between several iv uses, so letting this grow without
3849 limits would not give reasonable results. */
3850 if (cost.cost > (int) target_spill_cost [speed])
3851 cost.cost = target_spill_cost [speed];
3853 return cost;
3856 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3857 invariants the computation depends on. */
3859 static comp_cost
3860 force_var_cost (struct ivopts_data *data,
3861 tree expr, bitmap *depends_on)
3863 if (depends_on)
3865 fd_ivopts_data = data;
3866 walk_tree (&expr, find_depends, depends_on, NULL);
3869 return force_expr_to_var_cost (expr, data->speed);
3872 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3873 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3874 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3875 invariants the computation depends on. */
3877 static comp_cost
3878 split_address_cost (struct ivopts_data *data,
3879 tree addr, bool *symbol_present, bool *var_present,
3880 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3882 tree core;
3883 HOST_WIDE_INT bitsize;
3884 HOST_WIDE_INT bitpos;
3885 tree toffset;
3886 machine_mode mode;
3887 int unsignedp, reversep, volatilep;
3889 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3890 &unsignedp, &reversep, &volatilep, false);
3892 if (toffset != 0
3893 || bitpos % BITS_PER_UNIT != 0
3894 || reversep
3895 || TREE_CODE (core) != VAR_DECL)
3897 *symbol_present = false;
3898 *var_present = true;
3899 fd_ivopts_data = data;
3900 walk_tree (&addr, find_depends, depends_on, NULL);
3901 return new_cost (target_spill_cost[data->speed], 0);
3904 *offset += bitpos / BITS_PER_UNIT;
3905 if (TREE_STATIC (core)
3906 || DECL_EXTERNAL (core))
3908 *symbol_present = true;
3909 *var_present = false;
3910 return no_cost;
3913 *symbol_present = false;
3914 *var_present = true;
3915 return no_cost;
3918 /* Estimates cost of expressing difference of addresses E1 - E2 as
3919 var + symbol + offset. The value of offset is added to OFFSET,
3920 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3921 part is missing. DEPENDS_ON is a set of the invariants the computation
3922 depends on. */
3924 static comp_cost
3925 ptr_difference_cost (struct ivopts_data *data,
3926 tree e1, tree e2, bool *symbol_present, bool *var_present,
3927 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3929 HOST_WIDE_INT diff = 0;
3930 aff_tree aff_e1, aff_e2;
3931 tree type;
3933 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3935 if (ptr_difference_const (e1, e2, &diff))
3937 *offset += diff;
3938 *symbol_present = false;
3939 *var_present = false;
3940 return no_cost;
3943 if (integer_zerop (e2))
3944 return split_address_cost (data, TREE_OPERAND (e1, 0),
3945 symbol_present, var_present, offset, depends_on);
3947 *symbol_present = false;
3948 *var_present = true;
3950 type = signed_type_for (TREE_TYPE (e1));
3951 tree_to_aff_combination (e1, type, &aff_e1);
3952 tree_to_aff_combination (e2, type, &aff_e2);
3953 aff_combination_scale (&aff_e2, -1);
3954 aff_combination_add (&aff_e1, &aff_e2);
3956 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3959 /* Estimates cost of expressing difference E1 - E2 as
3960 var + symbol + offset. The value of offset is added to OFFSET,
3961 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3962 part is missing. DEPENDS_ON is a set of the invariants the computation
3963 depends on. */
3965 static comp_cost
3966 difference_cost (struct ivopts_data *data,
3967 tree e1, tree e2, bool *symbol_present, bool *var_present,
3968 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3970 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3971 unsigned HOST_WIDE_INT off1, off2;
3972 aff_tree aff_e1, aff_e2;
3973 tree type;
3975 e1 = strip_offset (e1, &off1);
3976 e2 = strip_offset (e2, &off2);
3977 *offset += off1 - off2;
3979 STRIP_NOPS (e1);
3980 STRIP_NOPS (e2);
3982 if (TREE_CODE (e1) == ADDR_EXPR)
3983 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3984 offset, depends_on);
3985 *symbol_present = false;
3987 if (operand_equal_p (e1, e2, 0))
3989 *var_present = false;
3990 return no_cost;
3993 *var_present = true;
3995 if (integer_zerop (e2))
3996 return force_var_cost (data, e1, depends_on);
3998 if (integer_zerop (e1))
4000 comp_cost cost = force_var_cost (data, e2, depends_on);
4001 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4002 return cost;
4005 type = signed_type_for (TREE_TYPE (e1));
4006 tree_to_aff_combination (e1, type, &aff_e1);
4007 tree_to_aff_combination (e2, type, &aff_e2);
4008 aff_combination_scale (&aff_e2, -1);
4009 aff_combination_add (&aff_e1, &aff_e2);
4011 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
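/* Example (4-byte elements): for e1 == &a[8] and e2 == &a[4],
   strip_offset leaves identical cores and adds 16 to *OFFSET, so the
   difference degenerates to a constant with no symbol, no variable
   and no cost.  For e1 == &a[i] and e2 == &b[j], the fall-through
   path builds the affine combination e1 - e2 and charges the cost of
   forcing it into a variable.  */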
4014 /* Returns true if AFF1 and AFF2 are identical. */
4016 static bool
4017 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4019 unsigned i;
4021 if (aff1->n != aff2->n)
4022 return false;
4024 for (i = 0; i < aff1->n; i++)
4026 if (aff1->elts[i].coef != aff2->elts[i].coef)
4027 return false;
4029 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4030 return false;
4032 return true;
4035 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4037 static int
4038 get_expr_id (struct ivopts_data *data, tree expr)
4040 struct iv_inv_expr_ent ent;
4041 struct iv_inv_expr_ent **slot;
4043 ent.expr = expr;
4044 ent.hash = iterative_hash_expr (expr, 0);
4045 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4046 if (*slot)
4047 return (*slot)->id;
4049 *slot = XNEW (struct iv_inv_expr_ent);
4050 (*slot)->expr = expr;
4051 (*slot)->hash = ent.hash;
4052 (*slot)->id = data->inv_expr_id++;
4053 return (*slot)->id;
4056 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4057 requires a new compiler-generated temporary. Returns -1 otherwise.
4058 ADDRESS_P is a flag indicating if the expression is for address
4059 computation. */
4061 static int
4062 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4063 tree cbase, HOST_WIDE_INT ratio,
4064 bool address_p)
4066 aff_tree ubase_aff, cbase_aff;
4067 tree expr, ub, cb;
4069 STRIP_NOPS (ubase);
4070 STRIP_NOPS (cbase);
4071 ub = ubase;
4072 cb = cbase;
4074 if ((TREE_CODE (ubase) == INTEGER_CST)
4075 && (TREE_CODE (cbase) == INTEGER_CST))
4076 return -1;
4078 /* Strips the constant part. */
4079 if (TREE_CODE (ubase) == PLUS_EXPR
4080 || TREE_CODE (ubase) == MINUS_EXPR
4081 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4083 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4084 ubase = TREE_OPERAND (ubase, 0);
4087 /* Strips the constant part. */
4088 if (TREE_CODE (cbase) == PLUS_EXPR
4089 || TREE_CODE (cbase) == MINUS_EXPR
4090 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4092 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4093 cbase = TREE_OPERAND (cbase, 0);
4096 if (address_p)
4098 if (((TREE_CODE (ubase) == SSA_NAME)
4099 || (TREE_CODE (ubase) == ADDR_EXPR
4100 && is_gimple_min_invariant (ubase)))
4101 && (TREE_CODE (cbase) == INTEGER_CST))
4102 return -1;
4104 if (((TREE_CODE (cbase) == SSA_NAME)
4105 || (TREE_CODE (cbase) == ADDR_EXPR
4106 && is_gimple_min_invariant (cbase)))
4107 && (TREE_CODE (ubase) == INTEGER_CST))
4108 return -1;
4111 if (ratio == 1)
4113 if (operand_equal_p (ubase, cbase, 0))
4114 return -1;
4116 if (TREE_CODE (ubase) == ADDR_EXPR
4117 && TREE_CODE (cbase) == ADDR_EXPR)
4119 tree usym, csym;
4121 usym = TREE_OPERAND (ubase, 0);
4122 csym = TREE_OPERAND (cbase, 0);
4123 if (TREE_CODE (usym) == ARRAY_REF)
4125 tree ind = TREE_OPERAND (usym, 1);
4126 if (TREE_CODE (ind) == INTEGER_CST
4127 && tree_fits_shwi_p (ind)
4128 && tree_to_shwi (ind) == 0)
4129 usym = TREE_OPERAND (usym, 0);
4131 if (TREE_CODE (csym) == ARRAY_REF)
4133 tree ind = TREE_OPERAND (csym, 1);
4134 if (TREE_CODE (ind) == INTEGER_CST
4135 && tree_fits_shwi_p (ind)
4136 && tree_to_shwi (ind) == 0)
4137 csym = TREE_OPERAND (csym, 0);
4139 if (operand_equal_p (usym, csym, 0))
4140 return -1;
4142 /* Now do a more complex comparison. */
4143 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4144 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4145 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4146 return -1;
4149 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4150 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4152 aff_combination_scale (&cbase_aff, -1 * ratio);
4153 aff_combination_add (&ubase_aff, &cbase_aff);
4154 expr = aff_combination_to_tree (&ubase_aff);
4155 return get_expr_id (data, expr);
4160 /* Determines the cost of the computation by which USE is expressed
4161 from induction variable CAND. If ADDRESS_P is true, we just need
4162 to create an address from it, otherwise we want to get it into a
4163 register. A set of invariants we depend on is stored in
4164 DEPENDS_ON. AT is the statement at which the value is computed.
4165 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4166 addressing is likely. */
4168 static comp_cost
4169 get_computation_cost_at (struct ivopts_data *data,
4170 struct iv_use *use, struct iv_cand *cand,
4171 bool address_p, bitmap *depends_on, gimple at,
4172 bool *can_autoinc,
4173 int *inv_expr_id)
4175 tree ubase = use->iv->base, ustep = use->iv->step;
4176 tree cbase, cstep;
4177 tree utype = TREE_TYPE (ubase), ctype;
4178 unsigned HOST_WIDE_INT cstepi, offset = 0;
4179 HOST_WIDE_INT ratio, aratio;
4180 bool var_present, symbol_present, stmt_is_after_inc;
4181 comp_cost cost;
4182 widest_int rat;
4183 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4184 machine_mode mem_mode = (address_p
4185 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4186 : VOIDmode);
4188 *depends_on = NULL;
4190 /* Only consider real candidates. */
4191 if (!cand->iv)
4192 return infinite_cost;
4194 cbase = cand->iv->base;
4195 cstep = cand->iv->step;
4196 ctype = TREE_TYPE (cbase);
4198 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4200 /* We do not have enough precision to express the values of USE. */
4201 return infinite_cost;
4204 if (address_p
4205 || (use->iv->base_object
4206 && cand->iv->base_object
4207 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4208 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4210 /* Do not try to express the address of an object with a computation
4211 based on the address of a different object. This may cause problems
4212 in RTL-level alias analysis (which does not expect this to happen,
4213 as it is illegal in C), and it would be unlikely to be useful
4214 anyway. */
4215 if (use->iv->base_object
4216 && cand->iv->base_object
4217 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4218 return infinite_cost;
4221 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4223 /* TODO -- add direct handling of this case. */
4224 goto fallback;
4227 /* CSTEPI is removed from the offset in case the statement is after the
4228 increment. If the step is not constant, we use zero instead.
4229 This is a bit imprecise (there is the extra addition), but
4230 redundancy elimination is likely to transform the code so that
4231 it uses the value of the variable before the increment anyway,
4232 so this is not too unrealistic. */
4233 if (cst_and_fits_in_hwi (cstep))
4234 cstepi = int_cst_value (cstep);
4235 else
4236 cstepi = 0;
4238 if (!constant_multiple_of (ustep, cstep, &rat))
4239 return infinite_cost;
4241 if (wi::fits_shwi_p (rat))
4242 ratio = rat.to_shwi ();
4243 else
4244 return infinite_cost;
4246 STRIP_NOPS (cbase);
4247 ctype = TREE_TYPE (cbase);
4249 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4251 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4252 or ratio == 1, it is better to handle this like
4254 ubase - ratio * cbase + ratio * var
4256 (this also holds in the case ratio == -1, TODO). */
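/* For instance (illustrative numbers), with cbase == 4 and ratio == 3 the
   rewrite gives (ubase - 12) + 3 * var, so the constant folds into OFFSET
   below and only ubase remains to be costed; with ratio == 1 it gives
   (ubase - cbase) + var, which difference_cost handles directly.  */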
4258 if (cst_and_fits_in_hwi (cbase))
4260 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4261 cost = difference_cost (data,
4262 ubase, build_int_cst (utype, 0),
4263 &symbol_present, &var_present, &offset,
4264 depends_on);
4265 cost.cost /= avg_loop_niter (data->current_loop);
4267 else if (ratio == 1)
4269 tree real_cbase = cbase;
4271 /* Check to see if any adjustment is needed. */
4272 if (cstepi == 0 && stmt_is_after_inc)
4274 aff_tree real_cbase_aff;
4275 aff_tree cstep_aff;
4277 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4278 &real_cbase_aff);
4279 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4281 aff_combination_add (&real_cbase_aff, &cstep_aff);
4282 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4285 cost = difference_cost (data,
4286 ubase, real_cbase,
4287 &symbol_present, &var_present, &offset,
4288 depends_on);
4289 cost.cost /= avg_loop_niter (data->current_loop);
4291 else if (address_p
4292 && !POINTER_TYPE_P (ctype)
4293 && multiplier_allowed_in_address_p
4294 (ratio, mem_mode,
4295 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4297 cbase
4298 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4299 cost = difference_cost (data,
4300 ubase, cbase,
4301 &symbol_present, &var_present, &offset,
4302 depends_on);
4303 cost.cost /= avg_loop_niter (data->current_loop);
4305 else
4307 cost = force_var_cost (data, cbase, depends_on);
4308 cost = add_costs (cost,
4309 difference_cost (data,
4310 ubase, build_int_cst (utype, 0),
4311 &symbol_present, &var_present,
4312 &offset, depends_on));
4313 cost.cost /= avg_loop_niter (data->current_loop);
4314 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4317 if (inv_expr_id)
4319 *inv_expr_id =
4320 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4321 /* Clear depends on. */
4322 if (*inv_expr_id != -1 && depends_on && *depends_on)
4323 bitmap_clear (*depends_on);
4326 /* If we are after the increment, the value of the candidate is higher by
4327 one iteration. */
4328 if (stmt_is_after_inc)
4329 offset -= ratio * cstepi;
4331 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4332 (symbol/var1/const parts may be omitted). If we are looking for an
4333 address, find the cost of addressing this. */
4334 if (address_p)
4335 return add_costs (cost,
4336 get_address_cost (symbol_present, var_present,
4337 offset, ratio, cstepi,
4338 mem_mode,
4339 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4340 speed, stmt_is_after_inc,
4341 can_autoinc));
4343 /* Otherwise estimate the costs for computing the expression. */
4344 if (!symbol_present && !var_present && !offset)
4346 if (ratio != 1)
4347 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4348 return cost;
4351 /* Symbol + offset should be compile-time computable so consider that they
4352 are added once to the variable, if present. */
4353 if (var_present && (symbol_present || offset))
4354 cost.cost += adjust_setup_cost (data,
4355 add_cost (speed, TYPE_MODE (ctype)));
4357 /* Having offset does not affect runtime cost in case it is added to
4358 symbol, but it increases complexity. */
4359 if (offset)
4360 cost.complexity++;
4362 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4364 aratio = ratio > 0 ? ratio : -ratio;
4365 if (aratio != 1)
4366 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4367 return cost;
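/* E.g. for the shape sym + 8 + 2 * var2 (an illustrative case): the
   multiplication by 2 is costed by mult_by_coeff_cost, one addition
   combines the product with the compile-time part sym + 8, and the
   offset 8 only bumps the complexity, since it folds into the symbol.  */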
4369 fallback:
4370 if (can_autoinc)
4371 *can_autoinc = false;
4374 /* Just get the expression, expand it and measure the cost. */
4375 tree comp = get_computation_at (data->current_loop, use, cand, at);
4377 if (!comp)
4378 return infinite_cost;
4380 if (address_p)
4381 comp = build_simple_mem_ref (comp);
4383 return new_cost (computation_cost (comp, speed), 0);
4387 /* Determines the cost of the computation by which USE is expressed
4388 from induction variable CAND. If ADDRESS_P is true, we just need
4389 to create an address from it, otherwise we want to get it into
4390 a register. A set of invariants we depend on is stored in
4391 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4392 autoinc addressing is likely. */
4394 static comp_cost
4395 get_computation_cost (struct ivopts_data *data,
4396 struct iv_use *use, struct iv_cand *cand,
4397 bool address_p, bitmap *depends_on,
4398 bool *can_autoinc, int *inv_expr_id)
4400 return get_computation_cost_at (data,
4401 use, cand, address_p, depends_on, use->stmt,
4402 can_autoinc, inv_expr_id);
4405 /* Determines cost of basing replacement of USE on CAND in a generic
4406 expression. */
4408 static bool
4409 determine_use_iv_cost_generic (struct ivopts_data *data,
4410 struct iv_use *use, struct iv_cand *cand)
4412 bitmap depends_on;
4413 comp_cost cost;
4414 int inv_expr_id = -1;
4416 /* The simple case first -- if we need to express value of the preserved
4417 original biv, the cost is 0. This also prevents us from counting the
4418 cost of the increment twice -- once at this use and once in the cost of
4419 the candidate. */
4420 if (cand->pos == IP_ORIGINAL
4421 && cand->incremented_at == use->stmt)
4423 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4424 ERROR_MARK, -1);
4425 return true;
4428 cost = get_computation_cost (data, use, cand, false, &depends_on,
4429 NULL, &inv_expr_id);
4431 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4432 inv_expr_id);
4434 return !infinite_cost_p (cost);
4437 /* Determines cost of basing replacement of USE on CAND in an address. */
4439 static bool
4440 determine_use_iv_cost_address (struct ivopts_data *data,
4441 struct iv_use *use, struct iv_cand *cand)
4443 bitmap depends_on;
4444 bool can_autoinc;
4445 int inv_expr_id = -1;
4446 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4447 &can_autoinc, &inv_expr_id);
4449 if (cand->ainc_use == use)
4451 if (can_autoinc)
4452 cost.cost -= cand->cost_step;
4453 /* If we generated the candidate solely for exploiting autoincrement
4454 opportunities, and it turns out it can't be used, set the cost to
4455 infinity to make sure we ignore it. */
4456 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4457 cost = infinite_cost;
4459 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4460 inv_expr_id);
4462 return !infinite_cost_p (cost);
4465 /* Computes value of candidate CAND at position AT in iteration NITER, and
4466 stores it to VAL. */
4468 static void
4469 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4470 aff_tree *val)
4472 aff_tree step, delta, nit;
4473 struct iv *iv = cand->iv;
4474 tree type = TREE_TYPE (iv->base);
4475 tree steptype = type;
4476 if (POINTER_TYPE_P (type))
4477 steptype = sizetype;
4478 steptype = unsigned_type_for (type);
4480 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4481 aff_combination_convert (&step, steptype);
4482 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4483 aff_combination_convert (&nit, steptype);
4484 aff_combination_mult (&nit, &step, &delta);
4485 if (stmt_after_increment (loop, cand, at))
4486 aff_combination_add (&delta, &step);
4488 tree_to_aff_combination (iv->base, type, val);
4489 if (!POINTER_TYPE_P (type))
4490 aff_combination_convert (val, steptype);
4491 aff_combination_add (val, &delta);
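/* In other words, VAL = BASE + STEP * NITER, plus one more STEP when AT
   is after the increment: e.g. base 0, step 4 and niter 10 (illustrative
   values) yield 40, or 44 past the increment.  */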
4494 /* Returns the period of induction variable IV. */
4496 static tree
4497 iv_period (struct iv *iv)
4499 tree step = iv->step, period, type;
4500 tree pow2div;
4502 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4504 type = unsigned_type_for (TREE_TYPE (step));
4505 /* The period of the iv is lcm (step, type_range) / step - 1,
4506 i.e., N * type_range / step - 1. Since the type range is a power
4507 of two, N == step >> num_of_ending_zeros_binary (step),
4508 so the final result is
4510 (type_range >> num_of_ending_zeros_binary (step)) - 1
4512 */
4513 pow2div = num_ending_zeros (step);
4515 period = build_low_bits_mask (type,
4516 (TYPE_PRECISION (type)
4517 - tree_to_uhwi (pow2div)));
4519 return period;
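/* For example, a 32-bit unsigned iv with step 12 has two trailing zero
   bits in the step, so the period is (1 << 30) - 1 == 0x3fffffff: the iv
   runs through 2^30 distinct values before it repeats.  */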
4522 /* Returns the comparison operator used when eliminating the iv USE. */
4524 static enum tree_code
4525 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4527 struct loop *loop = data->current_loop;
4528 basic_block ex_bb;
4529 edge exit;
4531 ex_bb = gimple_bb (use->stmt);
4532 exit = EDGE_SUCC (ex_bb, 0);
4533 if (flow_bb_inside_loop_p (loop, exit->dest))
4534 exit = EDGE_SUCC (ex_bb, 1);
4536 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
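/* That is, when the exit edge is taken on a true condition, the eliminated
   test becomes CAND == BOUND; otherwise CAND != BOUND.  */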
4539 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4540 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4541 calculation is performed in a non-wrapping type.
4543 TODO: More generally, we could test for the situation that
4544 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4545 This would require knowing the sign of OFFSET. */
4547 static bool
4548 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4550 enum tree_code code;
4551 tree e1, e2;
4552 aff_tree aff_e1, aff_e2, aff_offset;
4554 if (!nowrap_type_p (TREE_TYPE (base)))
4555 return false;
4557 base = expand_simple_operations (base);
4559 if (TREE_CODE (base) == SSA_NAME)
4561 gimple stmt = SSA_NAME_DEF_STMT (base);
4563 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4564 return false;
4566 code = gimple_assign_rhs_code (stmt);
4567 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4568 return false;
4570 e1 = gimple_assign_rhs1 (stmt);
4571 e2 = gimple_assign_rhs2 (stmt);
4573 else
4575 code = TREE_CODE (base);
4576 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4577 return false;
4578 e1 = TREE_OPERAND (base, 0);
4579 e2 = TREE_OPERAND (base, 1);
4582 /* Use affine expansion as a deeper inspection to prove the equality. */
4583 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4584 &aff_e2, &data->name_expansion_cache);
4585 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4586 &aff_offset, &data->name_expansion_cache);
4587 aff_combination_scale (&aff_offset, -1);
4588 switch (code)
4590 case PLUS_EXPR:
4591 aff_combination_add (&aff_e2, &aff_offset);
4592 if (aff_combination_zero_p (&aff_e2))
4593 return true;
4595 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4596 &aff_e1, &data->name_expansion_cache);
4597 aff_combination_add (&aff_e1, &aff_offset);
4598 return aff_combination_zero_p (&aff_e1);
4600 case POINTER_PLUS_EXPR:
4601 aff_combination_add (&aff_e2, &aff_offset);
4602 return aff_combination_zero_p (&aff_e2);
4604 default:
4605 return false;
4609 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4610 comparison with CAND. NITER describes the number of iterations of
4611 the loops. If successful, the comparison in COMP_P is altered accordingly.
4613 We aim to handle the following situation:
4615 sometype *base, *p;
4616 int a, b, i;
4618 i = a;
4619 p = p_0 = base + a;
4621 do
4622 {
4623 bla (*p);
4624 p++;
4625 i++;
4626 }
4627 while (i < b);
4629 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4630 We aim to optimize this to
4632 p = p_0 = base + a;
4633 do
4634 {
4635 bla (*p);
4636 p++;
4637 }
4638 while (p < p_0 - a + b);
4640 This preserves correctness, since the pointer arithmetic does not
4641 overflow. More precisely:
4643 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4644 overflow in computing it or the values of p.
4645 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4646 overflow. To prove this, we use the fact that p_0 = base + a. */
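/* As a concrete instance (arbitrary values), take a == 2 and b == 7: the
   body runs five times with p in [base + 2, base + 6], the new test exits
   once p reaches p_0 - a + b == base + 7, and the b - a - 1 == 4 latch
   iterations are preserved exactly.  */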
4648 static bool
4649 iv_elimination_compare_lt (struct ivopts_data *data,
4650 struct iv_cand *cand, enum tree_code *comp_p,
4651 struct tree_niter_desc *niter)
4653 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4654 struct aff_tree nit, tmpa, tmpb;
4655 enum tree_code comp;
4656 HOST_WIDE_INT step;
4658 /* We need to know that the candidate induction variable does not overflow.
4659 While more complex analysis may be used to prove this, for now just
4660 check that the variable appears in the original program and that it
4661 is computed in a type that guarantees no overflows. */
4662 cand_type = TREE_TYPE (cand->iv->base);
4663 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4664 return false;
4666 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4667 the calculation of the BOUND could overflow, making the comparison
4668 invalid. */
4669 if (!data->loop_single_exit_p)
4670 return false;
4672 /* We need to be able to decide whether the candidate is increasing or decreasing
4673 in order to choose the right comparison operator. */
4674 if (!cst_and_fits_in_hwi (cand->iv->step))
4675 return false;
4676 step = int_cst_value (cand->iv->step);
4678 /* Check that the number of iterations matches the expected pattern:
4679 a + 1 > b ? 0 : b - a - 1. */
4680 mbz = niter->may_be_zero;
4681 if (TREE_CODE (mbz) == GT_EXPR)
4683 /* Handle a + 1 > b. */
4684 tree op0 = TREE_OPERAND (mbz, 0);
4685 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4687 a = TREE_OPERAND (op0, 0);
4688 b = TREE_OPERAND (mbz, 1);
4690 else
4691 return false;
4693 else if (TREE_CODE (mbz) == LT_EXPR)
4695 tree op1 = TREE_OPERAND (mbz, 1);
4697 /* Handle b < a + 1. */
4698 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4700 a = TREE_OPERAND (op1, 0);
4701 b = TREE_OPERAND (mbz, 0);
4703 else
4704 return false;
4706 else
4707 return false;
4709 /* Expected number of iterations is B - A - 1. Check that it matches
4710 the actual number, i.e., that B - A - NITER = 1. */
4711 tree_to_aff_combination (niter->niter, nit_type, &nit);
4712 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4713 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4714 aff_combination_scale (&nit, -1);
4715 aff_combination_scale (&tmpa, -1);
4716 aff_combination_add (&tmpb, &tmpa);
4717 aff_combination_add (&tmpb, &nit);
4718 if (tmpb.n != 0 || tmpb.offset != 1)
4719 return false;
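/* Continuing the example above: a == 2, b == 7 and niter == 4 leave
   tmpb == b - a - niter == 1 with no variable part, so the expected
   shape of the iteration count is confirmed.  */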
4721 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4722 overflow. */
4723 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4724 cand->iv->step,
4725 fold_convert (TREE_TYPE (cand->iv->step), a));
4726 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4727 return false;
4729 /* Determine the new comparison operator. */
4730 comp = step < 0 ? GT_EXPR : LT_EXPR;
4731 if (*comp_p == NE_EXPR)
4732 *comp_p = comp;
4733 else if (*comp_p == EQ_EXPR)
4734 *comp_p = invert_tree_comparison (comp, false);
4735 else
4736 gcc_unreachable ();
4738 return true;
4741 /* Check whether it is possible to express the condition in USE by comparison
4742 of candidate CAND. If so, store the value compared with to BOUND, and the
4743 comparison operator to COMP. */
4745 static bool
4746 may_eliminate_iv (struct ivopts_data *data,
4747 struct iv_use *use, struct iv_cand *cand, tree *bound,
4748 enum tree_code *comp)
4750 basic_block ex_bb;
4751 edge exit;
4752 tree period;
4753 struct loop *loop = data->current_loop;
4754 aff_tree bnd;
4755 struct tree_niter_desc *desc = NULL;
4757 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4758 return false;
4760 /* For now this works only for exits that dominate the loop latch.
4761 TODO: extend to other conditions inside loop body. */
4762 ex_bb = gimple_bb (use->stmt);
4763 if (use->stmt != last_stmt (ex_bb)
4764 || gimple_code (use->stmt) != GIMPLE_COND
4765 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4766 return false;
4768 exit = EDGE_SUCC (ex_bb, 0);
4769 if (flow_bb_inside_loop_p (loop, exit->dest))
4770 exit = EDGE_SUCC (ex_bb, 1);
4771 if (flow_bb_inside_loop_p (loop, exit->dest))
4772 return false;
4774 desc = niter_for_exit (data, exit);
4775 if (!desc)
4776 return false;
4778 /* Determine whether we can use the variable to test the exit condition.
4779 This is the case iff the period of the induction variable is greater
4780 than the number of iterations for which the exit condition is true. */
4781 period = iv_period (cand->iv);
4783 /* If the number of iterations is constant, compare against it directly. */
4784 if (TREE_CODE (desc->niter) == INTEGER_CST)
4786 /* See cand_value_at. */
4787 if (stmt_after_increment (loop, cand, use->stmt))
4789 if (!tree_int_cst_lt (desc->niter, period))
4790 return false;
4792 else
4794 if (tree_int_cst_lt (period, desc->niter))
4795 return false;
4799 /* If not, and if this is the only possible exit of the loop, see whether
4800 we can get a conservative estimate on the number of iterations of the
4801 entire loop and compare against that instead. */
4802 else
4804 widest_int period_value, max_niter;
4806 max_niter = desc->max;
4807 if (stmt_after_increment (loop, cand, use->stmt))
4808 max_niter += 1;
4809 period_value = wi::to_widest (period);
4810 if (wi::gtu_p (max_niter, period_value))
4812 /* See if we can take advantage of inferred loop bound information. */
4813 if (data->loop_single_exit_p)
4815 if (!max_loop_iterations (loop, &max_niter))
4816 return false;
4817 /* The loop bound is already adjusted by adding 1. */
4818 if (wi::gtu_p (max_niter, period_value))
4819 return false;
4821 else
4822 return false;
4826 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4828 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4829 aff_combination_to_tree (&bnd));
4830 *comp = iv_elimination_compare (data, use);
4832 /* It is unlikely that computing the number of iterations using division
4833 would be more profitable than keeping the original induction variable. */
4834 if (expression_expensive_p (*bound))
4835 return false;
4837 /* Sometimes, it is possible to handle the situation that the number of
4838 iterations may be zero unless additional assumptions hold, by using <
4839 instead of != in the exit condition.
4841 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4842 base the exit condition on it. However, that is often too
4843 expensive. */
4844 if (!integer_zerop (desc->may_be_zero))
4845 return iv_elimination_compare_lt (data, cand, comp, desc);
4847 return true;
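/* To summarize with a sketch (not from the original source): for a loop
   "do { ... p++; i++; } while (i < n);" with candidate p, cand_value_at
   evaluates p after DESC->niter iterations to produce BOUND, and the
   exit test on i is replaced by comparing p against BOUND; the period
   check above guarantees p cannot wrap around and hit BOUND early.  */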
4850 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4851 be copied, if it is used in the loop body and DATA->body_includes_call. */
4853 static int
4854 parm_decl_cost (struct ivopts_data *data, tree bound)
4856 tree sbound = bound;
4857 STRIP_NOPS (sbound);
4859 if (TREE_CODE (sbound) == SSA_NAME
4860 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4861 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4862 && data->body_includes_call)
4863 return COSTS_N_INSNS (1);
4865 return 0;
4868 /* Determines cost of basing replacement of USE on CAND in a condition. */
4870 static bool
4871 determine_use_iv_cost_condition (struct ivopts_data *data,
4872 struct iv_use *use, struct iv_cand *cand)
4874 tree bound = NULL_TREE;
4875 struct iv *cmp_iv;
4876 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4877 comp_cost elim_cost, express_cost, cost, bound_cost;
4878 bool ok;
4879 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4880 tree *control_var, *bound_cst;
4881 enum tree_code comp = ERROR_MARK;
4883 /* Only consider real candidates. */
4884 if (!cand->iv)
4886 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4887 ERROR_MARK, -1);
4888 return false;
4891 /* Try iv elimination. */
4892 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4894 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4895 if (elim_cost.cost == 0)
4896 elim_cost.cost = parm_decl_cost (data, bound);
4897 else if (TREE_CODE (bound) == INTEGER_CST)
4898 elim_cost.cost = 0;
4899 /* If we replace a loop condition 'i < n' with 'p < base + n',
4900 depends_on_elim will have 'base' and 'n' set, which implies
4901 that both 'base' and 'n' will be live during the loop. More likely,
4902 'base + n' will be loop invariant, resulting in only one live value
4903 during the loop. So in that case we clear depends_on_elim and set
4904 elim_inv_expr_id instead. */
4905 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4907 elim_inv_expr_id = get_expr_id (data, bound);
4908 bitmap_clear (depends_on_elim);
4910 /* The bound is a loop invariant, so it will only be computed
4911 once. */
4912 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4914 else
4915 elim_cost = infinite_cost;
4917 /* Try expressing the original giv. If it is compared with an invariant,
4918 note that we cannot get rid of it. */
4919 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4920 NULL, &cmp_iv);
4921 gcc_assert (ok);
4923 /* When the condition is a comparison of the candidate IV against
4924 zero, prefer this IV.
4926 TODO: The constant that we're subtracting from the cost should
4927 be target-dependent. This information should be added to the
4928 target costs for each backend. */
4929 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4930 && integer_zerop (*bound_cst)
4931 && (operand_equal_p (*control_var, cand->var_after, 0)
4932 || operand_equal_p (*control_var, cand->var_before, 0)))
4933 elim_cost.cost -= 1;
4935 express_cost = get_computation_cost (data, use, cand, false,
4936 &depends_on_express, NULL,
4937 &express_inv_expr_id);
4938 fd_ivopts_data = data;
4939 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4941 /* Count the cost of the original bound as well. */
4942 bound_cost = force_var_cost (data, *bound_cst, NULL);
4943 if (bound_cost.cost == 0)
4944 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4945 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4946 bound_cost.cost = 0;
4947 express_cost.cost += bound_cost.cost;
4949 /* Choose the better approach, preferring the eliminated IV. */
4950 if (compare_costs (elim_cost, express_cost) <= 0)
4952 cost = elim_cost;
4953 depends_on = depends_on_elim;
4954 depends_on_elim = NULL;
4955 inv_expr_id = elim_inv_expr_id;
4957 else
4959 cost = express_cost;
4960 depends_on = depends_on_express;
4961 depends_on_express = NULL;
4962 bound = NULL_TREE;
4963 comp = ERROR_MARK;
4964 inv_expr_id = express_inv_expr_id;
4967 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4969 if (depends_on_elim)
4970 BITMAP_FREE (depends_on_elim);
4971 if (depends_on_express)
4972 BITMAP_FREE (depends_on_express);
4974 return !infinite_cost_p (cost);
4977 /* Determines cost of basing replacement of USE on CAND. Returns false
4978 if USE cannot be based on CAND. */
4980 static bool
4981 determine_use_iv_cost (struct ivopts_data *data,
4982 struct iv_use *use, struct iv_cand *cand)
4984 switch (use->type)
4986 case USE_NONLINEAR_EXPR:
4987 return determine_use_iv_cost_generic (data, use, cand);
4989 case USE_ADDRESS:
4990 return determine_use_iv_cost_address (data, use, cand);
4992 case USE_COMPARE:
4993 return determine_use_iv_cost_condition (data, use, cand);
4995 default:
4996 gcc_unreachable ();
5000 /* Return true if get_computation_cost indicates that autoincrement is
5001 a possibility for the pair of USE and CAND, false otherwise. */
5003 static bool
5004 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5005 struct iv_cand *cand)
5007 bitmap depends_on;
5008 bool can_autoinc;
5009 comp_cost cost;
5011 if (use->type != USE_ADDRESS)
5012 return false;
5014 cost = get_computation_cost (data, use, cand, true, &depends_on,
5015 &can_autoinc, NULL);
5017 BITMAP_FREE (depends_on);
5019 return !infinite_cost_p (cost) && can_autoinc;
5022 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5023 use that allows autoincrement, and set their AINC_USE if possible. */
5025 static void
5026 set_autoinc_for_original_candidates (struct ivopts_data *data)
5028 unsigned i, j;
5030 for (i = 0; i < n_iv_cands (data); i++)
5032 struct iv_cand *cand = iv_cand (data, i);
5033 struct iv_use *closest_before = NULL;
5034 struct iv_use *closest_after = NULL;
5035 if (cand->pos != IP_ORIGINAL)
5036 continue;
5038 for (j = 0; j < n_iv_uses (data); j++)
5040 struct iv_use *use = iv_use (data, j);
5041 unsigned uid = gimple_uid (use->stmt);
5043 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5044 continue;
5046 if (uid < gimple_uid (cand->incremented_at)
5047 && (closest_before == NULL
5048 || uid > gimple_uid (closest_before->stmt)))
5049 closest_before = use;
5051 if (uid > gimple_uid (cand->incremented_at)
5052 && (closest_after == NULL
5053 || uid < gimple_uid (closest_after->stmt)))
5054 closest_after = use;
5057 if (closest_before != NULL
5058 && autoinc_possible_for_pair (data, closest_before, cand))
5059 cand->ainc_use = closest_before;
5060 else if (closest_after != NULL
5061 && autoinc_possible_for_pair (data, closest_after, cand))
5062 cand->ainc_use = closest_after;
5066 /* Finds the candidates for the induction variables. */
5068 static void
5069 find_iv_candidates (struct ivopts_data *data)
5071 /* Add commonly used ivs. */
5072 add_standard_iv_candidates (data);
5074 /* Add old induction variables. */
5075 add_old_ivs_candidates (data);
5077 /* Add induction variables derived from uses. */
5078 add_derived_ivs_candidates (data);
5080 set_autoinc_for_original_candidates (data);
5082 /* Record the important candidates. */
5083 record_important_candidates (data);
5086 /* Determines costs of basing the use of the iv on an iv candidate. */
5088 static void
5089 determine_use_iv_costs (struct ivopts_data *data)
5091 unsigned i, j;
5092 struct iv_use *use;
5093 struct iv_cand *cand;
5094 bitmap to_clear = BITMAP_ALLOC (NULL);
5096 alloc_use_cost_map (data);
5098 for (i = 0; i < n_iv_uses (data); i++)
5100 use = iv_use (data, i);
5102 if (data->consider_all_candidates)
5104 for (j = 0; j < n_iv_cands (data); j++)
5106 cand = iv_cand (data, j);
5107 determine_use_iv_cost (data, use, cand);
5110 else
5112 bitmap_iterator bi;
5114 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5116 cand = iv_cand (data, j);
5117 if (!determine_use_iv_cost (data, use, cand))
5118 bitmap_set_bit (to_clear, j);
5121 /* Remove the candidates for which the cost is infinite from
5122 the list of related candidates. */
5123 bitmap_and_compl_into (use->related_cands, to_clear);
5124 bitmap_clear (to_clear);
5128 BITMAP_FREE (to_clear);
5130 if (dump_file && (dump_flags & TDF_DETAILS))
5132 fprintf (dump_file, "Use-candidate costs:\n");
5134 for (i = 0; i < n_iv_uses (data); i++)
5136 use = iv_use (data, i);
5138 fprintf (dump_file, "Use %d:\n", i);
5139 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5140 for (j = 0; j < use->n_map_members; j++)
5142 if (!use->cost_map[j].cand
5143 || infinite_cost_p (use->cost_map[j].cost))
5144 continue;
5146 fprintf (dump_file, " %d\t%d\t%d\t",
5147 use->cost_map[j].cand->id,
5148 use->cost_map[j].cost.cost,
5149 use->cost_map[j].cost.complexity);
5150 if (use->cost_map[j].depends_on)
5151 bitmap_print (dump_file,
5152 use->cost_map[j].depends_on, "","");
5153 if (use->cost_map[j].inv_expr_id != -1)
5154 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5155 fprintf (dump_file, "\n");
5158 fprintf (dump_file, "\n");
5160 fprintf (dump_file, "\n");
5164 /* Determines cost of the candidate CAND. */
5166 static void
5167 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5169 comp_cost cost_base;
5170 unsigned cost, cost_step;
5171 tree base;
5173 if (!cand->iv)
5175 cand->cost = 0;
5176 return;
5179 /* There are two costs associated with the candidate -- its increment
5180 and its initialization. The second is almost negligible for any loop
5181 that rolls enough, so we give it only very little weight. */
5183 base = cand->iv->base;
5184 cost_base = force_var_cost (data, base, NULL);
5185 /* Only rarely will the iv register happen to be initialized with
5186 the proper value at no cost. In general, there will at least be a regcopy
5187 or a const set. */
5188 if (cost_base.cost == 0)
5189 cost_base.cost = COSTS_N_INSNS (1);
5190 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5192 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5194 /* Prefer the original ivs unless we may gain something by replacing them.
5195 The reason is to make debugging simpler, so this is not relevant for
5196 artificial ivs created by other optimization passes. */
5197 if (cand->pos != IP_ORIGINAL
5198 || !SSA_NAME_VAR (cand->var_before)
5199 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5200 cost++;
5202 /* Prefer not to insert statements into latch unless there are some
5203 already (so that we do not create unnecessary jumps). */
5204 if (cand->pos == IP_END
5205 && empty_block_p (ip_end_pos (data->current_loop)))
5206 cost++;
5208 cand->cost = cost;
5209 cand->cost_step = cost_step;
5212 /* Determines costs of computation of the candidates. */
5214 static void
5215 determine_iv_costs (struct ivopts_data *data)
5217 unsigned i;
5219 if (dump_file && (dump_flags & TDF_DETAILS))
5221 fprintf (dump_file, "Candidate costs:\n");
5222 fprintf (dump_file, " cand\tcost\n");
5225 for (i = 0; i < n_iv_cands (data); i++)
5227 struct iv_cand *cand = iv_cand (data, i);
5229 determine_iv_cost (data, cand);
5231 if (dump_file && (dump_flags & TDF_DETAILS))
5232 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5235 if (dump_file && (dump_flags & TDF_DETAILS))
5236 fprintf (dump_file, "\n");
5239 /* Calculates cost for having SIZE induction variables. */
5241 static unsigned
5242 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5244 /* We add size to the cost, so that we prefer eliminating ivs
5245 if possible. */
5246 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5247 data->body_includes_call);
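/* E.g. a set needing three registers (an illustrative value) costs 3 plus
   whatever spill/pressure cost the target estimates for three extra live
   values; the flat "+ size" term biases the search toward smaller sets
   even when register pressure is not an issue.  */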
5250 /* For each size of the induction variable set determine the penalty. */
5252 static void
5253 determine_set_costs (struct ivopts_data *data)
5255 unsigned j, n;
5256 gphi *phi;
5257 gphi_iterator psi;
5258 tree op;
5259 struct loop *loop = data->current_loop;
5260 bitmap_iterator bi;
5262 if (dump_file && (dump_flags & TDF_DETAILS))
5264 fprintf (dump_file, "Global costs:\n");
5265 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5266 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5267 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5268 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5271 n = 0;
5272 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5274 phi = psi.phi ();
5275 op = PHI_RESULT (phi);
5277 if (virtual_operand_p (op))
5278 continue;
5280 if (get_iv (data, op))
5281 continue;
5283 n++;
5286 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5288 struct version_info *info = ver_info (data, j);
5290 if (info->inv_id && info->has_nonlin_use)
5291 n++;
5294 data->regs_used = n;
5295 if (dump_file && (dump_flags & TDF_DETAILS))
5296 fprintf (dump_file, " regs_used %d\n", n);
5298 if (dump_file && (dump_flags & TDF_DETAILS))
5300 fprintf (dump_file, " cost for size:\n");
5301 fprintf (dump_file, " ivs\tcost\n");
5302 for (j = 0; j <= 2 * target_avail_regs; j++)
5303 fprintf (dump_file, " %d\t%d\n", j,
5304 ivopts_global_cost_for_size (data, j));
5305 fprintf (dump_file, "\n");
5309 /* Returns true if A is a cheaper cost pair than B. */
5311 static bool
5312 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5314 int cmp;
5316 if (!a)
5317 return false;
5319 if (!b)
5320 return true;
5322 cmp = compare_costs (a->cost, b->cost);
5323 if (cmp < 0)
5324 return true;
5326 if (cmp > 0)
5327 return false;
5329 /* In case the costs are the same, prefer the cheaper candidate. */
5330 if (a->cand->cost < b->cand->cost)
5331 return true;
5333 return false;
5337 /* Returns the candidate by which USE is expressed in IVS. */
5339 static struct cost_pair *
5340 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5342 return ivs->cand_for_use[use->id];
5345 /* Computes the cost field of IVS structure. */
5347 static void
5348 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5350 comp_cost cost = ivs->cand_use_cost;
5352 cost.cost += ivs->cand_cost;
5354 cost.cost += ivopts_global_cost_for_size (data,
5355 ivs->n_regs + ivs->num_used_inv_expr);
5357 ivs->cost = cost;
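/* So the total is cand_use_cost + cand_cost + the global size cost:
   e.g. (illustrative numbers) uses costing 12, two candidates of cost 5
   each, three registers and one shared invariant expression give
   12 + 10 + ivopts_global_cost_for_size (data, 3 + 1).  */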
5360 /* Remove the invariants in set INVS from set IVS. */
5362 static void
5363 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5365 bitmap_iterator bi;
5366 unsigned iid;
5368 if (!invs)
5369 return;
5371 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5373 ivs->n_invariant_uses[iid]--;
5374 if (ivs->n_invariant_uses[iid] == 0)
5375 ivs->n_regs--;
5379 /* Set USE not to be expressed by any candidate in IVS. */
5381 static void
5382 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5383 struct iv_use *use)
5385 unsigned uid = use->id, cid;
5386 struct cost_pair *cp;
5388 cp = ivs->cand_for_use[uid];
5389 if (!cp)
5390 return;
5391 cid = cp->cand->id;
5393 ivs->bad_uses++;
5394 ivs->cand_for_use[uid] = NULL;
5395 ivs->n_cand_uses[cid]--;
5397 if (ivs->n_cand_uses[cid] == 0)
5399 bitmap_clear_bit (ivs->cands, cid);
5400 /* Do not count the pseudocandidates. */
5401 if (cp->cand->iv)
5402 ivs->n_regs--;
5403 ivs->n_cands--;
5404 ivs->cand_cost -= cp->cand->cost;
5406 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5409 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5411 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5413 if (cp->inv_expr_id != -1)
5415 ivs->used_inv_expr[cp->inv_expr_id]--;
5416 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5417 ivs->num_used_inv_expr--;
5419 iv_ca_recount_cost (data, ivs);
5422 /* Add the invariants in set INVS to set IVS. */
5424 static void
5425 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5427 bitmap_iterator bi;
5428 unsigned iid;
5430 if (!invs)
5431 return;
5433 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5435 ivs->n_invariant_uses[iid]++;
5436 if (ivs->n_invariant_uses[iid] == 1)
5437 ivs->n_regs++;
5441 /* Set cost pair for USE in set IVS to CP. */
5443 static void
5444 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5445 struct iv_use *use, struct cost_pair *cp)
5447 unsigned uid = use->id, cid;
5449 if (ivs->cand_for_use[uid] == cp)
5450 return;
5452 if (ivs->cand_for_use[uid])
5453 iv_ca_set_no_cp (data, ivs, use);
5455 if (cp)
5457 cid = cp->cand->id;
5459 ivs->bad_uses--;
5460 ivs->cand_for_use[uid] = cp;
5461 ivs->n_cand_uses[cid]++;
5462 if (ivs->n_cand_uses[cid] == 1)
5464 bitmap_set_bit (ivs->cands, cid);
5465 /* Do not count the pseudocandidates. */
5466 if (cp->cand->iv)
5467 ivs->n_regs++;
5468 ivs->n_cands++;
5469 ivs->cand_cost += cp->cand->cost;
5471 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5474 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5475 iv_ca_set_add_invariants (ivs, cp->depends_on);
5477 if (cp->inv_expr_id != -1)
5479 ivs->used_inv_expr[cp->inv_expr_id]++;
5480 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5481 ivs->num_used_inv_expr++;
5483 iv_ca_recount_cost (data, ivs);
5487 /* Extend set IVS by expressing USE by some of the candidates in it
5488 if possible. Consider all important candidates if candidates in
5489 set IVS don't give any result. */
5491 static void
5492 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5493 struct iv_use *use)
5495 struct cost_pair *best_cp = NULL, *cp;
5496 bitmap_iterator bi;
5497 unsigned i;
5498 struct iv_cand *cand;
5500 gcc_assert (ivs->upto >= use->id);
5501 ivs->upto++;
5502 ivs->bad_uses++;
5504 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5506 cand = iv_cand (data, i);
5507 cp = get_use_iv_cost (data, use, cand);
5508 if (cheaper_cost_pair (cp, best_cp))
5509 best_cp = cp;
5512 if (best_cp == NULL)
5514 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5516 cand = iv_cand (data, i);
5517 cp = get_use_iv_cost (data, use, cand);
5518 if (cheaper_cost_pair (cp, best_cp))
5519 best_cp = cp;
5523 iv_ca_set_cp (data, ivs, use, best_cp);
5526 /* Get cost for assignment IVS. */
5528 static comp_cost
5529 iv_ca_cost (struct iv_ca *ivs)
5531 /* This was a conditional expression but it triggered a bug in
5532 Sun C 5.5. */
5533 if (ivs->bad_uses)
5534 return infinite_cost;
5535 else
5536 return ivs->cost;
5539 /* Returns true if all dependences of CP are among invariants in IVS. */
5541 static bool
5542 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5544 unsigned i;
5545 bitmap_iterator bi;
5547 if (!cp->depends_on)
5548 return true;
5550 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5552 if (ivs->n_invariant_uses[i] == 0)
5553 return false;
5556 return true;
5559 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5560 it before NEXT_CHANGE. */
5562 static struct iv_ca_delta *
5563 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5564 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5566 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5568 change->use = use;
5569 change->old_cp = old_cp;
5570 change->new_cp = new_cp;
5571 change->next_change = next_change;
5573 return change;
5576 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5577 are rewritten. */
5579 static struct iv_ca_delta *
5580 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5582 struct iv_ca_delta *last;
5584 if (!l2)
5585 return l1;
5587 if (!l1)
5588 return l2;
5590 for (last = l1; last->next_change; last = last->next_change)
5591 continue;
5592 last->next_change = l2;
5594 return l1;
5597 /* Reverse the list of changes DELTA, forming the inverse to it. */
5599 static struct iv_ca_delta *
5600 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5602 struct iv_ca_delta *act, *next, *prev = NULL;
5603 struct cost_pair *tmp;
5605 for (act = delta; act; act = next)
5607 next = act->next_change;
5608 act->next_change = prev;
5609 prev = act;
5611 tmp = act->old_cp;
5612 act->old_cp = act->new_cp;
5613 act->new_cp = tmp;
5616 return prev;
5619 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5620 reverted instead. */
5622 static void
5623 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5624 struct iv_ca_delta *delta, bool forward)
5626 struct cost_pair *from, *to;
5627 struct iv_ca_delta *act;
5629 if (!forward)
5630 delta = iv_ca_delta_reverse (delta);
5632 for (act = delta; act; act = act->next_change)
5634 from = act->old_cp;
5635 to = act->new_cp;
5636 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5637 iv_ca_set_cp (data, ivs, act->use, to);
5640 if (!forward)
5641 iv_ca_delta_reverse (delta);
5644 /* Returns true if CAND is used in IVS. */
5646 static bool
5647 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5649 return ivs->n_cand_uses[cand->id] > 0;
5652 /* Returns number of induction variable candidates in the set IVS. */
5654 static unsigned
5655 iv_ca_n_cands (struct iv_ca *ivs)
5657 return ivs->n_cands;
5660 /* Free the list of changes DELTA. */
5662 static void
5663 iv_ca_delta_free (struct iv_ca_delta **delta)
5665 struct iv_ca_delta *act, *next;
5667 for (act = *delta; act; act = next)
5669 next = act->next_change;
5670 free (act);
5673 *delta = NULL;
5676 /* Allocates a new iv candidate assignment. */
5678 static struct iv_ca *
5679 iv_ca_new (struct ivopts_data *data)
5681 struct iv_ca *nw = XNEW (struct iv_ca);
5683 nw->upto = 0;
5684 nw->bad_uses = 0;
5685 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5686 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5687 nw->cands = BITMAP_ALLOC (NULL);
5688 nw->n_cands = 0;
5689 nw->n_regs = 0;
5690 nw->cand_use_cost = no_cost;
5691 nw->cand_cost = 0;
5692 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5693 nw->cost = no_cost;
5694 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5695 nw->num_used_inv_expr = 0;
5697 return nw;
5700 /* Free memory occupied by the set IVS. */
5702 static void
5703 iv_ca_free (struct iv_ca **ivs)
5705 free ((*ivs)->cand_for_use);
5706 free ((*ivs)->n_cand_uses);
5707 BITMAP_FREE ((*ivs)->cands);
5708 free ((*ivs)->n_invariant_uses);
5709 free ((*ivs)->used_inv_expr);
5710 free (*ivs);
5711 *ivs = NULL;
5714 /* Dumps IVS to FILE. */
5716 static void
5717 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5719 const char *pref = " invariants ";
5720 unsigned i;
5721 comp_cost cost = iv_ca_cost (ivs);
5723 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5724 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5725 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5726 bitmap_print (file, ivs->cands, " candidates: ","\n");
5728 for (i = 0; i < ivs->upto; i++)
5730 struct iv_use *use = iv_use (data, i);
5731 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5732 if (cp)
5733 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5734 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5735 else
5736 fprintf (file, " use:%d --> ??\n", use->id);
5739 for (i = 1; i <= data->max_inv_id; i++)
5740 if (ivs->n_invariant_uses[i])
5742 fprintf (file, "%s%d", pref, i);
5743 pref = ", ";
5745 fprintf (file, "\n\n");
5748 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5749 of the new set, and store the differences in DELTA. The number of induction
5750 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is
5751 true, the function tries to find a solution with minimal iv candidates. */
5753 static comp_cost
5754 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5755 struct iv_cand *cand, struct iv_ca_delta **delta,
5756 unsigned *n_ivs, bool min_ncand)
5758 unsigned i;
5759 comp_cost cost;
5760 struct iv_use *use;
5761 struct cost_pair *old_cp, *new_cp;
5763 *delta = NULL;
5764 for (i = 0; i < ivs->upto; i++)
5766 use = iv_use (data, i);
5767 old_cp = iv_ca_cand_for_use (ivs, use);
5769 if (old_cp
5770 && old_cp->cand == cand)
5771 continue;
5773 new_cp = get_use_iv_cost (data, use, cand);
5774 if (!new_cp)
5775 continue;
5777 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5778 continue;
5780 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5781 continue;
5783 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5786 iv_ca_delta_commit (data, ivs, *delta, true);
5787 cost = iv_ca_cost (ivs);
5788 if (n_ivs)
5789 *n_ivs = iv_ca_n_cands (ivs);
5790 iv_ca_delta_commit (data, ivs, *delta, false);
5792 return cost;
5795 /* Try narrowing set IVS by removing CAND. Return the cost of
5796 the new set and store the differences in DELTA. START is
5797 the candidate with which we start narrowing. */
5799 static comp_cost
5800 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5801 struct iv_cand *cand, struct iv_cand *start,
5802 struct iv_ca_delta **delta)
5804 unsigned i, ci;
5805 struct iv_use *use;
5806 struct cost_pair *old_cp, *new_cp, *cp;
5807 bitmap_iterator bi;
5808 struct iv_cand *cnd;
5809 comp_cost cost, best_cost, acost;
5811 *delta = NULL;
5812 for (i = 0; i < n_iv_uses (data); i++)
5814 use = iv_use (data, i);
5816 old_cp = iv_ca_cand_for_use (ivs, use);
5817 if (old_cp->cand != cand)
5818 continue;
5820 best_cost = iv_ca_cost (ivs);
5821 /* Start narrowing with START. */
5822 new_cp = get_use_iv_cost (data, use, start);
5824 if (data->consider_all_candidates)
5826 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5828 if (ci == cand->id || (start && ci == start->id))
5829 continue;
5831 cnd = iv_cand (data, ci);
5833 cp = get_use_iv_cost (data, use, cnd);
5834 if (!cp)
5835 continue;
5837 iv_ca_set_cp (data, ivs, use, cp);
5838 acost = iv_ca_cost (ivs);
5840 if (compare_costs (acost, best_cost) < 0)
5842 best_cost = acost;
5843 new_cp = cp;
5847 else
5849 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5851 if (ci == cand->id || (start && ci == start->id))
5852 continue;
5854 cnd = iv_cand (data, ci);
5856 cp = get_use_iv_cost (data, use, cnd);
5857 if (!cp)
5858 continue;
5860 iv_ca_set_cp (data, ivs, use, cp);
5861 acost = iv_ca_cost (ivs);
5863 if (compare_costs (acost, best_cost) < 0)
5865 best_cost = acost;
5866 new_cp = cp;
5870 /* Restore to old cp for use. */
5871 iv_ca_set_cp (data, ivs, use, old_cp);
5873 if (!new_cp)
5875 iv_ca_delta_free (delta);
5876 return infinite_cost;
5879 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5882 iv_ca_delta_commit (data, ivs, *delta, true);
5883 cost = iv_ca_cost (ivs);
5884 iv_ca_delta_commit (data, ivs, *delta, false);
5886 return cost;
5889 /* Try optimizing the set of candidates IVS by removing candidates other
5890 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5891 differences in DELTA. */
5893 static comp_cost
5894 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5895 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5897 bitmap_iterator bi;
5898 struct iv_ca_delta *act_delta, *best_delta;
5899 unsigned i;
5900 comp_cost best_cost, acost;
5901 struct iv_cand *cand;
5903 best_delta = NULL;
5904 best_cost = iv_ca_cost (ivs);
5906 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5908 cand = iv_cand (data, i);
5910 if (cand == except_cand)
5911 continue;
5913 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5915 if (compare_costs (acost, best_cost) < 0)
5917 best_cost = acost;
5918 iv_ca_delta_free (&best_delta);
5919 best_delta = act_delta;
5921 else
5922 iv_ca_delta_free (&act_delta);
5925 if (!best_delta)
5927 *delta = NULL;
5928 return best_cost;
5931 /* Recurse to possibly remove other unnecessary ivs. */
5932 iv_ca_delta_commit (data, ivs, best_delta, true);
5933 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5934 iv_ca_delta_commit (data, ivs, best_delta, false);
5935 *delta = iv_ca_delta_join (best_delta, *delta);
5936 return best_cost;
5939 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
5940 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5941 the corresponding cost_pair; otherwise just return BEST_CP. */
5943 static struct cost_pair*
5944 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5945 unsigned int cand_idx, struct iv_cand *old_cand,
5946 struct cost_pair *best_cp)
5948 struct iv_cand *cand;
5949 struct cost_pair *cp;
5951 gcc_assert (old_cand != NULL && best_cp != NULL);
5952 if (cand_idx == old_cand->id)
5953 return best_cp;
5955 cand = iv_cand (data, cand_idx);
5956 cp = get_use_iv_cost (data, use, cand);
5957 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5958 return cp;
5960 return best_cp;
5963 /* Try breaking a locally optimal fixed-point for IVS by replacing candidates
5964 that are used by more than one iv use. For each of those candidates,
5965 this function tries to represent the iv uses under that candidate using
5966 other candidates with lower local cost, then tries to prune the new set.
5967 If the new set has a lower cost, it returns the new cost after recording the
5968 candidate replacement in the list DELTA. */
5970 static comp_cost
5971 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5972 struct iv_ca_delta **delta)
5974 bitmap_iterator bi, bj;
5975 unsigned int i, j, k;
5976 struct iv_use *use;
5977 struct iv_cand *cand;
5978 comp_cost orig_cost, acost;
5979 struct iv_ca_delta *act_delta, *tmp_delta;
5980 struct cost_pair *old_cp, *best_cp = NULL;
5982 *delta = NULL;
5983 orig_cost = iv_ca_cost (ivs);
5985 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5987 if (ivs->n_cand_uses[i] == 1
5988 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5989 continue;
5991 cand = iv_cand (data, i);
5993 act_delta = NULL;
5994 /* Represent uses under current candidate using other ones with
5995 lower local cost. */
5996 for (j = 0; j < ivs->upto; j++)
5998 use = iv_use (data, j);
5999 old_cp = iv_ca_cand_for_use (ivs, use);
6001 if (old_cp->cand != cand)
6002 continue;
6004 best_cp = old_cp;
6005 if (data->consider_all_candidates)
6006 for (k = 0; k < n_iv_cands (data); k++)
6007 best_cp = cheaper_cost_with_cand (data, use, k,
6008 old_cp->cand, best_cp);
6009 else
6010 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6011 best_cp = cheaper_cost_with_cand (data, use, k,
6012 old_cp->cand, best_cp);
6014 if (best_cp == old_cp)
6015 continue;
6017 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6019 /* No need for further pruning. */
6020 if (!act_delta)
6021 continue;
6023 /* Prune the new candidate set. */
6024 iv_ca_delta_commit (data, ivs, act_delta, true);
6025 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6026 iv_ca_delta_commit (data, ivs, act_delta, false);
6027 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6029 if (compare_costs (acost, orig_cost) < 0)
6031 *delta = act_delta;
6032 return acost;
6034 else
6035 iv_ca_delta_free (&act_delta);
6038 return orig_cost;
6041 /* Tries to extend the set IVS in the best possible way in order
6042 to express the USE. If ORIGINALP is true, prefer candidates from
6043 the original set of IVs, otherwise favor important candidates not
6044 based on any memory object. */
6046 static bool
6047 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6048 struct iv_use *use, bool originalp)
6050 comp_cost best_cost, act_cost;
6051 unsigned i;
6052 bitmap_iterator bi;
6053 struct iv_cand *cand;
6054 struct iv_ca_delta *best_delta = NULL, *act_delta;
6055 struct cost_pair *cp;
6057 iv_ca_add_use (data, ivs, use);
6058 best_cost = iv_ca_cost (ivs);
6059 cp = iv_ca_cand_for_use (ivs, use);
6060 if (cp)
6062 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6063 iv_ca_set_no_cp (data, ivs, use);
6066 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6067 first try important candidates not based on any memory object. Only if
6068 this fails, try the specific ones. Rationale -- in loops with many
6069 variables the best choice often is to use just one generic biv. If we
6070 added here many ivs specific to the uses, the optimization algorithm later
6071 would be likely to get stuck in a local minimum, thus causing us to create
6072 too many ivs. The approach from few ivs to more seems more likely to be
6073 successful -- starting from few ivs, replacing an expensive use by a
6074 specific iv should always be a win. */
6075 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6077 cand = iv_cand (data, i);
6079 if (originalp && cand->pos != IP_ORIGINAL)
6080 continue;
6082 if (!originalp && cand->iv->base_object != NULL_TREE)
6083 continue;
6085 if (iv_ca_cand_used_p (ivs, cand))
6086 continue;
6088 cp = get_use_iv_cost (data, use, cand);
6089 if (!cp)
6090 continue;
6092 iv_ca_set_cp (data, ivs, use, cp);
6093 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6094 true);
6095 iv_ca_set_no_cp (data, ivs, use);
6096 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6098 if (compare_costs (act_cost, best_cost) < 0)
6100 best_cost = act_cost;
6102 iv_ca_delta_free (&best_delta);
6103 best_delta = act_delta;
6105 else
6106 iv_ca_delta_free (&act_delta);
6109 if (infinite_cost_p (best_cost))
6111 for (i = 0; i < use->n_map_members; i++)
6113 cp = use->cost_map + i;
6114 cand = cp->cand;
6115 if (!cand)
6116 continue;
6118 /* Already tried this. */
6119 if (cand->important)
6121 if (originalp && cand->pos == IP_ORIGINAL)
6122 continue;
6123 if (!originalp && cand->iv->base_object == NULL_TREE)
6124 continue;
6127 if (iv_ca_cand_used_p (ivs, cand))
6128 continue;
6130 act_delta = NULL;
6131 iv_ca_set_cp (data, ivs, use, cp);
6132 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6133 iv_ca_set_no_cp (data, ivs, use);
6134 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6135 cp, act_delta);
6137 if (compare_costs (act_cost, best_cost) < 0)
6139 best_cost = act_cost;
6141 if (best_delta)
6142 iv_ca_delta_free (&best_delta);
6143 best_delta = act_delta;
6145 else
6146 iv_ca_delta_free (&act_delta);
6150 iv_ca_delta_commit (data, ivs, best_delta, true);
6151 iv_ca_delta_free (&best_delta);
6153 return !infinite_cost_p (best_cost);
6156 /* Finds an initial assignment of candidates to uses. */
6158 static struct iv_ca *
6159 get_initial_solution (struct ivopts_data *data, bool originalp)
6161 struct iv_ca *ivs = iv_ca_new (data);
6162 unsigned i;
6164 for (i = 0; i < n_iv_uses (data); i++)
6165 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6167 iv_ca_free (&ivs);
6168 return NULL;
6171 return ivs;
6174 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6175 points to a bool variable; if it is true, this function tries to break
6176 a locally optimal fixed-point by replacing candidates in IVS. */
6178 static bool
6179 try_improve_iv_set (struct ivopts_data *data,
6180 struct iv_ca *ivs, bool *try_replace_p)
6182 unsigned i, n_ivs;
6183 comp_cost acost, best_cost = iv_ca_cost (ivs);
6184 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6185 struct iv_cand *cand;
6187 /* Try extending the set of induction variables by one. */
6188 for (i = 0; i < n_iv_cands (data); i++)
6190 cand = iv_cand (data, i);
6192 if (iv_ca_cand_used_p (ivs, cand))
6193 continue;
6195 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6196 if (!act_delta)
6197 continue;
6199 /* If we successfully added the candidate and the set is small enough,
6200 try optimizing it by removing other candidates. */
6201 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6203 iv_ca_delta_commit (data, ivs, act_delta, true);
6204 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6205 iv_ca_delta_commit (data, ivs, act_delta, false);
6206 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6209 if (compare_costs (acost, best_cost) < 0)
6211 best_cost = acost;
6212 iv_ca_delta_free (&best_delta);
6213 best_delta = act_delta;
6215 else
6216 iv_ca_delta_free (&act_delta);
6219 if (!best_delta)
6221 /* Try removing the candidates from the set instead. */
6222 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6224 if (!best_delta && *try_replace_p)
6226 *try_replace_p = false;
6227 /* So far the candidate-selection algorithm tends to choose fewer IVs
6228 so that it can handle cases in which loops have many variables
6229 but the best choice is often to use only one general biv. One
6230 weakness is that it can't handle the opposite cases, in which different
6231 candidates should be chosen with respect to each use. To solve
6232 the problem, we replace candidates in the manner described in the
6233 comments of iv_ca_replace, thus giving the general algorithm a chance
6234 to break the locally optimal fixed-point in these cases. */
6235 best_cost = iv_ca_replace (data, ivs, &best_delta);
6238 if (!best_delta)
6239 return false;
6242 iv_ca_delta_commit (data, ivs, best_delta, true);
6243 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6244 iv_ca_delta_free (&best_delta);
6245 return true;
/* Attempts to find an optimal set of induction variables.  We use a
   simple greedy heuristic: starting from an initial solution, we try to
   replace at most one candidate in the selected set and to remove unused
   ivs, for as long as this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}
/* Finds the optimal set of induction variables.  Tries both a strategy
   that starts from the original IVs and one that prefers candidates not
   based on any existing IV, and keeps the cheaper result.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost using a strategy that starts with the original IVs,
     then try again with a strategy that prefers candidates not based on
     any existing IV.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
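/* Illustrative note (not part of the pass): compare_costs orders comp_cost
   values by their COST field and falls back to COMPLEXITY only to break
   ties; e.g. cost 7 (complexity 2) is preferred over cost 7 (complexity 3),
   while cost 6 (complexity 5) beats both.  The "<= 0" above keeps the
   original-IV solution on an exact tie.  */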
/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}
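/* Illustrative sketch (SSA names invented): for a candidate with base 0,
   step 4 and position IP_NORMAL, the create_iv call above ends up
   emitting roughly

     # ivtmp.5_1 = PHI <0(preheader), ivtmp.5_2(latch)>
     ...
     ivtmp.5_2 = ivtmp.5_1 + 4;

   and stores the names back into cand->var_before (ivtmp.5_1) and
   cand->var_after (ivtmp.5_2).  */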
/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
               data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
          else
            op = NULL_TREE;
        }
      else
        op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
        return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as it might be pointed
             to still.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
                                       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
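/* Illustrative example (names invented): a nonlinear use such as

     x_3 = i_1 * 2;

   expressed through an unsigned candidate ivtmp.6 with the same evolution
   is rewritten to compute the same value from the candidate, roughly

     x_3 = (int) ivtmp.6_7 * 2;

   while a GIMPLE_PHI use is instead replaced by an assignment inserted
   after the labels of its block, as done above.  */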
/* Performs a peephole optimization, reordering the iv update statement
   with a mem ref that uses it, to enable instruction combining in later
   phases.  The mem ref uses the iv value before the update, so the
   reordering requires adjustment of the offset.  CAND is the selected
   IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole instead
   transforms the code into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
                        reference_alias_ptr_type (*use->op_p),
                        iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
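/* Illustrative example (names invented): an address use *p_5 whose affine
   form works out to  &a + 4 * ivtmp.7  may be rewritten by create_mem_ref
   into a target memory reference along the lines of

     MEM[symbol: a, index: ivtmp.7_3, step: 4]

   with BASE_HINT marking the candidate as object-based when it is.  */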
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
                                     true, GSI_SAME_STMT);
}
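/* Illustrative example (names invented): when elimination succeeds, an
   exit test such as

     if (i_1 < n_4)

   is retargeted at the candidate and the precomputed bound, e.g.

     if (ivtmp.5_8 != _9)

   where _9 is the bound forced into the loop preheader above; only when
   elimination fails is the original value recomputed explicitly.  */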
/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}
/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}
/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->have_use_for
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < n_iv_uses (data) && i < 64; i++)
                {
                  cand = iv_use (data, i)->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                    ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                    == TYPE_MODE (TREE_TYPE (info->iv->base))
                    ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                    ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              &dummy_use, best_cand,
                                              SSA_NAME_DEF_STMT (def));
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
                  else
                    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
                  gdebug *def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
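/* Illustrative example (names invented): if a removed biv i_1 is
   referenced by several debug binds, the computed replacement is emitted
   once through a debug temp rather than duplicated at every bind:

     # DEBUG D#1 => (int) ivtmp.5_8
     # DEBUG i => D#1
*/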
/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
        if (use->cost_map[j].depends_on)
          BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
        BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}
/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}
/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   includes any function calls other than inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}
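/* Illustrative summary of the pass (source level, simplified): on a
   target with cheap base+index addressing, a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   may end up using a single pointer-like induction variable,

     for (p = a; p != a + n; p++)
       sum += *p;

   with the multiplication by the element size strength-reduced, the
   original counter eliminated, and the exit test rewritten against the
   new iv.  */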