/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
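
/* As a simple illustration (not taken from any particular testcase),
   consider a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   Assuming 4-byte elements, the address of a[i] in the store and the exit
   test i < n are the interesting uses; the original variable i and a
   pointer iv with base &a[0] and step 4 are among the candidates.  If the
   pointer candidate is chosen, the loop is rewritten to something like

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which is the combined effect of strength reduction (the multiplication
   by the element size disappears) and induction variable elimination
   (i is no longer needed to compute the exit test).  */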
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "hash-set.h"
69 #include "machmode.h"
70 #include "vec.h"
71 #include "double-int.h"
72 #include "input.h"
73 #include "alias.h"
74 #include "symtab.h"
75 #include "wide-int.h"
76 #include "inchash.h"
77 #include "tree.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
80 #include "tm_p.h"
81 #include "predict.h"
82 #include "hard-reg-set.h"
83 #include "function.h"
84 #include "dominance.h"
85 #include "cfg.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
88 #include "hash-map.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
92 #include "tree-eh.h"
93 #include "gimple-expr.h"
94 #include "is-a.h"
95 #include "gimple.h"
96 #include "gimplify.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
101 #include "ipa-ref.h"
102 #include "cgraph.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
112 #include "hashtab.h"
113 #include "rtl.h"
114 #include "flags.h"
115 #include "statistics.h"
116 #include "real.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
119 #include "expmed.h"
120 #include "dojump.h"
121 #include "explow.h"
122 #include "calls.h"
123 #include "emit-rtl.h"
124 #include "varasm.h"
125 #include "stmt.h"
126 #include "expr.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
129 #include "cfgloop.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
133 #include "params.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
136 #include "target.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
141 #include "tree-vectorizer.h"
143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
144 cost of different addressing modes. This should be moved to a TBD
145 interface between the GIMPLE and RTL worlds. */
146 #include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
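
/* For instance (an illustration, not from any particular testcase), for
   EXPR = &a[i] the ADDR_EXPR case returns &a, for EXPR = p + 16 the
   POINTER_PLUS_EXPR case recurses on p, and for a plain pointer ssa name
   the default case returns the name itself.  The base object is used to
   avoid expressing an iv based on one object in terms of an iv based on
   a different one.  */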
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
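
/* For example (illustrative only), for the loop

     for (i = 0; i < n; i += 4)
       ...

   the phi node for i in the loop header defines a biv and
   determine_biv_step returns 4, while for a phi whose result does not
   evolve as a simple affine iv (or whose step is zero) it returns
   NULL_TREE.  */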
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at any ssa var, but doing that is
	 expensive and such cases are unusual, so we just check the first
	 one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but doing that is
     expensive and such cases are unusual, so we just check the first
     one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was
     not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
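
/* For instance (an illustration only), given the exit test

     if (i_1 < n_2)

   where i_1 is an induction variable with nonzero step and n_2 is loop
   invariant, *CONTROL_VAR is set to the location of i_1 and *BOUND to
   that of n_2, and true is returned no matter which side of the
   comparison the iv appears on.  */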
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
   obviously invariant, i.e. such that all of EXPR's operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not even
   obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
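
/* To illustrate (examples are not from the original sources):
   constant_multiple_of returns true with *MUL = 8 for TOP = x * 8 and
   BOT = x, true with *MUL = 3 for TOP = 12 and BOT = 4, and false for
   TOP = 13 and BOT = 4, since the truncating division leaves a nonzero
   remainder.  */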
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
2137 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2138 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2139 we are at the top-level of the processed address. */
2141 static tree
2142 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2143 HOST_WIDE_INT *offset)
2145 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2146 enum tree_code code;
2147 tree type, orig_type = TREE_TYPE (expr);
2148 HOST_WIDE_INT off0, off1, st;
2149 tree orig_expr = expr;
2151 STRIP_NOPS (expr);
2153 type = TREE_TYPE (expr);
2154 code = TREE_CODE (expr);
2155 *offset = 0;
2157 switch (code)
2159 case INTEGER_CST:
2160 if (!cst_and_fits_in_hwi (expr)
2161 || integer_zerop (expr))
2162 return orig_expr;
2164 *offset = int_cst_value (expr);
2165 return build_int_cst (orig_type, 0);
2167 case POINTER_PLUS_EXPR:
2168 case PLUS_EXPR:
2169 case MINUS_EXPR:
2170 op0 = TREE_OPERAND (expr, 0);
2171 op1 = TREE_OPERAND (expr, 1);
2173 op0 = strip_offset_1 (op0, false, false, &off0);
2174 op1 = strip_offset_1 (op1, false, false, &off1);
2176 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2177 if (op0 == TREE_OPERAND (expr, 0)
2178 && op1 == TREE_OPERAND (expr, 1))
2179 return orig_expr;
2181 if (integer_zerop (op1))
2182 expr = op0;
2183 else if (integer_zerop (op0))
2185 if (code == MINUS_EXPR)
2186 expr = fold_build1 (NEGATE_EXPR, type, op1);
2187 else
2188 expr = op1;
2190 else
2191 expr = fold_build2 (code, type, op0, op1);
2193 return fold_convert (orig_type, expr);
2195 case MULT_EXPR:
2196 op1 = TREE_OPERAND (expr, 1);
2197 if (!cst_and_fits_in_hwi (op1))
2198 return orig_expr;
2200 op0 = TREE_OPERAND (expr, 0);
2201 op0 = strip_offset_1 (op0, false, false, &off0);
2202 if (op0 == TREE_OPERAND (expr, 0))
2203 return orig_expr;
2205 *offset = off0 * int_cst_value (op1);
2206 if (integer_zerop (op0))
2207 expr = op0;
2208 else
2209 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2211 return fold_convert (orig_type, expr);
2213 case ARRAY_REF:
2214 case ARRAY_RANGE_REF:
2215 if (!inside_addr)
2216 return orig_expr;
2218 step = array_ref_element_size (expr);
2219 if (!cst_and_fits_in_hwi (step))
2220 break;
2222 st = int_cst_value (step);
2223 op1 = TREE_OPERAND (expr, 1);
2224 op1 = strip_offset_1 (op1, false, false, &off1);
2225 *offset = off1 * st;
2227 if (top_compref
2228 && integer_zerop (op1))
2230 /* Strip the component reference completely. */
2231 op0 = TREE_OPERAND (expr, 0);
2232 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2233 *offset += off0;
2234 return op0;
2236 break;
2238 case COMPONENT_REF:
2240 tree field;
2242 if (!inside_addr)
2243 return orig_expr;
2245 tmp = component_ref_field_offset (expr);
2246 field = TREE_OPERAND (expr, 1);
2247 if (top_compref
2248 && cst_and_fits_in_hwi (tmp)
2249 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2251 HOST_WIDE_INT boffset, abs_off;
2253 /* Strip the component reference completely. */
2254 op0 = TREE_OPERAND (expr, 0);
2255 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2256 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2257 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2258 if (boffset < 0)
2259 abs_off = -abs_off;
2261 *offset = off0 + int_cst_value (tmp) + abs_off;
2262 return op0;
2265 break;
2267 case ADDR_EXPR:
2268 op0 = TREE_OPERAND (expr, 0);
2269 op0 = strip_offset_1 (op0, true, true, &off0);
2270 *offset += off0;
2272 if (op0 == TREE_OPERAND (expr, 0))
2273 return orig_expr;
2275 expr = build_fold_addr_expr (op0);
2276 return fold_convert (orig_type, expr);
2278 case MEM_REF:
2279 /* ??? Offset operand? */
2280 inside_addr = false;
2281 break;
2283 default:
2284 return orig_expr;
2287 /* Default handling of expressions for which we want to recurse into
2288 the first operand. */
2289 op0 = TREE_OPERAND (expr, 0);
2290 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2291 *offset += off0;
2293 if (op0 == TREE_OPERAND (expr, 0)
2294 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2295 return orig_expr;
2297 expr = copy_node (expr);
2298 TREE_OPERAND (expr, 0) = op0;
2299 if (op1)
2300 TREE_OPERAND (expr, 1) = op1;
2302 /* Inside address, we might strip the top level component references,
2303 thus changing type of the expression. Handling of ADDR_EXPR
2304 will fix that. */
2305 expr = fold_convert (orig_type, expr);
2307 return expr;
2310 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2312 static tree
2313 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2315 HOST_WIDE_INT off;
2316 tree core = strip_offset_1 (expr, false, false, &off);
2317 *offset = off;
2318 return core;
2321 /* Returns variant of TYPE that can be used as base for different uses.
2322 We return unsigned type with the same precision, which avoids problems
2323 with overflows. */
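/* E.g. generic_type_for (int) is unsigned int, and for a pointer type
   such as int * it is the unsigned integer type of the same precision,
   so that candidate arithmetic wraps instead of invoking undefined
   overflow.  */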
2325 static tree
2326 generic_type_for (tree type)
2328 if (POINTER_TYPE_P (type))
2329 return unsigned_type_for (type);
2331 if (TYPE_UNSIGNED (type))
2332 return type;
2334 return unsigned_type_for (type);
2337 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2338 the bitmap in which we should store them. */
2340 static struct ivopts_data *fd_ivopts_data;
2341 static tree
2342 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2344 bitmap *depends_on = (bitmap *) data;
2345 struct version_info *info;
2347 if (TREE_CODE (*expr_p) != SSA_NAME)
2348 return NULL_TREE;
2349 info = name_info (fd_ivopts_data, *expr_p);
2351 if (!info->inv_id || info->has_nonlin_use)
2352 return NULL_TREE;
2354 if (!*depends_on)
2355 *depends_on = BITMAP_ALLOC (NULL);
2356 bitmap_set_bit (*depends_on, info->inv_id);
2358 return NULL_TREE;
2361 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2362 position to POS. If USE is not NULL, the candidate is set as related to
2363 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2364 replacement of the final value of the iv by a direct computation. */
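/* For instance (illustrative), a candidate with BASE = &a[0] and STEP = 4
   describes an iv advancing by 4 bytes per iteration, while
   BASE == STEP == NULL stands for the pseudocandidate used for final
   value replacement.  */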
2366 static struct iv_cand *
2367 add_candidate_1 (struct ivopts_data *data,
2368 tree base, tree step, bool important, enum iv_position pos,
2369 struct iv_use *use, gimple incremented_at)
2371 unsigned i;
2372 struct iv_cand *cand = NULL;
2373 tree type, orig_type;
2375 /* For non-original variables, make sure their values are computed in a type
2376 that does not invoke undefined behavior on overflows (since in general,
2377 we cannot prove that these induction variables are non-wrapping). */
2378 if (pos != IP_ORIGINAL)
2380 orig_type = TREE_TYPE (base);
2381 type = generic_type_for (orig_type);
2382 if (type != orig_type)
2384 base = fold_convert (type, base);
2385 step = fold_convert (type, step);
2389 for (i = 0; i < n_iv_cands (data); i++)
2391 cand = iv_cand (data, i);
2393 if (cand->pos != pos)
2394 continue;
2396 if (cand->incremented_at != incremented_at
2397 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2398 && cand->ainc_use != use))
2399 continue;
2401 if (!cand->iv)
2403 if (!base && !step)
2404 break;
2406 continue;
2409 if (!base && !step)
2410 continue;
2412 if (operand_equal_p (base, cand->iv->base, 0)
2413 && operand_equal_p (step, cand->iv->step, 0)
2414 && (TYPE_PRECISION (TREE_TYPE (base))
2415 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2416 break;
2419 if (i == n_iv_cands (data))
2421 cand = XCNEW (struct iv_cand);
2422 cand->id = i;
2424 if (!base && !step)
2425 cand->iv = NULL;
2426 else
2427 cand->iv = alloc_iv (base, step);
2429 cand->pos = pos;
2430 if (pos != IP_ORIGINAL && cand->iv)
2432 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2433 cand->var_after = cand->var_before;
2435 cand->important = important;
2436 cand->incremented_at = incremented_at;
2437 data->iv_candidates.safe_push (cand);
2439 if (step
2440 && TREE_CODE (step) != INTEGER_CST)
2442 fd_ivopts_data = data;
2443 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2446 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2447 cand->ainc_use = use;
2448 else
2449 cand->ainc_use = NULL;
2451 if (dump_file && (dump_flags & TDF_DETAILS))
2452 dump_cand (dump_file, cand);
2455 if (important && !cand->important)
2457 cand->important = true;
2458 if (dump_file && (dump_flags & TDF_DETAILS))
2459 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2462 if (use)
2464 bitmap_set_bit (use->related_cands, i);
2465 if (dump_file && (dump_flags & TDF_DETAILS))
2466 fprintf (dump_file, "Candidate %d is related to use %d\n",
2467 cand->id, use->id);
2470 return cand;
2473 /* Returns true if incrementing the induction variable at the end of the LOOP
2474 is allowed.
2476 The purpose is to avoid splitting the latch edge with a biv increment, thus
2477 creating a jump, possibly confusing other optimization passes and leaving
2478 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2479 is not available (so we do not have a better alternative), or if the latch
2480 edge is already nonempty. */
2482 static bool
2483 allow_ip_end_pos_p (struct loop *loop)
2485 if (!ip_normal_pos (loop))
2486 return true;
2488 if (!empty_block_p (ip_end_pos (loop)))
2489 return true;
2491 return false;
2494 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2495 Important field is set to IMPORTANT. */
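/* E.g. on a target with post-increment addressing, a use *p whose
   candidate step equals the access size can get an IP_AFTER_USE
   candidate, so the increment may later be folded into the memory access
   itself (an illustrative sketch; availability is checked against the
   target below).  */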
2497 static void
2498 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2499 bool important, struct iv_use *use)
2501 basic_block use_bb = gimple_bb (use->stmt);
2502 machine_mode mem_mode;
2503 unsigned HOST_WIDE_INT cstepi;
2505 /* If we insert the increment in any position other than the standard
2506 ones, we must ensure that it is incremented once per iteration.
2507 It must not be in an inner nested loop, nor on one side of an if
2508 statement. */
2509 if (use_bb->loop_father != data->current_loop
2510 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2511 || stmt_could_throw_p (use->stmt)
2512 || !cst_and_fits_in_hwi (step))
2513 return;
2515 cstepi = int_cst_value (step);
2517 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2518 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2519 || USE_STORE_PRE_INCREMENT (mem_mode))
2520 && GET_MODE_SIZE (mem_mode) == cstepi)
2521 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2522 || USE_STORE_PRE_DECREMENT (mem_mode))
2523 && GET_MODE_SIZE (mem_mode) == -cstepi))
2525 enum tree_code code = MINUS_EXPR;
2526 tree new_base;
2527 tree new_step = step;
2529 if (POINTER_TYPE_P (TREE_TYPE (base)))
2531 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2532 code = POINTER_PLUS_EXPR;
2534 else
2535 new_step = fold_convert (TREE_TYPE (base), new_step);
2536 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2537 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2538 use->stmt);
2540 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2541 || USE_STORE_POST_INCREMENT (mem_mode))
2542 && GET_MODE_SIZE (mem_mode) == cstepi)
2543 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2544 || USE_STORE_POST_DECREMENT (mem_mode))
2545 && GET_MODE_SIZE (mem_mode) == -cstepi))
2547 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2548 use->stmt);
2552 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2553 position to POS. If USE is not NULL, the candidate is set as related to
2554 it. The candidate computation is scheduled on all available positions. */
2556 static void
2557 add_candidate (struct ivopts_data *data,
2558 tree base, tree step, bool important, struct iv_use *use)
2560 if (ip_normal_pos (data->current_loop))
2561 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2562 if (ip_end_pos (data->current_loop)
2563 && allow_ip_end_pos_p (data->current_loop))
2564 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2566 if (use != NULL && use->type == USE_ADDRESS)
2567 add_autoinc_candidates (data, base, step, important, use);
2570 /* Adds standard iv candidates. */
2572 static void
2573 add_standard_iv_candidates (struct ivopts_data *data)
2575 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2577 /* The same for the wider 'long' type if it is still fast enough. */
2578 if (TYPE_PRECISION
2579 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2580 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2581 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2582 build_int_cst (long_integer_type_node, 1), true, NULL);
2584 /* And again for 'long long' if it is still fast enough. */
2585 if (TYPE_PRECISION
2586 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2587 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2588 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2589 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2593 /* Adds candidates based on the old induction variable IV. */
2595 static void
2596 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2598 gimple phi;
2599 tree def;
2600 struct iv_cand *cand;
2602 add_candidate (data, iv->base, iv->step, true, NULL);
2604 /* The same, but with initial value zero. */
2605 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2606 add_candidate (data, size_int (0), iv->step, true, NULL);
2607 else
2608 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2609 iv->step, true, NULL);
2611 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2612 if (gimple_code (phi) == GIMPLE_PHI)
2614 /* Additionally record the possibility of leaving the original iv
2615 untouched. */
2616 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2617 /* Don't add candidate if it's from another PHI node because
2618 it's an affine iv appearing in the form of PEELED_CHREC. */
2619 phi = SSA_NAME_DEF_STMT (def);
2620 if (gimple_code (phi) != GIMPLE_PHI)
2622 cand = add_candidate_1 (data,
2623 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2624 SSA_NAME_DEF_STMT (def));
2625 cand->var_before = iv->ssa_name;
2626 cand->var_after = def;
2628 else
2629 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2633 /* Adds candidates based on the old induction variables. */
2635 static void
2636 add_old_ivs_candidates (struct ivopts_data *data)
2638 unsigned i;
2639 struct iv *iv;
2640 bitmap_iterator bi;
2642 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2644 iv = ver_info (data, i)->iv;
2645 if (iv && iv->biv_p && !integer_zerop (iv->step))
2646 add_old_iv_candidates (data, iv);
2650 /* Adds candidates based on the value of the induction variable IV and USE. */
2652 static void
2653 add_iv_value_candidates (struct ivopts_data *data,
2654 struct iv *iv, struct iv_use *use)
2656 unsigned HOST_WIDE_INT offset;
2657 tree base;
2658 tree basetype;
2660 add_candidate (data, iv->base, iv->step, false, use);
2662 /* The same, but with initial value zero. Make such a variable important,
2663 since it is generic enough that possibly many uses may be based
2664 on it. */
2665 basetype = TREE_TYPE (iv->base);
2666 if (POINTER_TYPE_P (basetype))
2667 basetype = sizetype;
2668 add_candidate (data, build_int_cst (basetype, 0),
2669 iv->step, true, use);
2671 /* Third, try removing the constant offset. Make sure to even
2672 add a candidate for &a[0] vs. (T *)&a. */
2673 base = strip_offset (iv->base, &offset);
2674 if (offset
2675 || base != iv->base)
2676 add_candidate (data, base, iv->step, false, use);
2679 /* Adds candidates based on the uses. */
2681 static void
2682 add_derived_ivs_candidates (struct ivopts_data *data)
2684 unsigned i;
2686 for (i = 0; i < n_iv_uses (data); i++)
2688 struct iv_use *use = iv_use (data, i);
2690 if (!use)
2691 continue;
2693 switch (use->type)
2695 case USE_NONLINEAR_EXPR:
2696 case USE_COMPARE:
2697 case USE_ADDRESS:
2698 /* Just add the ivs based on the value of the iv used here. */
2699 add_iv_value_candidates (data, use->iv, use);
2700 break;
2702 default:
2703 gcc_unreachable ();
2708 /* Record important candidates and add them to related_cands bitmaps
2709 if needed. */
2711 static void
2712 record_important_candidates (struct ivopts_data *data)
2714 unsigned i;
2715 struct iv_use *use;
2717 for (i = 0; i < n_iv_cands (data); i++)
2719 struct iv_cand *cand = iv_cand (data, i);
2721 if (cand->important)
2722 bitmap_set_bit (data->important_candidates, i);
2725 data->consider_all_candidates = (n_iv_cands (data)
2726 <= CONSIDER_ALL_CANDIDATES_BOUND);
2728 if (data->consider_all_candidates)
2730 /* We will not need "related_cands" bitmaps in this case,
2731 so release them to decrease peak memory consumption. */
2732 for (i = 0; i < n_iv_uses (data); i++)
2734 use = iv_use (data, i);
2735 BITMAP_FREE (use->related_cands);
2738 else
2740 /* Add important candidates to the related_cands bitmaps. */
2741 for (i = 0; i < n_iv_uses (data); i++)
2742 bitmap_ior_into (iv_use (data, i)->related_cands,
2743 data->important_candidates);
2747 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2748 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2749 we allocate a simple list for every use. */
2751 static void
2752 alloc_use_cost_map (struct ivopts_data *data)
2754 unsigned i, size, s;
2756 for (i = 0; i < n_iv_uses (data); i++)
2758 struct iv_use *use = iv_use (data, i);
2760 if (data->consider_all_candidates)
2761 size = n_iv_cands (data);
2762 else
2764 s = bitmap_count_bits (use->related_cands);
2766 /* Round up to the nearest power of two, so that computing the remainder modulo it is fast. */
2767 size = s ? (1 << ceil_log2 (s)) : 1;
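/* E.g. a use related to 5 candidates gets a table of size 8, and
   set_use_iv_cost / get_use_iv_cost then probe it starting at
   cand->id & 7.  */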
2770 use->n_map_members = size;
2771 use->cost_map = XCNEWVEC (struct cost_pair, size);
2775 /* Returns a description of the computation cost of an expression whose
2776 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2778 static comp_cost
2779 new_cost (unsigned runtime, unsigned complexity)
2781 comp_cost cost;
2783 cost.cost = runtime;
2784 cost.complexity = complexity;
2786 return cost;
2789 /* Adds costs COST1 and COST2. */
2791 static comp_cost
2792 add_costs (comp_cost cost1, comp_cost cost2)
2794 cost1.cost += cost2.cost;
2795 cost1.complexity += cost2.complexity;
2797 return cost1;
2799 /* Subtracts cost COST2 from COST1. */
2801 static comp_cost
2802 sub_costs (comp_cost cost1, comp_cost cost2)
2804 cost1.cost -= cost2.cost;
2805 cost1.complexity -= cost2.complexity;
2807 return cost1;
2810 /* Returns a negative number if COST1 < COST2, a positive number if
2811 COST1 > COST2, and 0 if COST1 = COST2. */
2813 static int
2814 compare_costs (comp_cost cost1, comp_cost cost2)
2816 if (cost1.cost == cost2.cost)
2817 return cost1.complexity - cost2.complexity;
2819 return cost1.cost - cost2.cost;
2822 /* Returns true if COST is infinite. */
2824 static bool
2825 infinite_cost_p (comp_cost cost)
2827 return cost.cost == INFTY;
2830 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2831 depends on invariants DEPENDS_ON, that the value used in expressing it
2832 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2834 static void
2835 set_use_iv_cost (struct ivopts_data *data,
2836 struct iv_use *use, struct iv_cand *cand,
2837 comp_cost cost, bitmap depends_on, tree value,
2838 enum tree_code comp, int inv_expr_id)
2840 unsigned i, s;
2842 if (infinite_cost_p (cost))
2844 BITMAP_FREE (depends_on);
2845 return;
2848 if (data->consider_all_candidates)
2850 use->cost_map[cand->id].cand = cand;
2851 use->cost_map[cand->id].cost = cost;
2852 use->cost_map[cand->id].depends_on = depends_on;
2853 use->cost_map[cand->id].value = value;
2854 use->cost_map[cand->id].comp = comp;
2855 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2856 return;
2859 /* n_map_members is a power of two, so this computes modulo. */
2860 s = cand->id & (use->n_map_members - 1);
2861 for (i = s; i < use->n_map_members; i++)
2862 if (!use->cost_map[i].cand)
2863 goto found;
2864 for (i = 0; i < s; i++)
2865 if (!use->cost_map[i].cand)
2866 goto found;
2868 gcc_unreachable ();
2870 found:
2871 use->cost_map[i].cand = cand;
2872 use->cost_map[i].cost = cost;
2873 use->cost_map[i].depends_on = depends_on;
2874 use->cost_map[i].value = value;
2875 use->cost_map[i].comp = comp;
2876 use->cost_map[i].inv_expr_id = inv_expr_id;
2879 /* Gets cost of (USE, CANDIDATE) pair. */
2881 static struct cost_pair *
2882 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2883 struct iv_cand *cand)
2885 unsigned i, s;
2886 struct cost_pair *ret;
2888 if (!cand)
2889 return NULL;
2891 if (data->consider_all_candidates)
2893 ret = use->cost_map + cand->id;
2894 if (!ret->cand)
2895 return NULL;
2897 return ret;
2900 /* n_map_members is a power of two, so this computes modulo. */
2901 s = cand->id & (use->n_map_members - 1);
2902 for (i = s; i < use->n_map_members; i++)
2903 if (use->cost_map[i].cand == cand)
2904 return use->cost_map + i;
2905 else if (use->cost_map[i].cand == NULL)
2906 return NULL;
2907 for (i = 0; i < s; i++)
2908 if (use->cost_map[i].cand == cand)
2909 return use->cost_map + i;
2910 else if (use->cost_map[i].cand == NULL)
2911 return NULL;
2913 return NULL;
2916 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2917 static rtx
2918 produce_memory_decl_rtl (tree obj, int *regno)
2920 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2921 machine_mode address_mode = targetm.addr_space.address_mode (as);
2922 rtx x;
2924 gcc_assert (obj);
2925 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2927 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2928 x = gen_rtx_SYMBOL_REF (address_mode, name);
2929 SET_SYMBOL_REF_DECL (x, obj);
2930 x = gen_rtx_MEM (DECL_MODE (obj), x);
2931 set_mem_addr_space (x, as);
2932 targetm.encode_section_info (obj, x, true);
2934 else
2936 x = gen_raw_REG (address_mode, (*regno)++);
2937 x = gen_rtx_MEM (DECL_MODE (obj), x);
2938 set_mem_addr_space (x, as);
2941 return x;
2944 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2945 walk_tree. DATA contains the actual fake register number. */
2947 static tree
2948 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2950 tree obj = NULL_TREE;
2951 rtx x = NULL_RTX;
2952 int *regno = (int *) data;
2954 switch (TREE_CODE (*expr_p))
2956 case ADDR_EXPR:
2957 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2958 handled_component_p (*expr_p);
2959 expr_p = &TREE_OPERAND (*expr_p, 0))
2960 continue;
2961 obj = *expr_p;
2962 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2963 x = produce_memory_decl_rtl (obj, regno);
2964 break;
2966 case SSA_NAME:
2967 *ws = 0;
2968 obj = SSA_NAME_VAR (*expr_p);
2969 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2970 if (!obj)
2971 return NULL_TREE;
2972 if (!DECL_RTL_SET_P (obj))
2973 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2974 break;
2976 case VAR_DECL:
2977 case PARM_DECL:
2978 case RESULT_DECL:
2979 *ws = 0;
2980 obj = *expr_p;
2982 if (DECL_RTL_SET_P (obj))
2983 break;
2985 if (DECL_MODE (obj) == BLKmode)
2986 x = produce_memory_decl_rtl (obj, regno);
2987 else
2988 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2990 break;
2992 default:
2993 break;
2996 if (x)
2998 decl_rtl_to_reset.safe_push (obj);
2999 SET_DECL_RTL (obj, x);
3002 return NULL_TREE;
3005 /* Determines cost of the computation of EXPR. */
3007 static unsigned
3008 computation_cost (tree expr, bool speed)
3010 rtx_insn *seq;
3011 rtx rslt;
3012 tree type = TREE_TYPE (expr);
3013 unsigned cost;
3014 /* Avoid using hard regs in ways which may be unsupported. */
3015 int regno = LAST_VIRTUAL_REGISTER + 1;
3016 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3017 enum node_frequency real_frequency = node->frequency;
3019 node->frequency = NODE_FREQUENCY_NORMAL;
3020 crtl->maybe_hot_insn_p = speed;
3021 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3022 start_sequence ();
3023 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3024 seq = get_insns ();
3025 end_sequence ();
3026 default_rtl_profile ();
3027 node->frequency = real_frequency;
3029 cost = seq_cost (seq, speed);
3030 if (MEM_P (rslt))
3031 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3032 TYPE_ADDR_SPACE (type), speed);
3033 else if (!REG_P (rslt))
3034 cost += set_src_cost (rslt, speed);
3036 return cost;
3039 /* Returns variable containing the value of candidate CAND at statement AT. */
3041 static tree
3042 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3044 if (stmt_after_increment (loop, cand, stmt))
3045 return cand->var_after;
3046 else
3047 return cand->var_before;
3050 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3051 same precision that is at least as wide as the precision of TYPE, stores
3052 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3053 type of A and B. */
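/* E.g. if *A is (unsigned short) X and *B is (unsigned short) Y, with X
   and Y of type unsigned int, then X and Y are stored back to *A and *B
   and unsigned int is returned, so the subtraction below can be folded
   in the wider type.  */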
3055 static tree
3056 determine_common_wider_type (tree *a, tree *b)
3058 tree wider_type = NULL;
3059 tree suba, subb;
3060 tree atype = TREE_TYPE (*a);
3062 if (CONVERT_EXPR_P (*a))
3064 suba = TREE_OPERAND (*a, 0);
3065 wider_type = TREE_TYPE (suba);
3066 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3067 return atype;
3069 else
3070 return atype;
3072 if (CONVERT_EXPR_P (*b))
3074 subb = TREE_OPERAND (*b, 0);
3075 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3076 return atype;
3078 else
3079 return atype;
3081 *a = suba;
3082 *b = subb;
3083 return wider_type;
3086 /* Determines the expression by which USE is expressed from induction variable
3087 CAND at statement AT in LOOP. The expression is stored in a decomposed
3088 form into AFF. Returns false if USE cannot be expressed using CAND. */
3090 static bool
3091 get_computation_aff (struct loop *loop,
3092 struct iv_use *use, struct iv_cand *cand, gimple at,
3093 struct aff_tree *aff)
3095 tree ubase = use->iv->base;
3096 tree ustep = use->iv->step;
3097 tree cbase = cand->iv->base;
3098 tree cstep = cand->iv->step, cstep_common;
3099 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3100 tree common_type, var;
3101 tree uutype;
3102 aff_tree cbase_aff, var_aff;
3103 widest_int rat;
3105 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3107 /* We do not have enough precision to express the values of USE. */
3108 return false;
3111 var = var_at_stmt (loop, cand, at);
3112 uutype = unsigned_type_for (utype);
3114 /* If the conversion is not noop, perform it. */
3115 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3117 cstep = fold_convert (uutype, cstep);
3118 cbase = fold_convert (uutype, cbase);
3119 var = fold_convert (uutype, var);
3122 if (!constant_multiple_of (ustep, cstep, &rat))
3123 return false;
3125 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3126 type, we achieve better folding by computing their difference in this
3127 wider type, and cast the result to UUTYPE. We do not need to worry about
3128 overflows, as all the arithmetics will in the end be performed in UUTYPE
3129 anyway. */
3130 common_type = determine_common_wider_type (&ubase, &cbase);
3132 /* use = ubase - ratio * cbase + ratio * var. */
3133 tree_to_aff_combination (ubase, common_type, aff);
3134 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3135 tree_to_aff_combination (var, uutype, &var_aff);
3137 /* We need to shift the value if we are after the increment. */
3138 if (stmt_after_increment (loop, cand, at))
3140 aff_tree cstep_aff;
3142 if (common_type != uutype)
3143 cstep_common = fold_convert (common_type, cstep);
3144 else
3145 cstep_common = cstep;
3147 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3148 aff_combination_add (&cbase_aff, &cstep_aff);
3151 aff_combination_scale (&cbase_aff, -rat);
3152 aff_combination_add (aff, &cbase_aff);
3153 if (common_type != uutype)
3154 aff_combination_convert (aff, uutype);
3156 aff_combination_scale (&var_aff, rat);
3157 aff_combination_add (aff, &var_aff);
3159 return true;
3162 /* Return the type of USE. */
3164 static tree
3165 get_use_type (struct iv_use *use)
3167 tree base_type = TREE_TYPE (use->iv->base);
3168 tree type;
3170 if (use->type == USE_ADDRESS)
3172 /* The base_type may be a void pointer. Create a pointer type based on
3173 the mem_ref instead. */
3174 type = build_pointer_type (TREE_TYPE (*use->op_p));
3175 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3176 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3178 else
3179 type = base_type;
3181 return type;
3184 /* Determines the expression by which USE is expressed from induction variable
3185 CAND at statement AT in LOOP. The computation is unshared. */
3187 static tree
3188 get_computation_at (struct loop *loop,
3189 struct iv_use *use, struct iv_cand *cand, gimple at)
3191 aff_tree aff;
3192 tree type = get_use_type (use);
3194 if (!get_computation_aff (loop, use, cand, at, &aff))
3195 return NULL_TREE;
3196 unshare_aff_combination (&aff);
3197 return fold_convert (type, aff_combination_to_tree (&aff));
3200 /* Determines the expression by which USE is expressed from induction variable
3201 CAND in LOOP. The computation is unshared. */
3203 static tree
3204 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3206 return get_computation_at (loop, use, cand, use->stmt);
3209 /* Adjust the cost COST for being in loop setup rather than loop body.
3210 If we're optimizing for space, the loop setup overhead is charged in full;
3211 if we're optimizing for speed, it is amortized over the average iteration count. */
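/* E.g. a setup cost of 20 in a loop that averages 10 iterations is
   charged as 2 when optimizing for speed, and as the full 20 when
   optimizing for size.  */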
3212 static unsigned
3213 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3215 if (cost == INFTY)
3216 return cost;
3217 else if (optimize_loop_for_speed_p (data->current_loop))
3218 return cost / avg_loop_niter (data->current_loop);
3219 else
3220 return cost;
3223 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3224 validity for a memory reference accessing memory of mode MODE in
3225 address space AS. */
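/* E.g. on typical x86 targets the index in base + index * scale may be
   scaled by 1, 2, 4 or 8, so a RATIO of 4 would be allowed there while a
   RATIO of 3 would not (illustrative; the actual answer is computed from
   the target's addressing modes below).  */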
3228 bool
3229 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3230 addr_space_t as)
3232 #define MAX_RATIO 128
3233 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3234 static vec<sbitmap> valid_mult_list;
3235 sbitmap valid_mult;
3237 if (data_index >= valid_mult_list.length ())
3238 valid_mult_list.safe_grow_cleared (data_index + 1);
3240 valid_mult = valid_mult_list[data_index];
3241 if (!valid_mult)
3243 machine_mode address_mode = targetm.addr_space.address_mode (as);
3244 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3245 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3246 rtx addr, scaled;
3247 HOST_WIDE_INT i;
3249 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3250 bitmap_clear (valid_mult);
3251 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3252 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3253 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3255 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3256 if (memory_address_addr_space_p (mode, addr, as)
3257 || memory_address_addr_space_p (mode, scaled, as))
3258 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3261 if (dump_file && (dump_flags & TDF_DETAILS))
3263 fprintf (dump_file, " allowed multipliers:");
3264 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3265 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3266 fprintf (dump_file, " %d", (int) i);
3267 fprintf (dump_file, "\n");
3268 fprintf (dump_file, "\n");
3271 valid_mult_list[data_index] = valid_mult;
3274 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3275 return false;
3277 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3280 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3281 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3282 variable is omitted. Compute the cost for a memory reference that accesses
3283 a memory location of mode MEM_MODE in address space AS.
3285 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3286 size of MEM_MODE / RATIO) is available. To make this determination, we
3287 look at the size of the increment to be made, which is given in CSTEP.
3288 CSTEP may be zero if the step is unknown.
3289 STMT_AFTER_INC is true iff the statement we're looking at is after the
3290 increment of the original biv.
3292 TODO -- there must be some better way. This all is quite crude. */
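/* For example (a sketch; the legality and cost of each shape are
   entirely target-specific): an address of the shape sym + x + 4 * i + 16
   has SYMBOL_PRESENT = true, VAR_PRESENT = true, OFFSET = 16 and
   RATIO = 4.  */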
3294 enum ainc_type
3296 AINC_PRE_INC, /* Pre increment. */
3297 AINC_PRE_DEC, /* Pre decrement. */
3298 AINC_POST_INC, /* Post increment. */
3299 AINC_POST_DEC, /* Post decrement. */
3300 AINC_NONE /* Also the number of auto increment types. */
3303 typedef struct address_cost_data_s
3305 HOST_WIDE_INT min_offset, max_offset;
3306 unsigned costs[2][2][2][2];
3307 unsigned ainc_costs[AINC_NONE];
3308 } *address_cost_data;
3311 static comp_cost
3312 get_address_cost (bool symbol_present, bool var_present,
3313 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3314 HOST_WIDE_INT cstep, machine_mode mem_mode,
3315 addr_space_t as, bool speed,
3316 bool stmt_after_inc, bool *may_autoinc)
3318 machine_mode address_mode = targetm.addr_space.address_mode (as);
3319 static vec<address_cost_data> address_cost_data_list;
3320 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3321 address_cost_data data;
3322 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3323 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3324 unsigned cost, acost, complexity;
3325 enum ainc_type autoinc_type;
3326 bool offset_p, ratio_p, autoinc;
3327 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3328 unsigned HOST_WIDE_INT mask;
3329 unsigned bits;
3331 if (data_index >= address_cost_data_list.length ())
3332 address_cost_data_list.safe_grow_cleared (data_index + 1);
3334 data = address_cost_data_list[data_index];
3335 if (!data)
3337 HOST_WIDE_INT i;
3338 HOST_WIDE_INT rat, off = 0;
3339 int old_cse_not_expected, width;
3340 unsigned sym_p, var_p, off_p, rat_p, add_c;
3341 rtx_insn *seq;
3342 rtx addr, base;
3343 rtx reg0, reg1;
3345 data = (address_cost_data) xcalloc (1, sizeof (*data));
3347 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3349 width = GET_MODE_BITSIZE (address_mode) - 1;
3350 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3351 width = HOST_BITS_PER_WIDE_INT - 1;
3352 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3354 for (i = width; i >= 0; i--)
3356 off = -((unsigned HOST_WIDE_INT) 1 << i);
3357 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3358 if (memory_address_addr_space_p (mem_mode, addr, as))
3359 break;
3361 data->min_offset = (i == -1? 0 : off);
3363 for (i = width; i >= 0; i--)
3365 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3366 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3367 if (memory_address_addr_space_p (mem_mode, addr, as))
3368 break;
3369 /* For some strict-alignment targets, the offset must be naturally
3370 aligned. Try an aligned offset if mem_mode is not QImode. */
3371 off = mem_mode != QImode
3372 ? ((unsigned HOST_WIDE_INT) 1 << i)
3373 - GET_MODE_SIZE (mem_mode)
3374 : 0;
3375 if (off > 0)
3377 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3378 if (memory_address_addr_space_p (mem_mode, addr, as))
3379 break;
3382 if (i == -1)
3383 off = 0;
3384 data->max_offset = off;
3386 if (dump_file && (dump_flags & TDF_DETAILS))
3388 fprintf (dump_file, "get_address_cost:\n");
3389 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3390 GET_MODE_NAME (mem_mode),
3391 data->min_offset);
3392 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3393 GET_MODE_NAME (mem_mode),
3394 data->max_offset);
3397 rat = 1;
3398 for (i = 2; i <= MAX_RATIO; i++)
3399 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3401 rat = i;
3402 break;
3405 /* Compute the cost of various addressing modes. */
3406 acost = 0;
3407 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3408 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3410 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3411 || USE_STORE_PRE_DECREMENT (mem_mode))
3413 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3414 has_predec[mem_mode]
3415 = memory_address_addr_space_p (mem_mode, addr, as);
3417 if (has_predec[mem_mode])
3418 data->ainc_costs[AINC_PRE_DEC]
3419 = address_cost (addr, mem_mode, as, speed);
3421 if (USE_LOAD_POST_DECREMENT (mem_mode)
3422 || USE_STORE_POST_DECREMENT (mem_mode))
3424 addr = gen_rtx_POST_DEC (address_mode, reg0);
3425 has_postdec[mem_mode]
3426 = memory_address_addr_space_p (mem_mode, addr, as);
3428 if (has_postdec[mem_mode])
3429 data->ainc_costs[AINC_POST_DEC]
3430 = address_cost (addr, mem_mode, as, speed);
3432 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3433 || USE_STORE_PRE_INCREMENT (mem_mode))
3435 addr = gen_rtx_PRE_INC (address_mode, reg0);
3436 has_preinc[mem_mode]
3437 = memory_address_addr_space_p (mem_mode, addr, as);
3439 if (has_preinc[mem_mode])
3440 data->ainc_costs[AINC_PRE_INC]
3441 = address_cost (addr, mem_mode, as, speed);
3443 if (USE_LOAD_POST_INCREMENT (mem_mode)
3444 || USE_STORE_POST_INCREMENT (mem_mode))
3446 addr = gen_rtx_POST_INC (address_mode, reg0);
3447 has_postinc[mem_mode]
3448 = memory_address_addr_space_p (mem_mode, addr, as);
3450 if (has_postinc[mem_mode])
3451 data->ainc_costs[AINC_POST_INC]
3452 = address_cost (addr, mem_mode, as, speed);
3454 for (i = 0; i < 16; i++)
3456 sym_p = i & 1;
3457 var_p = (i >> 1) & 1;
3458 off_p = (i >> 2) & 1;
3459 rat_p = (i >> 3) & 1;
3461 addr = reg0;
3462 if (rat_p)
3463 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3464 gen_int_mode (rat, address_mode));
3466 if (var_p)
3467 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3469 if (sym_p)
3471 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3472 /* ??? We can run into trouble with some backends by presenting
3473 it with symbols which haven't been properly passed through
3474 targetm.encode_section_info. By setting the local bit, we
3475 enhance the probability of things working. */
3476 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3478 if (off_p)
3479 base = gen_rtx_fmt_e (CONST, address_mode,
3480 gen_rtx_fmt_ee
3481 (PLUS, address_mode, base,
3482 gen_int_mode (off, address_mode)));
3484 else if (off_p)
3485 base = gen_int_mode (off, address_mode);
3486 else
3487 base = NULL_RTX;
3489 if (base)
3490 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3492 start_sequence ();
3493 /* To avoid splitting addressing modes, pretend that no cse will
3494 follow. */
3495 old_cse_not_expected = cse_not_expected;
3496 cse_not_expected = true;
3497 addr = memory_address_addr_space (mem_mode, addr, as);
3498 cse_not_expected = old_cse_not_expected;
3499 seq = get_insns ();
3500 end_sequence ();
3502 acost = seq_cost (seq, speed);
3503 acost += address_cost (addr, mem_mode, as, speed);
3505 if (!acost)
3506 acost = 1;
3507 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3510 /* On some targets, it is quite expensive to load a symbol into a register,
3511 which makes addresses that contain symbols look much more expensive.
3512 However, the symbol will have to be loaded in any case before the
3513 loop (and quite likely we have it in a register already), so it does not
3514 make much sense to penalize them too heavily. So make some final
3515 tweaks for the SYMBOL_PRESENT modes:
3517 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3518 var is cheaper, use this mode with a small penalty.
3519 If VAR_PRESENT is true, check whether the mode with
3520 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3521 if this is the case, use it. */
3522 add_c = add_cost (speed, address_mode);
3523 for (i = 0; i < 8; i++)
3525 var_p = i & 1;
3526 off_p = (i >> 1) & 1;
3527 rat_p = (i >> 2) & 1;
3529 acost = data->costs[0][1][off_p][rat_p] + 1;
3530 if (var_p)
3531 acost += add_c;
3533 if (acost < data->costs[1][var_p][off_p][rat_p])
3534 data->costs[1][var_p][off_p][rat_p] = acost;
3537 if (dump_file && (dump_flags & TDF_DETAILS))
3539 fprintf (dump_file, "Address costs:\n");
3541 for (i = 0; i < 16; i++)
3543 sym_p = i & 1;
3544 var_p = (i >> 1) & 1;
3545 off_p = (i >> 2) & 1;
3546 rat_p = (i >> 3) & 1;
3548 fprintf (dump_file, " ");
3549 if (sym_p)
3550 fprintf (dump_file, "sym + ");
3551 if (var_p)
3552 fprintf (dump_file, "var + ");
3553 if (off_p)
3554 fprintf (dump_file, "cst + ");
3555 if (rat_p)
3556 fprintf (dump_file, "rat * ");
3558 acost = data->costs[sym_p][var_p][off_p][rat_p];
3559 fprintf (dump_file, "index costs %d\n", acost);
3561 if (has_predec[mem_mode] || has_postdec[mem_mode]
3562 || has_preinc[mem_mode] || has_postinc[mem_mode])
3563 fprintf (dump_file, " May include autoinc/dec\n");
3564 fprintf (dump_file, "\n");
3567 address_cost_data_list[data_index] = data;
3570 bits = GET_MODE_BITSIZE (address_mode);
3571 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3572 offset &= mask;
3573 if ((offset >> (bits - 1) & 1))
3574 offset |= ~mask;
3575 s_offset = offset;
3577 autoinc = false;
3578 autoinc_type = AINC_NONE;
3579 msize = GET_MODE_SIZE (mem_mode);
3580 autoinc_offset = offset;
3581 if (stmt_after_inc)
3582 autoinc_offset += ratio * cstep;
3583 if (symbol_present || var_present || ratio != 1)
3584 autoinc = false;
3585 else
3587 if (has_postinc[mem_mode] && autoinc_offset == 0
3588 && msize == cstep)
3589 autoinc_type = AINC_POST_INC;
3590 else if (has_postdec[mem_mode] && autoinc_offset == 0
3591 && msize == -cstep)
3592 autoinc_type = AINC_POST_DEC;
3593 else if (has_preinc[mem_mode] && autoinc_offset == msize
3594 && msize == cstep)
3595 autoinc_type = AINC_PRE_INC;
3596 else if (has_predec[mem_mode] && autoinc_offset == -msize
3597 && msize == -cstep)
3598 autoinc_type = AINC_PRE_DEC;
3600 if (autoinc_type != AINC_NONE)
3601 autoinc = true;
3604 cost = 0;
3605 offset_p = (s_offset != 0
3606 && data->min_offset <= s_offset
3607 && s_offset <= data->max_offset);
3608 ratio_p = (ratio != 1
3609 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3611 if (ratio != 1 && !ratio_p)
3612 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3614 if (s_offset && !offset_p && !symbol_present)
3615 cost += add_cost (speed, address_mode);
3617 if (may_autoinc)
3618 *may_autoinc = autoinc;
3619 if (autoinc)
3620 acost = data->ainc_costs[autoinc_type];
3621 else
3622 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3623 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3624 return new_cost (cost + acost, complexity);
3627 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3628 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3629 calculating the operands of EXPR. Returns true if successful, and returns
3630 the cost in COST. */
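/* E.g. for EXPR = a + b * 8, MULT is b * 8 and m = 3, so the result is
   the cheaper of a shift-and-add style instruction (where the target has
   one) and a shift followed by an add.  */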
3632 static bool
3633 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3634 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3636 comp_cost res;
3637 tree op1 = TREE_OPERAND (expr, 1);
3638 tree cst = TREE_OPERAND (mult, 1);
3639 tree multop = TREE_OPERAND (mult, 0);
3640 int m = exact_log2 (int_cst_value (cst));
3641 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3642 int as_cost, sa_cost;
3643 bool mult_in_op1;
3645 if (!(m >= 0 && m < maxm))
3646 return false;
3648 mult_in_op1 = operand_equal_p (op1, mult, 0);
3650 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3652 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3653 use that in preference to a shift insn followed by an add insn. */
3654 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3655 ? shiftadd_cost (speed, mode, m)
3656 : (mult_in_op1
3657 ? shiftsub1_cost (speed, mode, m)
3658 : shiftsub0_cost (speed, mode, m)));
3660 res = new_cost (MIN (as_cost, sa_cost), 0);
3661 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3663 STRIP_NOPS (multop);
3664 if (!is_gimple_val (multop))
3665 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3667 *cost = res;
3668 return true;
3671 /* Estimates cost of forcing expression EXPR into a variable. */
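/* E.g. (illustrative) an INTEGER_CST is charged integer_cost, an address
   such as &var symbol_cost, and a compound expression such as x + 4 * y
   the sum of its addition and multiplication costs, capped by
   target_spill_cost.  */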
3673 static comp_cost
3674 force_expr_to_var_cost (tree expr, bool speed)
3676 static bool costs_initialized = false;
3677 static unsigned integer_cost [2];
3678 static unsigned symbol_cost [2];
3679 static unsigned address_cost [2];
3680 tree op0, op1;
3681 comp_cost cost0, cost1, cost;
3682 machine_mode mode;
3684 if (!costs_initialized)
3686 tree type = build_pointer_type (integer_type_node);
3687 tree var, addr;
3688 rtx x;
3689 int i;
3691 var = create_tmp_var_raw (integer_type_node, "test_var");
3692 TREE_STATIC (var) = 1;
3693 x = produce_memory_decl_rtl (var, NULL);
3694 SET_DECL_RTL (var, x);
3696 addr = build1 (ADDR_EXPR, type, var);
3699 for (i = 0; i < 2; i++)
3701 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3702 2000), i);
3704 symbol_cost[i] = computation_cost (addr, i) + 1;
3706 address_cost[i]
3707 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3708 if (dump_file && (dump_flags & TDF_DETAILS))
3710 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3711 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3712 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3713 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3714 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3715 fprintf (dump_file, "\n");
3719 costs_initialized = true;
3722 STRIP_NOPS (expr);
3724 if (SSA_VAR_P (expr))
3725 return no_cost;
3727 if (is_gimple_min_invariant (expr))
3729 if (TREE_CODE (expr) == INTEGER_CST)
3730 return new_cost (integer_cost [speed], 0);
3732 if (TREE_CODE (expr) == ADDR_EXPR)
3734 tree obj = TREE_OPERAND (expr, 0);
3736 if (TREE_CODE (obj) == VAR_DECL
3737 || TREE_CODE (obj) == PARM_DECL
3738 || TREE_CODE (obj) == RESULT_DECL)
3739 return new_cost (symbol_cost [speed], 0);
3742 return new_cost (address_cost [speed], 0);
3745 switch (TREE_CODE (expr))
3747 case POINTER_PLUS_EXPR:
3748 case PLUS_EXPR:
3749 case MINUS_EXPR:
3750 case MULT_EXPR:
3751 op0 = TREE_OPERAND (expr, 0);
3752 op1 = TREE_OPERAND (expr, 1);
3753 STRIP_NOPS (op0);
3754 STRIP_NOPS (op1);
3755 break;
3757 CASE_CONVERT:
3758 case NEGATE_EXPR:
3759 op0 = TREE_OPERAND (expr, 0);
3760 STRIP_NOPS (op0);
3761 op1 = NULL_TREE;
3762 break;
3764 default:
3765 /* Just an arbitrary value, FIXME. */
3766 return new_cost (target_spill_cost[speed], 0);
3769 if (op0 == NULL_TREE
3770 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3771 cost0 = no_cost;
3772 else
3773 cost0 = force_expr_to_var_cost (op0, speed);
3775 if (op1 == NULL_TREE
3776 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3777 cost1 = no_cost;
3778 else
3779 cost1 = force_expr_to_var_cost (op1, speed);
3781 mode = TYPE_MODE (TREE_TYPE (expr));
3782 switch (TREE_CODE (expr))
3784 case POINTER_PLUS_EXPR:
3785 case PLUS_EXPR:
3786 case MINUS_EXPR:
3787 case NEGATE_EXPR:
3788 cost = new_cost (add_cost (speed, mode), 0);
3789 if (TREE_CODE (expr) != NEGATE_EXPR)
3791 tree mult = NULL_TREE;
3792 comp_cost sa_cost;
3793 if (TREE_CODE (op1) == MULT_EXPR)
3794 mult = op1;
3795 else if (TREE_CODE (op0) == MULT_EXPR)
3796 mult = op0;
3798 if (mult != NULL_TREE
3799 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3800 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3801 speed, &sa_cost))
3802 return sa_cost;
3804 break;
3806 CASE_CONVERT:
3808 tree inner_mode, outer_mode;
3809 outer_mode = TREE_TYPE (expr);
3810 inner_mode = TREE_TYPE (op0);
3811 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3812 TYPE_MODE (inner_mode), speed), 0);
3814 break;
3816 case MULT_EXPR:
3817 if (cst_and_fits_in_hwi (op0))
3818 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3819 mode, speed), 0);
3820 else if (cst_and_fits_in_hwi (op1))
3821 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3822 mode, speed), 0);
3823 else
3824 return new_cost (target_spill_cost [speed], 0);
3825 break;
3827 default:
3828 gcc_unreachable ();
3831 cost = add_costs (cost, cost0);
3832 cost = add_costs (cost, cost1);
3834 /* Bound the cost by target_spill_cost. The parts of complicated
3835 computations are often either loop invariant or at least can
3836 be shared between several iv uses, so letting this grow without
3837 limits would not give reasonable results. */
3838 if (cost.cost > (int) target_spill_cost [speed])
3839 cost.cost = target_spill_cost [speed];
3841 return cost;
3844 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3845 invariants the computation depends on. */
3847 static comp_cost
3848 force_var_cost (struct ivopts_data *data,
3849 tree expr, bitmap *depends_on)
3851 if (depends_on)
3853 fd_ivopts_data = data;
3854 walk_tree (&expr, find_depends, depends_on, NULL);
3857 return force_expr_to_var_cost (expr, data->speed);
3860 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3861 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3862 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3863 invariants the computation depends on. */
3865 static comp_cost
3866 split_address_cost (struct ivopts_data *data,
3867 tree addr, bool *symbol_present, bool *var_present,
3868 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3870 tree core;
3871 HOST_WIDE_INT bitsize;
3872 HOST_WIDE_INT bitpos;
3873 tree toffset;
3874 machine_mode mode;
3875 int unsignedp, volatilep;
3877 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3878 &unsignedp, &volatilep, false);
3880 if (toffset != 0
3881 || bitpos % BITS_PER_UNIT != 0
3882 || TREE_CODE (core) != VAR_DECL)
3884 *symbol_present = false;
3885 *var_present = true;
3886 fd_ivopts_data = data;
3887 walk_tree (&addr, find_depends, depends_on, NULL);
3888 return new_cost (target_spill_cost[data->speed], 0);
3891 *offset += bitpos / BITS_PER_UNIT;
3892 if (TREE_STATIC (core)
3893 || DECL_EXTERNAL (core))
3895 *symbol_present = true;
3896 *var_present = false;
3897 return no_cost;
3900 *symbol_present = false;
3901 *var_present = true;
3902 return no_cost;
3905 /* Estimates cost of expressing difference of addresses E1 - E2 as
3906 var + symbol + offset. The value of offset is added to OFFSET,
3907 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3908 part is missing. DEPENDS_ON is a set of the invariants the computation
3909 depends on. */
3911 static comp_cost
3912 ptr_difference_cost (struct ivopts_data *data,
3913 tree e1, tree e2, bool *symbol_present, bool *var_present,
3914 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3916 HOST_WIDE_INT diff = 0;
3917 aff_tree aff_e1, aff_e2;
3918 tree type;
3920 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3922 if (ptr_difference_const (e1, e2, &diff))
3924 *offset += diff;
3925 *symbol_present = false;
3926 *var_present = false;
3927 return no_cost;
3930 if (integer_zerop (e2))
3931 return split_address_cost (data, TREE_OPERAND (e1, 0),
3932 symbol_present, var_present, offset, depends_on);
3934 *symbol_present = false;
3935 *var_present = true;
3937 type = signed_type_for (TREE_TYPE (e1));
3938 tree_to_aff_combination (e1, type, &aff_e1);
3939 tree_to_aff_combination (e2, type, &aff_e2);
3940 aff_combination_scale (&aff_e2, -1);
3941 aff_combination_add (&aff_e1, &aff_e2);
3943 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3946 /* Estimates cost of expressing difference E1 - E2 as
3947 var + symbol + offset. The value of offset is added to OFFSET,
3948 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3949 part is missing. DEPENDS_ON is a set of the invariants the computation
3950 depends on. */
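/* E.g. for E1 = &a[4] and E2 = &a[0], with 4-byte elements, the whole
   difference goes to *OFFSET (16) at no cost; for E1 = x + 8 and E2 = y,
   8 is added to *OFFSET and the cost of computing x - y is returned
   (an illustrative sketch).  */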
3952 static comp_cost
3953 difference_cost (struct ivopts_data *data,
3954 tree e1, tree e2, bool *symbol_present, bool *var_present,
3955 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3957 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3958 unsigned HOST_WIDE_INT off1, off2;
3959 aff_tree aff_e1, aff_e2;
3960 tree type;
3962 e1 = strip_offset (e1, &off1);
3963 e2 = strip_offset (e2, &off2);
3964 *offset += off1 - off2;
3966 STRIP_NOPS (e1);
3967 STRIP_NOPS (e2);
3969 if (TREE_CODE (e1) == ADDR_EXPR)
3970 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3971 offset, depends_on);
3972 *symbol_present = false;
3974 if (operand_equal_p (e1, e2, 0))
3976 *var_present = false;
3977 return no_cost;
3980 *var_present = true;
3982 if (integer_zerop (e2))
3983 return force_var_cost (data, e1, depends_on);
3985 if (integer_zerop (e1))
3987 comp_cost cost = force_var_cost (data, e2, depends_on);
3988 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3989 return cost;
3992 type = signed_type_for (TREE_TYPE (e1));
3993 tree_to_aff_combination (e1, type, &aff_e1);
3994 tree_to_aff_combination (e2, type, &aff_e2);
3995 aff_combination_scale (&aff_e2, -1);
3996 aff_combination_add (&aff_e1, &aff_e2);
3998 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4001 /* Returns true if AFF1 and AFF2 are identical. */
4003 static bool
4004 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4006 unsigned i;
4008 if (aff1->n != aff2->n)
4009 return false;
4011 for (i = 0; i < aff1->n; i++)
4013 if (aff1->elts[i].coef != aff2->elts[i].coef)
4014 return false;
4016 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4017 return false;
4019 return true;
4022 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4024 static int
4025 get_expr_id (struct ivopts_data *data, tree expr)
4027 struct iv_inv_expr_ent ent;
4028 struct iv_inv_expr_ent **slot;
4030 ent.expr = expr;
4031 ent.hash = iterative_hash_expr (expr, 0);
4032 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4033 if (*slot)
4034 return (*slot)->id;
4036 *slot = XNEW (struct iv_inv_expr_ent);
4037 (*slot)->expr = expr;
4038 (*slot)->hash = ent.hash;
4039 (*slot)->id = data->inv_expr_id++;
4040 return (*slot)->id;
4043 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4044 requires a new compiler-generated temporary. Returns -1 otherwise.
4045 ADDRESS_P is a flag indicating whether the expression is for address
4046 computation. */
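/* E.g. if UBASE and CBASE are both &a[0] and RATIO is 1, no temporary is
   needed and -1 is returned; if UBASE is X and CBASE is Y, the id of the
   invariant expression X - RATIO * Y is generally returned (an
   illustrative sketch).  */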
4048 static int
4049 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4050 tree cbase, HOST_WIDE_INT ratio,
4051 bool address_p)
4053 aff_tree ubase_aff, cbase_aff;
4054 tree expr, ub, cb;
4056 STRIP_NOPS (ubase);
4057 STRIP_NOPS (cbase);
4058 ub = ubase;
4059 cb = cbase;
4061 if ((TREE_CODE (ubase) == INTEGER_CST)
4062 && (TREE_CODE (cbase) == INTEGER_CST))
4063 return -1;
4065 /* Strips the constant part. */
4066 if (TREE_CODE (ubase) == PLUS_EXPR
4067 || TREE_CODE (ubase) == MINUS_EXPR
4068 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4070 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4071 ubase = TREE_OPERAND (ubase, 0);
4074 /* Strips the constant part. */
4075 if (TREE_CODE (cbase) == PLUS_EXPR
4076 || TREE_CODE (cbase) == MINUS_EXPR
4077 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4079 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4080 cbase = TREE_OPERAND (cbase, 0);
4083 if (address_p)
4085 if (((TREE_CODE (ubase) == SSA_NAME)
4086 || (TREE_CODE (ubase) == ADDR_EXPR
4087 && is_gimple_min_invariant (ubase)))
4088 && (TREE_CODE (cbase) == INTEGER_CST))
4089 return -1;
4091 if (((TREE_CODE (cbase) == SSA_NAME)
4092 || (TREE_CODE (cbase) == ADDR_EXPR
4093 && is_gimple_min_invariant (cbase)))
4094 && (TREE_CODE (ubase) == INTEGER_CST))
4095 return -1;
4098 if (ratio == 1)
4100 if (operand_equal_p (ubase, cbase, 0))
4101 return -1;
4103 if (TREE_CODE (ubase) == ADDR_EXPR
4104 && TREE_CODE (cbase) == ADDR_EXPR)
4106 tree usym, csym;
4108 usym = TREE_OPERAND (ubase, 0);
4109 csym = TREE_OPERAND (cbase, 0);
4110 if (TREE_CODE (usym) == ARRAY_REF)
4112 tree ind = TREE_OPERAND (usym, 1);
4113 if (TREE_CODE (ind) == INTEGER_CST
4114 && tree_fits_shwi_p (ind)
4115 && tree_to_shwi (ind) == 0)
4116 usym = TREE_OPERAND (usym, 0);
4118 if (TREE_CODE (csym) == ARRAY_REF)
4120 tree ind = TREE_OPERAND (csym, 1);
4121 if (TREE_CODE (ind) == INTEGER_CST
4122 && tree_fits_shwi_p (ind)
4123 && tree_to_shwi (ind) == 0)
4124 csym = TREE_OPERAND (csym, 0);
4126 if (operand_equal_p (usym, csym, 0))
4127 return -1;
4129 /* Now do a more complex comparison. */
4130 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4131 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4132 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4133 return -1;
4136 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4137 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4139 aff_combination_scale (&cbase_aff, -1 * ratio);
4140 aff_combination_add (&ubase_aff, &cbase_aff);
4141 expr = aff_combination_to_tree (&ubase_aff);
4142 return get_expr_id (data, expr);
4147 /* Determines the cost of the computation by which USE is expressed
4148 from induction variable CAND. If ADDRESS_P is true, we just need
4149 to create an address from it, otherwise we want to get it into a
4150 register. A set of invariants we depend on is stored in
4151 DEPENDS_ON. AT is the statement at which the value is computed.
4152 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4153 addressing is likely. */
4155 static comp_cost
4156 get_computation_cost_at (struct ivopts_data *data,
4157 struct iv_use *use, struct iv_cand *cand,
4158 bool address_p, bitmap *depends_on, gimple at,
4159 bool *can_autoinc,
4160 int *inv_expr_id)
4162 tree ubase = use->iv->base, ustep = use->iv->step;
4163 tree cbase, cstep;
4164 tree utype = TREE_TYPE (ubase), ctype;
4165 unsigned HOST_WIDE_INT cstepi, offset = 0;
4166 HOST_WIDE_INT ratio, aratio;
4167 bool var_present, symbol_present, stmt_is_after_inc;
4168 comp_cost cost;
4169 widest_int rat;
4170 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4171 machine_mode mem_mode = (address_p
4172 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4173 : VOIDmode);
4175 *depends_on = NULL;
4177 /* Only consider real candidates. */
4178 if (!cand->iv)
4179 return infinite_cost;
4181 cbase = cand->iv->base;
4182 cstep = cand->iv->step;
4183 ctype = TREE_TYPE (cbase);
4185 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4187 /* We do not have enough precision to express the values of the use. */
4188 return infinite_cost;
4191 if (address_p
4192 || (use->iv->base_object
4193 && cand->iv->base_object
4194 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4195 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4197 /* Do not try to express the address of an object with a computation
4198 based on the address of a different object. This may cause problems
4199 in RTL-level alias analysis (which does not expect this to happen,
4200 as it is illegal in C), and would be unlikely to be useful
4201 anyway. */
4202 if (use->iv->base_object
4203 && cand->iv->base_object
4204 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4205 return infinite_cost;
4208 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4210 /* TODO -- add direct handling of this case. */
4211 goto fallback;
4214 /* CSTEPI is removed from the offset in case the statement is after the
4215 increment. If the step is not constant, we use zero instead.
4216 This is a bit imprecise (there is the extra addition), but
4217 redundancy elimination is likely to transform the code so that
4218 it uses the value of the variable before the increment anyway,
4219 so this is not all that unrealistic. */
4220 if (cst_and_fits_in_hwi (cstep))
4221 cstepi = int_cst_value (cstep);
4222 else
4223 cstepi = 0;
4225 if (!constant_multiple_of (ustep, cstep, &rat))
4226 return infinite_cost;
4228 if (wi::fits_shwi_p (rat))
4229 ratio = rat.to_shwi ();
4230 else
4231 return infinite_cost;
4233 STRIP_NOPS (cbase);
4234 ctype = TREE_TYPE (cbase);
4236 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4238 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4239 or ratio == 1, it is better to handle this like
4241 ubase - ratio * cbase + ratio * var
4243 (this also holds in the case ratio == -1; handling it is a TODO). */
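/* Worked example (illustrative, not part of the original source): if
   cbase is the constant 4 and ratio == 1, the first branch below sets
   offset = -4 and only charges for computing ubase, i.e. the use is
   computed as ubase - 4 + var with the constant folded into the
   addressing offset.  */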
4245 if (cst_and_fits_in_hwi (cbase))
4247 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4248 cost = difference_cost (data,
4249 ubase, build_int_cst (utype, 0),
4250 &symbol_present, &var_present, &offset,
4251 depends_on);
4252 cost.cost /= avg_loop_niter (data->current_loop);
4254 else if (ratio == 1)
4256 tree real_cbase = cbase;
4258 /* Check to see if any adjustment is needed. */
4259 if (cstepi == 0 && stmt_is_after_inc)
4261 aff_tree real_cbase_aff;
4262 aff_tree cstep_aff;
4264 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4265 &real_cbase_aff);
4266 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4268 aff_combination_add (&real_cbase_aff, &cstep_aff);
4269 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4272 cost = difference_cost (data,
4273 ubase, real_cbase,
4274 &symbol_present, &var_present, &offset,
4275 depends_on);
4276 cost.cost /= avg_loop_niter (data->current_loop);
4278 else if (address_p
4279 && !POINTER_TYPE_P (ctype)
4280 && multiplier_allowed_in_address_p
4281 (ratio, mem_mode,
4282 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4284 cbase
4285 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4286 cost = difference_cost (data,
4287 ubase, cbase,
4288 &symbol_present, &var_present, &offset,
4289 depends_on);
4290 cost.cost /= avg_loop_niter (data->current_loop);
4292 else
4294 cost = force_var_cost (data, cbase, depends_on);
4295 cost = add_costs (cost,
4296 difference_cost (data,
4297 ubase, build_int_cst (utype, 0),
4298 &symbol_present, &var_present,
4299 &offset, depends_on));
4300 cost.cost /= avg_loop_niter (data->current_loop);
4301 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4304 if (inv_expr_id)
4306 *inv_expr_id =
4307 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4308 /* Clear the DEPENDS_ON bitmap. */
4309 if (*inv_expr_id != -1 && depends_on && *depends_on)
4310 bitmap_clear (*depends_on);
4313 /* If we are after the increment, the value of the candidate is higher by
4314 one iteration. */
4315 if (stmt_is_after_inc)
4316 offset -= ratio * cstepi;
4318 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4319 (symbol/var1/const parts may be omitted). If we are looking for an
4320 address, find the cost of addressing this. */
4321 if (address_p)
4322 return add_costs (cost,
4323 get_address_cost (symbol_present, var_present,
4324 offset, ratio, cstepi,
4325 mem_mode,
4326 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4327 speed, stmt_is_after_inc,
4328 can_autoinc));
4330 /* Otherwise estimate the costs for computing the expression. */
4331 if (!symbol_present && !var_present && !offset)
4333 if (ratio != 1)
4334 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4335 return cost;
4338 /* Symbol + offset should be compile-time computable, so consider that they
4339 are added once to the variable, if present. */
4340 if (var_present && (symbol_present || offset))
4341 cost.cost += adjust_setup_cost (data,
4342 add_cost (speed, TYPE_MODE (ctype)));
4344 /* Having an offset does not affect the runtime cost if it is added to
4345 a symbol, but it increases complexity. */
4346 if (offset)
4347 cost.complexity++;
4349 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4351 aratio = ratio > 0 ? ratio : -ratio;
4352 if (aratio != 1)
4353 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4354 return cost;
4356 fallback:
4357 if (can_autoinc)
4358 *can_autoinc = false;
4361 /* Just get the expression, expand it and measure the cost. */
4362 tree comp = get_computation_at (data->current_loop, use, cand, at);
4364 if (!comp)
4365 return infinite_cost;
4367 if (address_p)
4368 comp = build_simple_mem_ref (comp);
4370 return new_cost (computation_cost (comp, speed), 0);
4374 /* Determines the cost of the computation by which USE is expressed
4375 from induction variable CAND. If ADDRESS_P is true, we just need
4376 to create an address from it, otherwise we want to get it into a
4377 register. The set of invariants we depend on is stored in
4378 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4379 autoinc addressing is likely. */
4381 static comp_cost
4382 get_computation_cost (struct ivopts_data *data,
4383 struct iv_use *use, struct iv_cand *cand,
4384 bool address_p, bitmap *depends_on,
4385 bool *can_autoinc, int *inv_expr_id)
4387 return get_computation_cost_at (data,
4388 use, cand, address_p, depends_on, use->stmt,
4389 can_autoinc, inv_expr_id);
4392 /* Determines cost of basing replacement of USE on CAND in a generic
4393 expression. */
4395 static bool
4396 determine_use_iv_cost_generic (struct ivopts_data *data,
4397 struct iv_use *use, struct iv_cand *cand)
4399 bitmap depends_on;
4400 comp_cost cost;
4401 int inv_expr_id = -1;
4403 /* The simple case first -- if we need to express the value of the
4404 preserved original biv, the cost is 0. This also prevents us from
4405 counting the cost of the increment twice -- once at this use and once
4406 in the cost of the candidate. */
4407 if (cand->pos == IP_ORIGINAL
4408 && cand->incremented_at == use->stmt)
4410 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4411 ERROR_MARK, -1);
4412 return true;
4415 cost = get_computation_cost (data, use, cand, false, &depends_on,
4416 NULL, &inv_expr_id);
4418 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4419 inv_expr_id);
4421 return !infinite_cost_p (cost);
4424 /* Determines cost of basing replacement of USE on CAND in an address. */
4426 static bool
4427 determine_use_iv_cost_address (struct ivopts_data *data,
4428 struct iv_use *use, struct iv_cand *cand)
4430 bitmap depends_on;
4431 bool can_autoinc;
4432 int inv_expr_id = -1;
4433 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4434 &can_autoinc, &inv_expr_id);
4436 if (cand->ainc_use == use)
4438 if (can_autoinc)
4439 cost.cost -= cand->cost_step;
4440 /* If we generated the candidate solely for exploiting autoincrement
4441 opportunities, and it turns out it can't be used, set the cost to
4442 infinity to make sure we ignore it. */
4443 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4444 cost = infinite_cost;
4446 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4447 inv_expr_id);
4449 return !infinite_cost_p (cost);
4452 /* Computes the value of candidate CAND at position AT in iteration NITER,
4453 and stores it to VAL. */
4455 static void
4456 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4457 aff_tree *val)
4459 aff_tree step, delta, nit;
4460 struct iv *iv = cand->iv;
4461 tree type = TREE_TYPE (iv->base);
4462 tree steptype = type;
4463 if (POINTER_TYPE_P (type))
4464 steptype = sizetype;
4465 steptype = unsigned_type_for (type);
4467 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4468 aff_combination_convert (&step, steptype);
4469 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4470 aff_combination_convert (&nit, steptype);
4471 aff_combination_mult (&nit, &step, &delta);
4472 if (stmt_after_increment (loop, cand, at))
4473 aff_combination_add (&delta, &step);
4475 tree_to_aff_combination (iv->base, type, val);
4476 if (!POINTER_TYPE_P (type))
4477 aff_combination_convert (val, steptype);
4478 aff_combination_add (val, &delta);
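/* Illustrative sketch (not part of the original source): for integral
   ivs the computation above amounts to the plain C below; the real code
   works on affine combinations in an unsigned type to avoid overflow
   issues.  */
#if 0
static unsigned
cand_value_at_example (unsigned base, unsigned step, unsigned niter,
                       int after_increment)
{
  unsigned delta = niter * step;
  if (after_increment)
    delta += step;              /* One more increment already happened.  */
  return base + delta;          /* base 0, step 4, niter 10 -> 40 or 44.  */
}
#endif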
4481 /* Returns the period of induction variable IV. */
4483 static tree
4484 iv_period (struct iv *iv)
4486 tree step = iv->step, period, type;
4487 tree pow2div;
4489 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4491 type = unsigned_type_for (TREE_TYPE (step));
4492 /* The period of the iv is lcm (step, type_range)/step - 1,
4493 i.e., N*type_range/step - 1. Since the type range is a power
4494 of two, N == step >> num_of_ending_zeros_binary (step),
4495 so the final result is
4497 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
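/* Illustrative sketch (not in the original source): for a 32-bit type
   this boils down to the plain C below; e.g. step == 8 gives z == 3 and
   period 2^29 - 1.  The name step_value is hypothetical.  */
#if 0
unsigned z = 0, s = step_value;      /* step_value: hypothetical input.  */
while (s && !(s & 1))
  {
    s >>= 1;
    z++;                             /* num_of_ending_zeros_binary (step) */
  }
unsigned period = 0xffffffffu >> z;  /* == (type_range >> z) - 1 */
#endif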
4500 pow2div = num_ending_zeros (step);
4502 period = build_low_bits_mask (type,
4503 (TYPE_PRECISION (type)
4504 - tree_to_uhwi (pow2div)));
4506 return period;
4509 /* Returns the comparison operator used when eliminating the iv USE. */
4511 static enum tree_code
4512 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4514 struct loop *loop = data->current_loop;
4515 basic_block ex_bb;
4516 edge exit;
4518 ex_bb = gimple_bb (use->stmt);
4519 exit = EDGE_SUCC (ex_bb, 0);
4520 if (flow_bb_inside_loop_p (loop, exit->dest))
4521 exit = EDGE_SUCC (ex_bb, 1);
4523 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
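/* Illustrative note (not part of the original source): when the exit is
   taken on the true edge, the rewritten test exits once the candidate
   reaches the bound, hence EQ_EXPR; when the exit is on the false edge,
   the loop keeps running while the candidate differs from the bound,
   hence NE_EXPR.  */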
4526 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4527 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4528 calculation is performed in a non-wrapping type.
4530 TODO: More generally, we could test for the situation that
4531 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4532 This would require knowing the sign of OFFSET. */
4534 static bool
4535 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4537 enum tree_code code;
4538 tree e1, e2;
4539 aff_tree aff_e1, aff_e2, aff_offset;
4541 if (!nowrap_type_p (TREE_TYPE (base)))
4542 return false;
4544 base = expand_simple_operations (base);
4546 if (TREE_CODE (base) == SSA_NAME)
4548 gimple stmt = SSA_NAME_DEF_STMT (base);
4550 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4551 return false;
4553 code = gimple_assign_rhs_code (stmt);
4554 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4555 return false;
4557 e1 = gimple_assign_rhs1 (stmt);
4558 e2 = gimple_assign_rhs2 (stmt);
4560 else
4562 code = TREE_CODE (base);
4563 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4564 return false;
4565 e1 = TREE_OPERAND (base, 0);
4566 e2 = TREE_OPERAND (base, 1);
4569 /* Use affine expansion as a deeper inspection to prove the equality. */
4570 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4571 &aff_e2, &data->name_expansion_cache);
4572 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4573 &aff_offset, &data->name_expansion_cache);
4574 aff_combination_scale (&aff_offset, -1);
4575 switch (code)
4577 case PLUS_EXPR:
4578 aff_combination_add (&aff_e2, &aff_offset);
4579 if (aff_combination_zero_p (&aff_e2))
4580 return true;
4582 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4583 &aff_e1, &data->name_expansion_cache);
4584 aff_combination_add (&aff_e1, &aff_offset);
4585 return aff_combination_zero_p (&aff_e1);
4587 case POINTER_PLUS_EXPR:
4588 aff_combination_add (&aff_e2, &aff_offset);
4589 return aff_combination_zero_p (&aff_e2);
4591 default:
4592 return false;
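/* Illustrative example (not part of the original source): for BASE
   defined as q = p + 16 (a POINTER_PLUS_EXPR) and OFFSET == 16, the
   affine expansion of e2 - OFFSET above is zero, so BASE - OFFSET is
   exactly p and the subtraction provably cannot wrap.  */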
4596 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4597 comparison with CAND. NITER describes the number of iterations of
4598 the loop. If successful, the comparison in COMP_P is altered accordingly.
4600 We aim to handle the following situation:
4602 sometype *base, *p;
4603 int a, b, i;
4605 i = a;
4606 p = p_0 = base + a;
4610 bla (*p);
4611 p++;
4612 i++;
4614 while (i < b);
4616 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4617 We aim to optimize this to
4619 p = p_0 = base + a;
4622 bla (*p);
4623 p++;
4625 while (p < p_0 - a + b);
4627 This preserves correctness, since the pointer arithmetic does not
4628 overflow. More precisely:
4630 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4631 overflow in computing it or the values of p.
4632 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4633 overflow. To prove this, we use the fact that p_0 = base + a. */
4635 static bool
4636 iv_elimination_compare_lt (struct ivopts_data *data,
4637 struct iv_cand *cand, enum tree_code *comp_p,
4638 struct tree_niter_desc *niter)
4640 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4641 struct aff_tree nit, tmpa, tmpb;
4642 enum tree_code comp;
4643 HOST_WIDE_INT step;
4645 /* We need to know that the candidate induction variable does not overflow.
4646 While more complex analysis may be used to prove this, for now just
4647 check that the variable appears in the original program and that it
4648 is computed in a type that guarantees no overflows. */
4649 cand_type = TREE_TYPE (cand->iv->base);
4650 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4651 return false;
4653 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4654 the calculation of the BOUND could overflow, making the comparison
4655 invalid. */
4656 if (!data->loop_single_exit_p)
4657 return false;
4659 /* We need to be able to decide whether the candidate is increasing or
4660 decreasing in order to choose the right comparison operator. */
4661 if (!cst_and_fits_in_hwi (cand->iv->step))
4662 return false;
4663 step = int_cst_value (cand->iv->step);
4665 /* Check that the number of iterations matches the expected pattern:
4666 a + 1 > b ? 0 : b - a - 1. */
4667 mbz = niter->may_be_zero;
4668 if (TREE_CODE (mbz) == GT_EXPR)
4670 /* Handle a + 1 > b. */
4671 tree op0 = TREE_OPERAND (mbz, 0);
4672 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4674 a = TREE_OPERAND (op0, 0);
4675 b = TREE_OPERAND (mbz, 1);
4677 else
4678 return false;
4680 else if (TREE_CODE (mbz) == LT_EXPR)
4682 tree op1 = TREE_OPERAND (mbz, 1);
4684 /* Handle b < a + 1. */
4685 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4687 a = TREE_OPERAND (op1, 0);
4688 b = TREE_OPERAND (mbz, 0);
4690 else
4691 return false;
4693 else
4694 return false;
4696 /* Expected number of iterations is B - A - 1. Check that it matches
4697 the actual number, i.e., that B - A - NITER = 1. */
4698 tree_to_aff_combination (niter->niter, nit_type, &nit);
4699 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4700 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4701 aff_combination_scale (&nit, -1);
4702 aff_combination_scale (&tmpa, -1);
4703 aff_combination_add (&tmpb, &tmpa);
4704 aff_combination_add (&tmpb, &nit);
4705 if (tmpb.n != 0 || tmpb.offset != 1)
4706 return false;
4708 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4709 overflow. */
4710 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4711 cand->iv->step,
4712 fold_convert (TREE_TYPE (cand->iv->step), a));
4713 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4714 return false;
4716 /* Determine the new comparison operator. */
4717 comp = step < 0 ? GT_EXPR : LT_EXPR;
4718 if (*comp_p == NE_EXPR)
4719 *comp_p = comp;
4720 else if (*comp_p == EQ_EXPR)
4721 *comp_p = invert_tree_comparison (comp, false);
4722 else
4723 gcc_unreachable ();
4725 return true;
4728 /* Check whether it is possible to express the condition in USE by comparison
4729 of candidate CAND. If so, store the value compared with to BOUND, and the
4730 comparison operator to COMP. */
4732 static bool
4733 may_eliminate_iv (struct ivopts_data *data,
4734 struct iv_use *use, struct iv_cand *cand, tree *bound,
4735 enum tree_code *comp)
4737 basic_block ex_bb;
4738 edge exit;
4739 tree period;
4740 struct loop *loop = data->current_loop;
4741 aff_tree bnd;
4742 struct tree_niter_desc *desc = NULL;
4744 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4745 return false;
4747 /* For now this works only for exits that dominate the loop latch.
4748 TODO: extend to other conditions inside the loop body. */
4749 ex_bb = gimple_bb (use->stmt);
4750 if (use->stmt != last_stmt (ex_bb)
4751 || gimple_code (use->stmt) != GIMPLE_COND
4752 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4753 return false;
4755 exit = EDGE_SUCC (ex_bb, 0);
4756 if (flow_bb_inside_loop_p (loop, exit->dest))
4757 exit = EDGE_SUCC (ex_bb, 1);
4758 if (flow_bb_inside_loop_p (loop, exit->dest))
4759 return false;
4761 desc = niter_for_exit (data, exit);
4762 if (!desc)
4763 return false;
4765 /* Determine whether we can use the variable to test the exit condition.
4766 This is the case iff the period of the induction variable is greater
4767 than the number of iterations for which the exit condition is true. */
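/* Illustrative example (not part of the original source): an unsigned
   char iv with step 1 has period 255, so it may replace the exit test of
   a loop that runs at most 200 iterations, but not of one that may run
   300 -- the iv would wrap and the test could trigger too early.  */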
4768 period = iv_period (cand->iv);
4770 /* If the number of iterations is constant, compare against it directly. */
4771 if (TREE_CODE (desc->niter) == INTEGER_CST)
4773 /* See cand_value_at. */
4774 if (stmt_after_increment (loop, cand, use->stmt))
4776 if (!tree_int_cst_lt (desc->niter, period))
4777 return false;
4779 else
4781 if (tree_int_cst_lt (period, desc->niter))
4782 return false;
4786 /* If not, and if this is the only possible exit of the loop, see whether
4787 we can get a conservative estimate on the number of iterations of the
4788 entire loop and compare against that instead. */
4789 else
4791 widest_int period_value, max_niter;
4793 max_niter = desc->max;
4794 if (stmt_after_increment (loop, cand, use->stmt))
4795 max_niter += 1;
4796 period_value = wi::to_widest (period);
4797 if (wi::gtu_p (max_niter, period_value))
4799 /* See if we can take advantage of inferred loop bound information. */
4800 if (data->loop_single_exit_p)
4802 if (!max_loop_iterations (loop, &max_niter))
4803 return false;
4804 /* The loop bound is already adjusted by adding 1. */
4805 if (wi::gtu_p (max_niter, period_value))
4806 return false;
4808 else
4809 return false;
4813 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4815 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4816 aff_combination_to_tree (&bnd));
4817 *comp = iv_elimination_compare (data, use);
4819 /* It is unlikely that computing the number of iterations using division
4820 would be more profitable than keeping the original induction variable. */
4821 if (expression_expensive_p (*bound))
4822 return false;
4824 /* Sometimes it is possible to handle the situation that the number of
4825 iterations may be zero unless additional assumptions hold, by using <
4826 instead of != in the exit condition.
4828 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4829 base the exit condition on it. However, that is often too
4830 expensive. */
4831 if (!integer_zerop (desc->may_be_zero))
4832 return iv_elimination_compare_lt (data, cand, comp, desc);
4834 return true;
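/* End-to-end sketch (illustrative, not part of the original source): in
   "i = 0; do { ...; i++; } while (i != n);" with a candidate q = p + i,
   cand_value_at evaluated at the exit yields BOUND == p + n, so once the
   period check above succeeds the exit test can be rewritten as
   q != p + n and the counter i becomes eliminable.  */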
4837 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4838 be copied, if it is used in the loop body and DATA->body_includes_call. */
4840 static int
4841 parm_decl_cost (struct ivopts_data *data, tree bound)
4843 tree sbound = bound;
4844 STRIP_NOPS (sbound);
4846 if (TREE_CODE (sbound) == SSA_NAME
4847 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4848 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4849 && data->body_includes_call)
4850 return COSTS_N_INSNS (1);
4852 return 0;
4855 /* Determines cost of basing replacement of USE on CAND in a condition. */
4857 static bool
4858 determine_use_iv_cost_condition (struct ivopts_data *data,
4859 struct iv_use *use, struct iv_cand *cand)
4861 tree bound = NULL_TREE;
4862 struct iv *cmp_iv;
4863 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4864 comp_cost elim_cost, express_cost, cost, bound_cost;
4865 bool ok;
4866 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4867 tree *control_var, *bound_cst;
4868 enum tree_code comp = ERROR_MARK;
4870 /* Only consider real candidates. */
4871 if (!cand->iv)
4873 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4874 ERROR_MARK, -1);
4875 return false;
4878 /* Try iv elimination. */
4879 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4881 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4882 if (elim_cost.cost == 0)
4883 elim_cost.cost = parm_decl_cost (data, bound);
4884 else if (TREE_CODE (bound) == INTEGER_CST)
4885 elim_cost.cost = 0;
4886 /* If we replace a loop condition 'i < n' with 'p < base + n',
4887 depends_on_elim will have 'base' and 'n' set, which implies
4888 that both 'base' and 'n' will be live during the loop. More likely,
4889 'base + n' will be loop invariant, resulting in only one live value
4890 during the loop. So in that case we clear depends_on_elim and set
4891 elim_inv_expr_id instead. */
4892 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4894 elim_inv_expr_id = get_expr_id (data, bound);
4895 bitmap_clear (depends_on_elim);
4897 /* The bound is a loop invariant, so it will be only computed
4898 once. */
4899 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4901 else
4902 elim_cost = infinite_cost;
4904 /* Try expressing the original giv. If it is compared with an invariant,
4905 note that we cannot get rid of it. */
4906 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4907 NULL, &cmp_iv);
4908 gcc_assert (ok);
4910 /* When the condition is a comparison of the candidate IV against
4911 zero, prefer this IV.
4913 TODO: The constant that we're subtracting from the cost should
4914 be target-dependent. This information should be added to the
4915 target costs for each backend. */
4916 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4917 && integer_zerop (*bound_cst)
4918 && (operand_equal_p (*control_var, cand->var_after, 0)
4919 || operand_equal_p (*control_var, cand->var_before, 0)))
4920 elim_cost.cost -= 1;
4922 express_cost = get_computation_cost (data, use, cand, false,
4923 &depends_on_express, NULL,
4924 &express_inv_expr_id);
4925 fd_ivopts_data = data;
4926 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4928 /* Count the cost of the original bound as well. */
4929 bound_cost = force_var_cost (data, *bound_cst, NULL);
4930 if (bound_cost.cost == 0)
4931 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4932 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4933 bound_cost.cost = 0;
4934 express_cost.cost += bound_cost.cost;
4936 /* Choose the better approach, preferring the eliminated IV. */
4937 if (compare_costs (elim_cost, express_cost) <= 0)
4939 cost = elim_cost;
4940 depends_on = depends_on_elim;
4941 depends_on_elim = NULL;
4942 inv_expr_id = elim_inv_expr_id;
4944 else
4946 cost = express_cost;
4947 depends_on = depends_on_express;
4948 depends_on_express = NULL;
4949 bound = NULL_TREE;
4950 comp = ERROR_MARK;
4951 inv_expr_id = express_inv_expr_id;
4954 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4956 if (depends_on_elim)
4957 BITMAP_FREE (depends_on_elim);
4958 if (depends_on_express)
4959 BITMAP_FREE (depends_on_express);
4961 return !infinite_cost_p (cost);
4964 /* Determines cost of basing replacement of USE on CAND. Returns false
4965 if USE cannot be based on CAND. */
4967 static bool
4968 determine_use_iv_cost (struct ivopts_data *data,
4969 struct iv_use *use, struct iv_cand *cand)
4971 switch (use->type)
4973 case USE_NONLINEAR_EXPR:
4974 return determine_use_iv_cost_generic (data, use, cand);
4976 case USE_ADDRESS:
4977 return determine_use_iv_cost_address (data, use, cand);
4979 case USE_COMPARE:
4980 return determine_use_iv_cost_condition (data, use, cand);
4982 default:
4983 gcc_unreachable ();
4987 /* Return true if get_computation_cost indicates that autoincrement is
4988 a possibility for the pair of USE and CAND, false otherwise. */
4990 static bool
4991 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4992 struct iv_cand *cand)
4994 bitmap depends_on;
4995 bool can_autoinc;
4996 comp_cost cost;
4998 if (use->type != USE_ADDRESS)
4999 return false;
5001 cost = get_computation_cost (data, use, cand, true, &depends_on,
5002 &can_autoinc, NULL);
5004 BITMAP_FREE (depends_on);
5006 return !infinite_cost_p (cost) && can_autoinc;
5009 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5010 use that allows autoincrement, and set their AINC_USE if possible. */
5012 static void
5013 set_autoinc_for_original_candidates (struct ivopts_data *data)
5015 unsigned i, j;
5017 for (i = 0; i < n_iv_cands (data); i++)
5019 struct iv_cand *cand = iv_cand (data, i);
5020 struct iv_use *closest_before = NULL;
5021 struct iv_use *closest_after = NULL;
5022 if (cand->pos != IP_ORIGINAL)
5023 continue;
5025 for (j = 0; j < n_iv_uses (data); j++)
5027 struct iv_use *use = iv_use (data, j);
5028 unsigned uid = gimple_uid (use->stmt);
5030 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5031 continue;
5033 if (uid < gimple_uid (cand->incremented_at)
5034 && (closest_before == NULL
5035 || uid > gimple_uid (closest_before->stmt)))
5036 closest_before = use;
5038 if (uid > gimple_uid (cand->incremented_at)
5039 && (closest_after == NULL
5040 || uid < gimple_uid (closest_after->stmt)))
5041 closest_after = use;
5044 if (closest_before != NULL
5045 && autoinc_possible_for_pair (data, closest_before, cand))
5046 cand->ainc_use = closest_before;
5047 else if (closest_after != NULL
5048 && autoinc_possible_for_pair (data, closest_after, cand))
5049 cand->ainc_use = closest_after;
5053 /* Finds the candidates for the induction variables. */
5055 static void
5056 find_iv_candidates (struct ivopts_data *data)
5058 /* Add commonly used ivs. */
5059 add_standard_iv_candidates (data);
5061 /* Add old induction variables. */
5062 add_old_ivs_candidates (data);
5064 /* Add induction variables derived from uses. */
5065 add_derived_ivs_candidates (data);
5067 set_autoinc_for_original_candidates (data);
5069 /* Record the important candidates. */
5070 record_important_candidates (data);
5073 /* Determines costs of basing the use of the iv on an iv candidate. */
5075 static void
5076 determine_use_iv_costs (struct ivopts_data *data)
5078 unsigned i, j;
5079 struct iv_use *use;
5080 struct iv_cand *cand;
5081 bitmap to_clear = BITMAP_ALLOC (NULL);
5083 alloc_use_cost_map (data);
5085 for (i = 0; i < n_iv_uses (data); i++)
5087 use = iv_use (data, i);
5089 if (data->consider_all_candidates)
5091 for (j = 0; j < n_iv_cands (data); j++)
5093 cand = iv_cand (data, j);
5094 determine_use_iv_cost (data, use, cand);
5097 else
5099 bitmap_iterator bi;
5101 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5103 cand = iv_cand (data, j);
5104 if (!determine_use_iv_cost (data, use, cand))
5105 bitmap_set_bit (to_clear, j);
5108 /* Remove the candidates for which the cost is infinite from
5109 the list of related candidates. */
5110 bitmap_and_compl_into (use->related_cands, to_clear);
5111 bitmap_clear (to_clear);
5115 BITMAP_FREE (to_clear);
5117 if (dump_file && (dump_flags & TDF_DETAILS))
5119 fprintf (dump_file, "Use-candidate costs:\n");
5121 for (i = 0; i < n_iv_uses (data); i++)
5123 use = iv_use (data, i);
5125 fprintf (dump_file, "Use %d:\n", i);
5126 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5127 for (j = 0; j < use->n_map_members; j++)
5129 if (!use->cost_map[j].cand
5130 || infinite_cost_p (use->cost_map[j].cost))
5131 continue;
5133 fprintf (dump_file, " %d\t%d\t%d\t",
5134 use->cost_map[j].cand->id,
5135 use->cost_map[j].cost.cost,
5136 use->cost_map[j].cost.complexity);
5137 if (use->cost_map[j].depends_on)
5138 bitmap_print (dump_file,
5139 use->cost_map[j].depends_on, "","");
5140 if (use->cost_map[j].inv_expr_id != -1)
5141 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5142 fprintf (dump_file, "\n");
5145 fprintf (dump_file, "\n");
5147 fprintf (dump_file, "\n");
5151 /* Determines cost of the candidate CAND. */
5153 static void
5154 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5156 comp_cost cost_base;
5157 unsigned cost, cost_step;
5158 tree base;
5160 if (!cand->iv)
5162 cand->cost = 0;
5163 return;
5166 /* There are two costs associated with the candidate -- its increment
5167 and its initialization. The second is almost negligible for any loop
5168 that rolls enough, so we take it into account only very little. */
5170 base = cand->iv->base;
5171 cost_base = force_var_cost (data, base, NULL);
5172 /* Only exceptionally will the iv register happen to be initialized with
5173 the proper value at no cost. In general, there will at least be a regcopy
5174 or a const set. */
5175 if (cost_base.cost == 0)
5176 cost_base.cost = COSTS_N_INSNS (1);
5177 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5179 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5181 /* Prefer the original ivs unless we may gain something by replacing them;
5182 the reason is to make debugging simpler. This is not relevant for
5183 artificial ivs created by other optimization passes. */
5184 if (cand->pos != IP_ORIGINAL
5185 || !SSA_NAME_VAR (cand->var_before)
5186 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5187 cost++;
5189 /* Prefer not to insert statements into the latch unless there are some
5190 already (so that we do not create unnecessary jumps). */
5191 if (cand->pos == IP_END
5192 && empty_block_p (ip_end_pos (data->current_loop)))
5193 cost++;
5195 cand->cost = cost;
5196 cand->cost_step = cost_step;
5199 /* Determines costs of computation of the candidates. */
5201 static void
5202 determine_iv_costs (struct ivopts_data *data)
5204 unsigned i;
5206 if (dump_file && (dump_flags & TDF_DETAILS))
5208 fprintf (dump_file, "Candidate costs:\n");
5209 fprintf (dump_file, " cand\tcost\n");
5212 for (i = 0; i < n_iv_cands (data); i++)
5214 struct iv_cand *cand = iv_cand (data, i);
5216 determine_iv_cost (data, cand);
5218 if (dump_file && (dump_flags & TDF_DETAILS))
5219 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5222 if (dump_file && (dump_flags & TDF_DETAILS))
5223 fprintf (dump_file, "\n");
5226 /* Calculates cost for having SIZE induction variables. */
5228 static unsigned
5229 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5231 /* We add size to the cost, so that we prefer eliminating ivs
5232 if possible. */
5233 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5234 data->body_includes_call);
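/* Illustrative reading (not part of the original source): while
   SIZE + DATA->regs_used fits in the available registers, the penalty
   grows roughly linearly in SIZE; beyond that, estimate_reg_pressure_cost
   starts charging for spills, so large candidate sets quickly become
   expensive.  */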
5237 /* For each size of the induction variable set determine the penalty. */
5239 static void
5240 determine_set_costs (struct ivopts_data *data)
5242 unsigned j, n;
5243 gphi *phi;
5244 gphi_iterator psi;
5245 tree op;
5246 struct loop *loop = data->current_loop;
5247 bitmap_iterator bi;
5249 if (dump_file && (dump_flags & TDF_DETAILS))
5251 fprintf (dump_file, "Global costs:\n");
5252 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5253 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5254 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5255 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5258 n = 0;
5259 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5261 phi = psi.phi ();
5262 op = PHI_RESULT (phi);
5264 if (virtual_operand_p (op))
5265 continue;
5267 if (get_iv (data, op))
5268 continue;
5270 n++;
5273 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5275 struct version_info *info = ver_info (data, j);
5277 if (info->inv_id && info->has_nonlin_use)
5278 n++;
5281 data->regs_used = n;
5282 if (dump_file && (dump_flags & TDF_DETAILS))
5283 fprintf (dump_file, " regs_used %d\n", n);
5285 if (dump_file && (dump_flags & TDF_DETAILS))
5287 fprintf (dump_file, " cost for size:\n");
5288 fprintf (dump_file, " ivs\tcost\n");
5289 for (j = 0; j <= 2 * target_avail_regs; j++)
5290 fprintf (dump_file, " %d\t%d\n", j,
5291 ivopts_global_cost_for_size (data, j));
5292 fprintf (dump_file, "\n");
5296 /* Returns true if A is a cheaper cost pair than B. */
5298 static bool
5299 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5301 int cmp;
5303 if (!a)
5304 return false;
5306 if (!b)
5307 return true;
5309 cmp = compare_costs (a->cost, b->cost);
5310 if (cmp < 0)
5311 return true;
5313 if (cmp > 0)
5314 return false;
5316 /* In case the costs are the same, prefer the cheaper candidate. */
5317 if (a->cand->cost < b->cand->cost)
5318 return true;
5320 return false;
5324 /* Returns the candidate by which USE is expressed in IVS. */
5326 static struct cost_pair *
5327 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5329 return ivs->cand_for_use[use->id];
5332 /* Computes the cost field of the IVS structure. */
5334 static void
5335 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5337 comp_cost cost = ivs->cand_use_cost;
5339 cost.cost += ivs->cand_cost;
5341 cost.cost += ivopts_global_cost_for_size (data,
5342 ivs->n_regs + ivs->num_used_inv_expr);
5344 ivs->cost = cost;
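/* Illustrative summary (not part of the original source):

     cost (IVS) = sum of the selected per-use costs
                  + sum of the used candidates' own costs
                  + global penalty for n_regs + num_used_inv_expr registers.  */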
5347 /* Remove the invariants in set INVS from set IVS. */
5349 static void
5350 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5352 bitmap_iterator bi;
5353 unsigned iid;
5355 if (!invs)
5356 return;
5358 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5360 ivs->n_invariant_uses[iid]--;
5361 if (ivs->n_invariant_uses[iid] == 0)
5362 ivs->n_regs--;
5366 /* Set USE not to be expressed by any candidate in IVS. */
5368 static void
5369 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5370 struct iv_use *use)
5372 unsigned uid = use->id, cid;
5373 struct cost_pair *cp;
5375 cp = ivs->cand_for_use[uid];
5376 if (!cp)
5377 return;
5378 cid = cp->cand->id;
5380 ivs->bad_uses++;
5381 ivs->cand_for_use[uid] = NULL;
5382 ivs->n_cand_uses[cid]--;
5384 if (ivs->n_cand_uses[cid] == 0)
5386 bitmap_clear_bit (ivs->cands, cid);
5387 /* Do not count the pseudocandidates. */
5388 if (cp->cand->iv)
5389 ivs->n_regs--;
5390 ivs->n_cands--;
5391 ivs->cand_cost -= cp->cand->cost;
5393 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5396 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5398 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5400 if (cp->inv_expr_id != -1)
5402 ivs->used_inv_expr[cp->inv_expr_id]--;
5403 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5404 ivs->num_used_inv_expr--;
5406 iv_ca_recount_cost (data, ivs);
5409 /* Add invariants in set INVS to set IVS. */
5411 static void
5412 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5414 bitmap_iterator bi;
5415 unsigned iid;
5417 if (!invs)
5418 return;
5420 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5422 ivs->n_invariant_uses[iid]++;
5423 if (ivs->n_invariant_uses[iid] == 1)
5424 ivs->n_regs++;
5428 /* Set cost pair for USE in set IVS to CP. */
5430 static void
5431 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5432 struct iv_use *use, struct cost_pair *cp)
5434 unsigned uid = use->id, cid;
5436 if (ivs->cand_for_use[uid] == cp)
5437 return;
5439 if (ivs->cand_for_use[uid])
5440 iv_ca_set_no_cp (data, ivs, use);
5442 if (cp)
5444 cid = cp->cand->id;
5446 ivs->bad_uses--;
5447 ivs->cand_for_use[uid] = cp;
5448 ivs->n_cand_uses[cid]++;
5449 if (ivs->n_cand_uses[cid] == 1)
5451 bitmap_set_bit (ivs->cands, cid);
5452 /* Do not count the pseudocandidates. */
5453 if (cp->cand->iv)
5454 ivs->n_regs++;
5455 ivs->n_cands++;
5456 ivs->cand_cost += cp->cand->cost;
5458 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5461 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5462 iv_ca_set_add_invariants (ivs, cp->depends_on);
5464 if (cp->inv_expr_id != -1)
5466 ivs->used_inv_expr[cp->inv_expr_id]++;
5467 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5468 ivs->num_used_inv_expr++;
5470 iv_ca_recount_cost (data, ivs);
5474 /* Extend set IVS by expressing USE by some of the candidates in it
5475 if possible. Consider all important candidates if the candidates in
5476 set IVS yield no result. */
5478 static void
5479 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5480 struct iv_use *use)
5482 struct cost_pair *best_cp = NULL, *cp;
5483 bitmap_iterator bi;
5484 unsigned i;
5485 struct iv_cand *cand;
5487 gcc_assert (ivs->upto >= use->id);
5488 ivs->upto++;
5489 ivs->bad_uses++;
5491 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5493 cand = iv_cand (data, i);
5494 cp = get_use_iv_cost (data, use, cand);
5495 if (cheaper_cost_pair (cp, best_cp))
5496 best_cp = cp;
5499 if (best_cp == NULL)
5501 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5503 cand = iv_cand (data, i);
5504 cp = get_use_iv_cost (data, use, cand);
5505 if (cheaper_cost_pair (cp, best_cp))
5506 best_cp = cp;
5510 iv_ca_set_cp (data, ivs, use, best_cp);
5513 /* Get cost for assignment IVS. */
5515 static comp_cost
5516 iv_ca_cost (struct iv_ca *ivs)
5518 /* This was a conditional expression but it triggered a bug in
5519 Sun C 5.5. */
5520 if (ivs->bad_uses)
5521 return infinite_cost;
5522 else
5523 return ivs->cost;
5526 /* Returns true if all dependences of CP are among invariants in IVS. */
5528 static bool
5529 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5531 unsigned i;
5532 bitmap_iterator bi;
5534 if (!cp->depends_on)
5535 return true;
5537 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5539 if (ivs->n_invariant_uses[i] == 0)
5540 return false;
5543 return true;
5546 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5547 it before NEXT_CHANGE. */
5549 static struct iv_ca_delta *
5550 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5551 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5553 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5555 change->use = use;
5556 change->old_cp = old_cp;
5557 change->new_cp = new_cp;
5558 change->next_change = next_change;
5560 return change;
5563 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5564 are rewritten. */
5566 static struct iv_ca_delta *
5567 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5569 struct iv_ca_delta *last;
5571 if (!l2)
5572 return l1;
5574 if (!l1)
5575 return l2;
5577 for (last = l1; last->next_change; last = last->next_change)
5578 continue;
5579 last->next_change = l2;
5581 return l1;
5584 /* Reverse the list of changes DELTA, forming the inverse to it. */
5586 static struct iv_ca_delta *
5587 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5589 struct iv_ca_delta *act, *next, *prev = NULL;
5590 struct cost_pair *tmp;
5592 for (act = delta; act; act = next)
5594 next = act->next_change;
5595 act->next_change = prev;
5596 prev = act;
5598 tmp = act->old_cp;
5599 act->old_cp = act->new_cp;
5600 act->new_cp = tmp;
5603 return prev;
5606 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5607 reverted instead. */
5609 static void
5610 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5611 struct iv_ca_delta *delta, bool forward)
5613 struct cost_pair *from, *to;
5614 struct iv_ca_delta *act;
5616 if (!forward)
5617 delta = iv_ca_delta_reverse (delta);
5619 for (act = delta; act; act = act->next_change)
5621 from = act->old_cp;
5622 to = act->new_cp;
5623 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5624 iv_ca_set_cp (data, ivs, act->use, to);
5627 if (!forward)
5628 iv_ca_delta_reverse (delta);
5631 /* Returns true if CAND is used in IVS. */
5633 static bool
5634 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5636 return ivs->n_cand_uses[cand->id] > 0;
5639 /* Returns number of induction variable candidates in the set IVS. */
5641 static unsigned
5642 iv_ca_n_cands (struct iv_ca *ivs)
5644 return ivs->n_cands;
5647 /* Free the list of changes DELTA. */
5649 static void
5650 iv_ca_delta_free (struct iv_ca_delta **delta)
5652 struct iv_ca_delta *act, *next;
5654 for (act = *delta; act; act = next)
5656 next = act->next_change;
5657 free (act);
5660 *delta = NULL;
5663 /* Allocates new iv candidates assignment. */
5665 static struct iv_ca *
5666 iv_ca_new (struct ivopts_data *data)
5668 struct iv_ca *nw = XNEW (struct iv_ca);
5670 nw->upto = 0;
5671 nw->bad_uses = 0;
5672 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5673 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5674 nw->cands = BITMAP_ALLOC (NULL);
5675 nw->n_cands = 0;
5676 nw->n_regs = 0;
5677 nw->cand_use_cost = no_cost;
5678 nw->cand_cost = 0;
5679 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5680 nw->cost = no_cost;
5681 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5682 nw->num_used_inv_expr = 0;
5684 return nw;
5687 /* Free memory occupied by the set IVS. */
5689 static void
5690 iv_ca_free (struct iv_ca **ivs)
5692 free ((*ivs)->cand_for_use);
5693 free ((*ivs)->n_cand_uses);
5694 BITMAP_FREE ((*ivs)->cands);
5695 free ((*ivs)->n_invariant_uses);
5696 free ((*ivs)->used_inv_expr);
5697 free (*ivs);
5698 *ivs = NULL;
5701 /* Dumps IVS to FILE. */
5703 static void
5704 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5706 const char *pref = " invariants ";
5707 unsigned i;
5708 comp_cost cost = iv_ca_cost (ivs);
5710 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5711 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5712 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5713 bitmap_print (file, ivs->cands, " candidates: ","\n");
5715 for (i = 0; i < ivs->upto; i++)
5717 struct iv_use *use = iv_use (data, i);
5718 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5719 if (cp)
5720 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5721 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5722 else
5723 fprintf (file, " use:%d --> ??\n", use->id);
5726 for (i = 1; i <= data->max_inv_id; i++)
5727 if (ivs->n_invariant_uses[i])
5729 fprintf (file, "%s%d", pref, i);
5730 pref = ", ";
5732 fprintf (file, "\n\n");
5735 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5736 of the new set, and store the differences in DELTA. The number of induction
5737 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it
5738 is true, the function tries to find a solution with minimal iv candidates. */
5740 static comp_cost
5741 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5742 struct iv_cand *cand, struct iv_ca_delta **delta,
5743 unsigned *n_ivs, bool min_ncand)
5745 unsigned i;
5746 comp_cost cost;
5747 struct iv_use *use;
5748 struct cost_pair *old_cp, *new_cp;
5750 *delta = NULL;
5751 for (i = 0; i < ivs->upto; i++)
5753 use = iv_use (data, i);
5754 old_cp = iv_ca_cand_for_use (ivs, use);
5756 if (old_cp
5757 && old_cp->cand == cand)
5758 continue;
5760 new_cp = get_use_iv_cost (data, use, cand);
5761 if (!new_cp)
5762 continue;
5764 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5765 continue;
5767 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5768 continue;
5770 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5773 iv_ca_delta_commit (data, ivs, *delta, true);
5774 cost = iv_ca_cost (ivs);
5775 if (n_ivs)
5776 *n_ivs = iv_ca_n_cands (ivs);
5777 iv_ca_delta_commit (data, ivs, *delta, false);
5779 return cost;
5782 /* Try narrowing set IVS by removing CAND. Return the cost of
5783 the new set and store the differences in DELTA. START is
5784 the candidate with which we start narrowing. */
5786 static comp_cost
5787 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5788 struct iv_cand *cand, struct iv_cand *start,
5789 struct iv_ca_delta **delta)
5791 unsigned i, ci;
5792 struct iv_use *use;
5793 struct cost_pair *old_cp, *new_cp, *cp;
5794 bitmap_iterator bi;
5795 struct iv_cand *cnd;
5796 comp_cost cost, best_cost, acost;
5798 *delta = NULL;
5799 for (i = 0; i < n_iv_uses (data); i++)
5801 use = iv_use (data, i);
5803 old_cp = iv_ca_cand_for_use (ivs, use);
5804 if (old_cp->cand != cand)
5805 continue;
5807 best_cost = iv_ca_cost (ivs);
5808 /* Start narrowing with START. */
5809 new_cp = get_use_iv_cost (data, use, start);
5811 if (data->consider_all_candidates)
5813 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5815 if (ci == cand->id || (start && ci == start->id))
5816 continue;
5818 cnd = iv_cand (data, ci);
5820 cp = get_use_iv_cost (data, use, cnd);
5821 if (!cp)
5822 continue;
5824 iv_ca_set_cp (data, ivs, use, cp);
5825 acost = iv_ca_cost (ivs);
5827 if (compare_costs (acost, best_cost) < 0)
5829 best_cost = acost;
5830 new_cp = cp;
5834 else
5836 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5838 if (ci == cand->id || (start && ci == start->id))
5839 continue;
5841 cnd = iv_cand (data, ci);
5843 cp = get_use_iv_cost (data, use, cnd);
5844 if (!cp)
5845 continue;
5847 iv_ca_set_cp (data, ivs, use, cp);
5848 acost = iv_ca_cost (ivs);
5850 if (compare_costs (acost, best_cost) < 0)
5852 best_cost = acost;
5853 new_cp = cp;
5857 /* Restore the old cp for this use. */
5858 iv_ca_set_cp (data, ivs, use, old_cp);
5860 if (!new_cp)
5862 iv_ca_delta_free (delta);
5863 return infinite_cost;
5866 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5869 iv_ca_delta_commit (data, ivs, *delta, true);
5870 cost = iv_ca_cost (ivs);
5871 iv_ca_delta_commit (data, ivs, *delta, false);
5873 return cost;
5876 /* Try optimizing the set of candidates IVS by removing candidates other
5877 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5878 differences in DELTA. */
5880 static comp_cost
5881 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5882 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5884 bitmap_iterator bi;
5885 struct iv_ca_delta *act_delta, *best_delta;
5886 unsigned i;
5887 comp_cost best_cost, acost;
5888 struct iv_cand *cand;
5890 best_delta = NULL;
5891 best_cost = iv_ca_cost (ivs);
5893 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5895 cand = iv_cand (data, i);
5897 if (cand == except_cand)
5898 continue;
5900 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5902 if (compare_costs (acost, best_cost) < 0)
5904 best_cost = acost;
5905 iv_ca_delta_free (&best_delta);
5906 best_delta = act_delta;
5908 else
5909 iv_ca_delta_free (&act_delta);
5912 if (!best_delta)
5914 *delta = NULL;
5915 return best_cost;
5918 /* Recurse to possibly remove other unnecessary ivs. */
5919 iv_ca_delta_commit (data, ivs, best_delta, true);
5920 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5921 iv_ca_delta_commit (data, ivs, best_delta, false);
5922 *delta = iv_ca_delta_join (best_delta, *delta);
5923 return best_cost;
5926 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
5927 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5928 the corresponding cost_pair; otherwise just return BEST_CP. */
5930 static struct cost_pair*
5931 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5932 unsigned int cand_idx, struct iv_cand *old_cand,
5933 struct cost_pair *best_cp)
5935 struct iv_cand *cand;
5936 struct cost_pair *cp;
5938 gcc_assert (old_cand != NULL && best_cp != NULL);
5939 if (cand_idx == old_cand->id)
5940 return best_cp;
5942 cand = iv_cand (data, cand_idx);
5943 cp = get_use_iv_cost (data, use, cand);
5944 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5945 return cp;
5947 return best_cp;
5950 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
5951 which are used by more than one iv use. For each of those candidates,
5952 this function tries to represent the iv uses under that candidate using
5953 other ones with lower local cost, then tries to prune the new set.
5954 If the new set has a lower cost, it returns the new cost after recording
5955 the candidate replacement in list DELTA. */
5957 static comp_cost
5958 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5959 struct iv_ca_delta **delta)
5961 bitmap_iterator bi, bj;
5962 unsigned int i, j, k;
5963 struct iv_use *use;
5964 struct iv_cand *cand;
5965 comp_cost orig_cost, acost;
5966 struct iv_ca_delta *act_delta, *tmp_delta;
5967 struct cost_pair *old_cp, *best_cp = NULL;
5969 *delta = NULL;
5970 orig_cost = iv_ca_cost (ivs);
5972 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5974 if (ivs->n_cand_uses[i] == 1
5975 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5976 continue;
5978 cand = iv_cand (data, i);
5980 act_delta = NULL;
5981 /* Represent uses under current candidate using other ones with
5982 lower local cost. */
5983 for (j = 0; j < ivs->upto; j++)
5985 use = iv_use (data, j);
5986 old_cp = iv_ca_cand_for_use (ivs, use);
5988 if (old_cp->cand != cand)
5989 continue;
5991 best_cp = old_cp;
5992 if (data->consider_all_candidates)
5993 for (k = 0; k < n_iv_cands (data); k++)
5994 best_cp = cheaper_cost_with_cand (data, use, k,
5995 old_cp->cand, best_cp);
5996 else
5997 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5998 best_cp = cheaper_cost_with_cand (data, use, k,
5999 old_cp->cand, best_cp);
6001 if (best_cp == old_cp)
6002 continue;
6004 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6006 /* No need for further pruning. */
6007 if (!act_delta)
6008 continue;
6010 /* Prune the new candidate set. */
6011 iv_ca_delta_commit (data, ivs, act_delta, true);
6012 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6013 iv_ca_delta_commit (data, ivs, act_delta, false);
6014 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6016 if (compare_costs (acost, orig_cost) < 0)
6018 *delta = act_delta;
6019 return acost;
6021 else
6022 iv_ca_delta_free (&act_delta);
6025 return orig_cost;
6028 /* Tries to extend the set IVS in the best possible way in order
6029 to express USE. If ORIGINALP is true, prefer candidates from
6030 the original set of IVs, otherwise favor important candidates not
6031 based on any memory object. */
6033 static bool
6034 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6035 struct iv_use *use, bool originalp)
6037 comp_cost best_cost, act_cost;
6038 unsigned i;
6039 bitmap_iterator bi;
6040 struct iv_cand *cand;
6041 struct iv_ca_delta *best_delta = NULL, *act_delta;
6042 struct cost_pair *cp;
6044 iv_ca_add_use (data, ivs, use);
6045 best_cost = iv_ca_cost (ivs);
6046 cp = iv_ca_cand_for_use (ivs, use);
6047 if (cp)
6049 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6050 iv_ca_set_no_cp (data, ivs, use);
6053 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6054 first try important candidates not based on any memory object. Only if
6055 this fails, try the specific ones. Rationale -- in loops with many
6056 variables the best choice often is to use just one generic biv. If we
6057 added here many ivs specific to the uses, the optimization algorithm later
6058 would be likely to get stuck in a local minimum, thus causing us to create
6059 too many ivs. The approach from few ivs to more seems more likely to be
6060 successful -- starting from few ivs, replacing an expensive use by a
6061 specific iv should always be a win. */
6062 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6064 cand = iv_cand (data, i);
6066 if (originalp && cand->pos != IP_ORIGINAL)
6067 continue;
6069 if (!originalp && cand->iv->base_object != NULL_TREE)
6070 continue;
6072 if (iv_ca_cand_used_p (ivs, cand))
6073 continue;
6075 cp = get_use_iv_cost (data, use, cand);
6076 if (!cp)
6077 continue;
6079 iv_ca_set_cp (data, ivs, use, cp);
6080 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6081 true);
6082 iv_ca_set_no_cp (data, ivs, use);
6083 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6085 if (compare_costs (act_cost, best_cost) < 0)
6087 best_cost = act_cost;
6089 iv_ca_delta_free (&best_delta);
6090 best_delta = act_delta;
6092 else
6093 iv_ca_delta_free (&act_delta);
6096 if (infinite_cost_p (best_cost))
6098 for (i = 0; i < use->n_map_members; i++)
6100 cp = use->cost_map + i;
6101 cand = cp->cand;
6102 if (!cand)
6103 continue;
6105 /* Already tried this. */
6106 if (cand->important)
6108 if (originalp && cand->pos == IP_ORIGINAL)
6109 continue;
6110 if (!originalp && cand->iv->base_object == NULL_TREE)
6111 continue;
6114 if (iv_ca_cand_used_p (ivs, cand))
6115 continue;
6117 act_delta = NULL;
6118 iv_ca_set_cp (data, ivs, use, cp);
6119 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6120 iv_ca_set_no_cp (data, ivs, use);
6121 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6122 cp, act_delta);
6124 if (compare_costs (act_cost, best_cost) < 0)
6126 best_cost = act_cost;
6128 if (best_delta)
6129 iv_ca_delta_free (&best_delta);
6130 best_delta = act_delta;
6132 else
6133 iv_ca_delta_free (&act_delta);
6137 iv_ca_delta_commit (data, ivs, best_delta, true);
6138 iv_ca_delta_free (&best_delta);
6140 return !infinite_cost_p (best_cost);
6143 /* Finds an initial assignment of candidates to uses. */
6145 static struct iv_ca *
6146 get_initial_solution (struct ivopts_data *data, bool originalp)
6148 struct iv_ca *ivs = iv_ca_new (data);
6149 unsigned i;
6151 for (i = 0; i < n_iv_uses (data); i++)
6152 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6154 iv_ca_free (&ivs);
6155 return NULL;
6158 return ivs;
6161 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6162 points to a bool variable; if it is true, this function tries to break
6163 the local optimal fixed-point by replacing candidates in IVS. */
6165 static bool
6166 try_improve_iv_set (struct ivopts_data *data,
6167 struct iv_ca *ivs, bool *try_replace_p)
6169 unsigned i, n_ivs;
6170 comp_cost acost, best_cost = iv_ca_cost (ivs);
6171 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6172 struct iv_cand *cand;
6174 /* Try extending the set of induction variables by one. */
6175 for (i = 0; i < n_iv_cands (data); i++)
6177 cand = iv_cand (data, i);
6179 if (iv_ca_cand_used_p (ivs, cand))
6180 continue;
6182 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6183 if (!act_delta)
6184 continue;
6186 /* If we successfully added the candidate and the set is small enough,
6187 try optimizing it by removing other candidates. */
6188 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6190 iv_ca_delta_commit (data, ivs, act_delta, true);
6191 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6192 iv_ca_delta_commit (data, ivs, act_delta, false);
6193 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6196 if (compare_costs (acost, best_cost) < 0)
6198 best_cost = acost;
6199 iv_ca_delta_free (&best_delta);
6200 best_delta = act_delta;
6202 else
6203 iv_ca_delta_free (&act_delta);
6206 if (!best_delta)
6208 /* Try removing the candidates from the set instead. */
6209 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6211 if (!best_delta && *try_replace_p)
6213 *try_replace_p = false;
6214 /* So far the candidate-selection algorithm tends to choose fewer IVs,
6215 so that it can handle cases in which loops have many variables
6216 but the best choice is often to use only one general biv. One
6217 weakness is that it cannot handle the opposite case, in which different
6218 candidates should be chosen with respect to each use. To solve
6219 this problem, we replace candidates in the manner described in the
6220 comment on iv_ca_replace, thus giving the general algorithm a chance
6221 to break the local optimal fixed-point in these cases. */
6222 best_cost = iv_ca_replace (data, ivs, &best_delta);
6225 if (!best_delta)
6226 return false;
6229 iv_ca_delta_commit (data, ivs, best_delta, true);
6230 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6231 iv_ca_delta_free (&best_delta);
6232 return true;
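
/* A sketch of one improvement step, with made-up costs: starting from the
   set {c1, c2} of cost 30, extending by c3 may enable pruning c2:

     {c1, c2}  --extend c3-->  {c1, c2, c3}  --prune-->  {c1, c3}   cost 26

   The extension and pruning deltas are joined (iv_ca_delta_join) and the
   combined delta is kept only if its cost beats the best one found so far,
   mirroring the calls to iv_ca_extend and iv_ca_prune above.  */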

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic -- we try to replace at most one candidate in the
   selected solution and remove the unused ivs while this improves the
   cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost using a strategy that starts with the original IVs,
     then try again with a strategy that prefers candidates not based on
     any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
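
/* For illustration, with -fdump-tree-ivopts-details the two strategies tried
   above appear in the dump as (costs hypothetical):

     Original cost 24 (complexity 2)

     Final cost 20 (complexity 1)

   in which case the set found by the second strategy is kept.  */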

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
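
/* For a candidate with base 0 and step 4 at IP_NORMAL, create_iv above emits
   GIMPLE along these lines (SSA names are illustrative):

     # ivtmp.5_8 = PHI <0(preheader), ivtmp.5_9(latch)>
     ...
     ivtmp.5_9 = ivtmp.5_8 + 4;

   with the increment placed at the position selected by cand->pos.  */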

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to introduce a
     new computation (that might also need casting the variable to unsigned
     and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt, as it might still be
	     pointed to.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
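
/* An example of the special case above (SSA names hypothetical): for the
   increment of the original biv

     i_7 = i_4 + 1;

   with CAND at IP_ORIGINAL and cand->incremented_at == use->stmt, the
   statement is left untouched as long as the stepped operand (here the
   constant 1) is invariant in the loop, avoiding a pointless round trip
   through an unsigned type.  */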

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
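
/* As an illustration (names and exact dump form hypothetical): an address
   use *p_4, where p advances by 4 each iteration, may be rewritten by
   create_mem_ref into something like

     MEM[base: p_4, index: ivtmp.5_8, step: 4]

   with BASE_HINT telling create_mem_ref that the candidate is based on the
   memory object p despite the candidate's unsigned integer type.  */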

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
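
/* An example of successful elimination (SSA names hypothetical): the exit
   test

     if (i_4 < 100)

   may be replaced by a test of the candidate against a precomputed bound,

     if (ivtmp.5_8 != 400)

   where the bound comes from CP->value and, if it needs computing, is
   forced into the loop preheader by the code above.  */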

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
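
/* Debug-info illustration (hypothetical): when the otherwise unused biv i is
   removed but its value is recoverable from a selected candidate, a bind

     # DEBUG i => i_4

   is rewritten in terms of the replacement expression, going through a
   DEBUG_EXPR_DECL temporary when i appears in several binds or inside a
   larger expression (the count > 1 case above).  */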

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
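
/* For example, a loop body containing a call

     foo (x);

   makes this predicate return true, whereas a call to a builtin classified
   as inexpensive, such as __builtin_expect, does not (see
   is_inexpensive_builtin).  */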

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}