/* gcc/tree-ssa-loop-ivopts.c */
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variables value to
	 be usable for the given purpose (adding base and offset for arrays,
	 etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
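
/* As an illustration (this example is not part of the original sources),
   consider a loop that indexes an array:

     for (i = 0; i < n; i++)
       sum += a[i];

   On a target where stepping a pointer is cheaper than recomputing the
   address a + 4 * i in every iteration, the pass may select a single
   pointer candidate and rewrite both the address use and the exit test
   in terms of it, roughly:

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   This combines strength reduction (the implicit multiplication by the
   element size is gone) with induction variable elimination (the original
   counter I becomes dead and is removed in step 4).  */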
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
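
/* For illustration (this note is not part of the original sources): a
   computation that needs one extra addition costing four units at runtime
   would be described as {4, 1}.  Roughly speaking, the cost comparison
   helpers compare the COST fields first and use COMPLEXITY only to break
   ties between alternatives of equal runtime cost.  */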
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
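
/* Illustrative examples (not from the original sources): for EXPR = &a[i]
   the ADDR_EXPR case finds the base address A and returns &a converted to
   ptr_type_node; for a plain pointer ssa name P the default case returns
   P converted to ptr_type_node; and for an integer constant of pointer
   type NULL_TREE is returned, meaning no object could be identified.  */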
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
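
/* For example (illustrative, not from the original sources): for the
   header phi

     i_1 = PHI <0(preheader), i_5(latch)>

   where the latch defines i_5 = i_1 + 4, simple_iv succeeds with a step
   of 4, which is what determine_biv_step returns; a phi whose value does
   not change within the loop has a zero step and yields NULL_TREE.  */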
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}
/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name to avoid showing it in the dumps, if it was not
     already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
1474 /* Returns the outermost loop EXPR is obviously invariant in
1475 relative to the loop LOOP, i.e. if all its operands are defined
1476 outside of the returned loop. Returns NULL if EXPR is not
1477 even obviously invariant in LOOP. */
1479 struct loop *
1480 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1482 basic_block def_bb;
1483 unsigned i, len;
1485 if (is_gimple_min_invariant (expr))
1486 return current_loops->tree_root;
1488 if (TREE_CODE (expr) == SSA_NAME)
1490 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1491 if (def_bb)
1493 if (flow_bb_inside_loop_p (loop, def_bb))
1494 return NULL;
1495 return superloop_at_depth (loop,
1496 loop_depth (def_bb->loop_father) + 1);
1499 return current_loops->tree_root;
1502 if (!EXPR_P (expr))
1503 return NULL;
1505 unsigned maxdepth = 0;
1506 len = TREE_OPERAND_LENGTH (expr);
1507 for (i = 0; i < len; i++)
1509 struct loop *ivloop;
1510 if (!TREE_OPERAND (expr, i))
1511 continue;
1513 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1514 if (!ivloop)
1515 return NULL;
1516 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1519 return superloop_at_depth (loop, maxdepth);
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
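
/* Illustrative example (not from the original sources): for a reference
   a[i_3] where the array has 4-byte elements and I_3 is an iv with step 1,
   the callback rewrites the index to the initial value of I_3 and adds
   4 * 1 == 4 to DTA->step, so the address as a whole is recognized as an
   iv advancing by four bytes per iteration.  */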
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
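
/* Illustrative examples (not from the original sources): for TOP = 8 * n
   and BOT = n the MULT_EXPR case yields *MUL == 8; for TOP = n + n and
   BOT = n the PLUS_EXPR case yields *MUL == 2; for the constants TOP = 12
   and BOT = 4 the INTEGER_CST case yields *MUL == 3, whereas TOP = 7 and
   BOT = 2 fails because the truncating division leaves a remainder.  */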
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
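
/* Illustrative example (not from the original sources): for a reference
   whose type requires 64-bit alignment but whose address advances by
   STEP == 4 bytes per iteration, tree_ctz (step) == 2, so only
   4 * BITS_PER_UNIT == 32 bits of alignment are preserved across
   iterations and the function returns true.  */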
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
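
/* For instance (illustrative, not from the original sources): a
   VIEW_CONVERT_EXPR wrapped around a gimple register is reported as
   possibly non-addressable, since taking its address would require the
   underlying object to live in memory; a TARGET_MEM_REF, by contrast,
   is always addressable by construction.  */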
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
2095 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2096 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2097 we are at the top-level of the processed address. */
2099 static tree
2100 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2101 HOST_WIDE_INT *offset)
2103 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2104 enum tree_code code;
2105 tree type, orig_type = TREE_TYPE (expr);
2106 HOST_WIDE_INT off0, off1, st;
2107 tree orig_expr = expr;
2109 STRIP_NOPS (expr);
2111 type = TREE_TYPE (expr);
2112 code = TREE_CODE (expr);
2113 *offset = 0;
2115 switch (code)
2117 case INTEGER_CST:
2118 if (!cst_and_fits_in_hwi (expr)
2119 || integer_zerop (expr))
2120 return orig_expr;
2122 *offset = int_cst_value (expr);
2123 return build_int_cst (orig_type, 0);
2125 case POINTER_PLUS_EXPR:
2126 case PLUS_EXPR:
2127 case MINUS_EXPR:
2128 op0 = TREE_OPERAND (expr, 0);
2129 op1 = TREE_OPERAND (expr, 1);
2131 op0 = strip_offset_1 (op0, false, false, &off0);
2132 op1 = strip_offset_1 (op1, false, false, &off1);
2134 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2135 if (op0 == TREE_OPERAND (expr, 0)
2136 && op1 == TREE_OPERAND (expr, 1))
2137 return orig_expr;
2139 if (integer_zerop (op1))
2140 expr = op0;
2141 else if (integer_zerop (op0))
2143 if (code == MINUS_EXPR)
2144 expr = fold_build1 (NEGATE_EXPR, type, op1);
2145 else
2146 expr = op1;
2148 else
2149 expr = fold_build2 (code, type, op0, op1);
2151 return fold_convert (orig_type, expr);
2153 case MULT_EXPR:
2154 op1 = TREE_OPERAND (expr, 1);
2155 if (!cst_and_fits_in_hwi (op1))
2156 return orig_expr;
2158 op0 = TREE_OPERAND (expr, 0);
2159 op0 = strip_offset_1 (op0, false, false, &off0);
2160 if (op0 == TREE_OPERAND (expr, 0))
2161 return orig_expr;
2163 *offset = off0 * int_cst_value (op1);
2164 if (integer_zerop (op0))
2165 expr = op0;
2166 else
2167 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2169 return fold_convert (orig_type, expr);
2171 case ARRAY_REF:
2172 case ARRAY_RANGE_REF:
2173 if (!inside_addr)
2174 return orig_expr;
2176 step = array_ref_element_size (expr);
2177 if (!cst_and_fits_in_hwi (step))
2178 break;
2180 st = int_cst_value (step);
2181 op1 = TREE_OPERAND (expr, 1);
2182 op1 = strip_offset_1 (op1, false, false, &off1);
2183 *offset = off1 * st;
2185 if (top_compref
2186 && integer_zerop (op1))
2188 /* Strip the component reference completely. */
2189 op0 = TREE_OPERAND (expr, 0);
2190 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2191 *offset += off0;
2192 return op0;
2194 break;
2196 case COMPONENT_REF:
2198 tree field;
2200 if (!inside_addr)
2201 return orig_expr;
2203 tmp = component_ref_field_offset (expr);
2204 field = TREE_OPERAND (expr, 1);
2205 if (top_compref
2206 && cst_and_fits_in_hwi (tmp)
2207 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2209 HOST_WIDE_INT boffset, abs_off;
2211 /* Strip the component reference completely. */
2212 op0 = TREE_OPERAND (expr, 0);
2213 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2214 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2215 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2216 if (boffset < 0)
2217 abs_off = -abs_off;
2219 *offset = off0 + int_cst_value (tmp) + abs_off;
2220 return op0;
2223 break;
2225 case ADDR_EXPR:
2226 op0 = TREE_OPERAND (expr, 0);
2227 op0 = strip_offset_1 (op0, true, true, &off0);
2228 *offset += off0;
2230 if (op0 == TREE_OPERAND (expr, 0))
2231 return orig_expr;
2233 expr = build_fold_addr_expr (op0);
2234 return fold_convert (orig_type, expr);
2236 case MEM_REF:
2237 /* ??? Offset operand? */
2238 inside_addr = false;
2239 break;
2241 default:
2242 return orig_expr;
2245 /* Default handling of expressions for which we want to recurse into
2246 the first operand. */
2247 op0 = TREE_OPERAND (expr, 0);
2248 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2249 *offset += off0;
2251 if (op0 == TREE_OPERAND (expr, 0)
2252 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2253 return orig_expr;
2255 expr = copy_node (expr);
2256 TREE_OPERAND (expr, 0) = op0;
2257 if (op1)
2258 TREE_OPERAND (expr, 1) = op1;
2260 /* Inside an address, we might strip the top-level component references,
2261 thus changing the type of the expression. The handling of ADDR_EXPR
2262 will fix that. */
2263 expr = fold_convert (orig_type, expr);
2265 return expr;
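/* Illustration (not from the original sources; 4-byte int layout assumed):
   for

     struct s { int a; int b; } x[16];

   applying strip_offset_1 to the address computation for &x[i + 2].b
   (INSIDE_ADDR and TOP_COMPREF true) strips the COMPONENT_REF ".b"
   (field offset 4) and the constant part of the ARRAY_REF index
   (2 * sizeof (struct s) == 16), leaving a core that depends only on i
   while *OFFSET accumulates 20. */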
2268 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2270 static tree
2271 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2273 HOST_WIDE_INT off;
2274 tree core = strip_offset_1 (expr, false, false, &off);
2275 *offset = off;
2276 return core;
2279 /* Returns variant of TYPE that can be used as base for different uses.
2280 We return unsigned type with the same precision, which avoids problems
2281 with overflows. */
2283 static tree
2284 generic_type_for (tree type)
2286 if (POINTER_TYPE_P (type))
2287 return unsigned_type_for (type);
2289 if (TYPE_UNSIGNED (type))
2290 return type;
2292 return unsigned_type_for (type);
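/* Illustration (ILP32 target assumed): generic_type_for maps both "int"
   and "int *" to "unsigned int", so a derived candidate stepped past
   INT_MAX wraps modulo 2^32 instead of invoking the undefined behavior
   of signed overflow. */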
2295 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2296 the bitmap in which we should store them. */
2298 static struct ivopts_data *fd_ivopts_data;
2299 static tree
2300 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2302 bitmap *depends_on = (bitmap *) data;
2303 struct version_info *info;
2305 if (TREE_CODE (*expr_p) != SSA_NAME)
2306 return NULL_TREE;
2307 info = name_info (fd_ivopts_data, *expr_p);
2309 if (!info->inv_id || info->has_nonlin_use)
2310 return NULL_TREE;
2312 if (!*depends_on)
2313 *depends_on = BITMAP_ALLOC (NULL);
2314 bitmap_set_bit (*depends_on, info->inv_id);
2316 return NULL_TREE;
2319 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2320 position to POS. If USE is not NULL, the candidate is set as related to
2321 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2322 replacement of the final value of the iv by a direct computation. */
2324 static struct iv_cand *
2325 add_candidate_1 (struct ivopts_data *data,
2326 tree base, tree step, bool important, enum iv_position pos,
2327 struct iv_use *use, gimple incremented_at)
2329 unsigned i;
2330 struct iv_cand *cand = NULL;
2331 tree type, orig_type;
2333 /* For non-original variables, make sure their values are computed in a type
2334 that does not invoke undefined behavior on overflows (since in general,
2335 we cannot prove that these induction variables are non-wrapping). */
2336 if (pos != IP_ORIGINAL)
2338 orig_type = TREE_TYPE (base);
2339 type = generic_type_for (orig_type);
2340 if (type != orig_type)
2342 base = fold_convert (type, base);
2343 step = fold_convert (type, step);
2347 for (i = 0; i < n_iv_cands (data); i++)
2349 cand = iv_cand (data, i);
2351 if (cand->pos != pos)
2352 continue;
2354 if (cand->incremented_at != incremented_at
2355 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2356 && cand->ainc_use != use))
2357 continue;
2359 if (!cand->iv)
2361 if (!base && !step)
2362 break;
2364 continue;
2367 if (!base && !step)
2368 continue;
2370 if (operand_equal_p (base, cand->iv->base, 0)
2371 && operand_equal_p (step, cand->iv->step, 0)
2372 && (TYPE_PRECISION (TREE_TYPE (base))
2373 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2374 break;
2377 if (i == n_iv_cands (data))
2379 cand = XCNEW (struct iv_cand);
2380 cand->id = i;
2382 if (!base && !step)
2383 cand->iv = NULL;
2384 else
2385 cand->iv = alloc_iv (base, step);
2387 cand->pos = pos;
2388 if (pos != IP_ORIGINAL && cand->iv)
2390 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2391 cand->var_after = cand->var_before;
2393 cand->important = important;
2394 cand->incremented_at = incremented_at;
2395 data->iv_candidates.safe_push (cand);
2397 if (step
2398 && TREE_CODE (step) != INTEGER_CST)
2400 fd_ivopts_data = data;
2401 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2404 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2405 cand->ainc_use = use;
2406 else
2407 cand->ainc_use = NULL;
2409 if (dump_file && (dump_flags & TDF_DETAILS))
2410 dump_cand (dump_file, cand);
2413 if (important && !cand->important)
2415 cand->important = true;
2416 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2420 if (use)
2422 bitmap_set_bit (use->related_cands, i);
2423 if (dump_file && (dump_flags & TDF_DETAILS))
2424 fprintf (dump_file, "Candidate %d is related to use %d\n",
2425 cand->id, use->id);
2428 return cand;
2431 /* Returns true if incrementing the induction variable at the end of the LOOP
2432 is allowed.
2434 The purpose is to avoid splitting the latch edge with a biv increment, thus
2435 creating a jump, possibly confusing other optimization passes and leaving
2436 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2437 is not available (so we do not have a better alternative), or if the latch
2438 edge is already nonempty. */
2440 static bool
2441 allow_ip_end_pos_p (struct loop *loop)
2443 if (!ip_normal_pos (loop))
2444 return true;
2446 if (!empty_block_p (ip_end_pos (loop)))
2447 return true;
2449 return false;
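/* Illustration: if the latch block already ends with real statements,
   placing the increment at IP_END adds no new jump, so it is allowed;
   with an empty latch, the increment would have to split the latch edge,
   so IP_NORMAL before the exit test is preferred whenever it exists. */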
2452 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2453 Important field is set to IMPORTANT. */
2455 static void
2456 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2457 bool important, struct iv_use *use)
2459 basic_block use_bb = gimple_bb (use->stmt);
2460 machine_mode mem_mode;
2461 unsigned HOST_WIDE_INT cstepi;
2463 /* If we insert the increment in any position other than the standard
2464 ones, we must ensure that it is incremented once per iteration.
2465 It must not be in an inner nested loop, or one side of an if
2466 statement. */
2467 if (use_bb->loop_father != data->current_loop
2468 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2469 || stmt_could_throw_p (use->stmt)
2470 || !cst_and_fits_in_hwi (step))
2471 return;
2473 cstepi = int_cst_value (step);
2475 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2476 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2477 || USE_STORE_PRE_INCREMENT (mem_mode))
2478 && GET_MODE_SIZE (mem_mode) == cstepi)
2479 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2480 || USE_STORE_PRE_DECREMENT (mem_mode))
2481 && GET_MODE_SIZE (mem_mode) == -cstepi))
2483 enum tree_code code = MINUS_EXPR;
2484 tree new_base;
2485 tree new_step = step;
2487 if (POINTER_TYPE_P (TREE_TYPE (base)))
2489 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2490 code = POINTER_PLUS_EXPR;
2492 else
2493 new_step = fold_convert (TREE_TYPE (base), new_step);
2494 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2495 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2496 use->stmt);
2498 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2499 || USE_STORE_POST_INCREMENT (mem_mode))
2500 && GET_MODE_SIZE (mem_mode) == cstepi)
2501 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2502 || USE_STORE_POST_DECREMENT (mem_mode))
2503 && GET_MODE_SIZE (mem_mode) == -cstepi))
2505 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2506 use->stmt);
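/* Illustration (target-dependent): for a QImode store advanced by step 1
   on a target with post-increment addressing (e.g. "strb r1, [r0], #1"
   on ARM), GET_MODE_SIZE (mem_mode) == cstepi, so an IP_AFTER_USE
   candidate is added and the increment can later be folded into the
   memory access itself. */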
2510 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2511 position to POS. If USE is not NULL, the candidate is set as related to
2512 it. The candidate computation is scheduled on all available positions. */
2514 static void
2515 add_candidate (struct ivopts_data *data,
2516 tree base, tree step, bool important, struct iv_use *use)
2518 if (ip_normal_pos (data->current_loop))
2519 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2520 if (ip_end_pos (data->current_loop)
2521 && allow_ip_end_pos_p (data->current_loop))
2522 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2524 if (use != NULL && use->type == USE_ADDRESS)
2525 add_autoinc_candidates (data, base, step, important, use);
2528 /* Adds standard iv candidates. */
2530 static void
2531 add_standard_iv_candidates (struct ivopts_data *data)
2533 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2535 /* The same for the wider "long" type if it is still fast enough. */
2536 if (TYPE_PRECISION
2537 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2538 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2539 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2540 build_int_cst (long_integer_type_node, 1), true, NULL);
2542 /* And again for "long long" if it is still fast enough. */
2543 if (TYPE_PRECISION
2544 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2545 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2546 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2547 build_int_cst (long_long_integer_type_node, 1), true, NULL);
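/* Illustration (typical LP64 target assumed): "int" contributes the
   candidate {0, +, 1}; "long" is wider (64 > 32 bits) and equal to
   BITS_PER_WORD, so a 64-bit {0, +, 1} is added as well; "long long"
   is then skipped because it is no wider than "long". */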
2551 /* Adds candidates based on the old induction variable IV. */
2553 static void
2554 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2556 gimple phi;
2557 tree def;
2558 struct iv_cand *cand;
2560 add_candidate (data, iv->base, iv->step, true, NULL);
2562 /* The same, but with initial value zero. */
2563 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2564 add_candidate (data, size_int (0), iv->step, true, NULL);
2565 else
2566 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2567 iv->step, true, NULL);
2569 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2570 if (gimple_code (phi) == GIMPLE_PHI)
2572 /* Additionally record the possibility of leaving the original iv
2573 untouched. */
2574 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2575 /* Don't add candidate if it's from another PHI node because
2576 it's an affine iv appearing in the form of PEELED_CHREC. */
2577 phi = SSA_NAME_DEF_STMT (def);
2578 if (gimple_code (phi) != GIMPLE_PHI)
2580 cand = add_candidate_1 (data,
2581 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2582 SSA_NAME_DEF_STMT (def));
2583 cand->var_before = iv->ssa_name;
2584 cand->var_after = def;
2586 else
2587 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2591 /* Adds candidates based on the old induction variables. */
2593 static void
2594 add_old_ivs_candidates (struct ivopts_data *data)
2596 unsigned i;
2597 struct iv *iv;
2598 bitmap_iterator bi;
2600 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2602 iv = ver_info (data, i)->iv;
2603 if (iv && iv->biv_p && !integer_zerop (iv->step))
2604 add_old_iv_candidates (data, iv);
2608 /* Adds candidates based on the value of the induction variable IV and USE. */
2610 static void
2611 add_iv_value_candidates (struct ivopts_data *data,
2612 struct iv *iv, struct iv_use *use)
2614 unsigned HOST_WIDE_INT offset;
2615 tree base;
2616 tree basetype;
2618 add_candidate (data, iv->base, iv->step, false, use);
2620 /* The same, but with initial value zero. Make such a variable important,
2621 since it is generic enough that possibly many uses may be based
2622 on it. */
2623 basetype = TREE_TYPE (iv->base);
2624 if (POINTER_TYPE_P (basetype))
2625 basetype = sizetype;
2626 add_candidate (data, build_int_cst (basetype, 0),
2627 iv->step, true, use);
2629 /* Third, try removing the constant offset. Make sure to even
2630 add a candidate for &a[0] vs. (T *)&a. */
2631 base = strip_offset (iv->base, &offset);
2632 if (offset
2633 || base != iv->base)
2634 add_candidate (data, base, iv->step, false, use);
2637 /* Adds candidates based on the uses. */
2639 static void
2640 add_derived_ivs_candidates (struct ivopts_data *data)
2642 unsigned i;
2644 for (i = 0; i < n_iv_uses (data); i++)
2646 struct iv_use *use = iv_use (data, i);
2648 if (!use)
2649 continue;
2651 switch (use->type)
2653 case USE_NONLINEAR_EXPR:
2654 case USE_COMPARE:
2655 case USE_ADDRESS:
2656 /* Just add the ivs based on the value of the iv used here. */
2657 add_iv_value_candidates (data, use->iv, use);
2658 break;
2660 default:
2661 gcc_unreachable ();
2666 /* Record important candidates and add them to related_cands bitmaps
2667 if needed. */
2669 static void
2670 record_important_candidates (struct ivopts_data *data)
2672 unsigned i;
2673 struct iv_use *use;
2675 for (i = 0; i < n_iv_cands (data); i++)
2677 struct iv_cand *cand = iv_cand (data, i);
2679 if (cand->important)
2680 bitmap_set_bit (data->important_candidates, i);
2683 data->consider_all_candidates = (n_iv_cands (data)
2684 <= CONSIDER_ALL_CANDIDATES_BOUND);
2686 if (data->consider_all_candidates)
2688 /* We will not need "related_cands" bitmaps in this case,
2689 so release them to decrease peak memory consumption. */
2690 for (i = 0; i < n_iv_uses (data); i++)
2692 use = iv_use (data, i);
2693 BITMAP_FREE (use->related_cands);
2696 else
2698 /* Add important candidates to the related_cands bitmaps. */
2699 for (i = 0; i < n_iv_uses (data); i++)
2700 bitmap_ior_into (iv_use (data, i)->related_cands,
2701 data->important_candidates);
2705 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2706 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2707 we allocate a simple list to every use. */
2709 static void
2710 alloc_use_cost_map (struct ivopts_data *data)
2712 unsigned i, size, s;
2714 for (i = 0; i < n_iv_uses (data); i++)
2716 struct iv_use *use = iv_use (data, i);
2718 if (data->consider_all_candidates)
2719 size = n_iv_cands (data);
2720 else
2722 s = bitmap_count_bits (use->related_cands);
2724 /* Round up to a power of two, so that taking the modulo by it is fast. */
2725 size = s ? (1 << ceil_log2 (s)) : 1;
2728 use->n_map_members = size;
2729 use->cost_map = XCNEWVEC (struct cost_pair, size);
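/* Illustration: a use related to 5 candidates gets
   1 << ceil_log2 (5) == 8 cost_pair slots, so set_use_iv_cost and
   get_use_iv_cost below can reduce candidate ids with "id & 7"
   instead of a modulo operation. */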
2733 /* Returns description of computation cost of expression whose runtime
2734 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2736 static comp_cost
2737 new_cost (unsigned runtime, unsigned complexity)
2739 comp_cost cost;
2741 cost.cost = runtime;
2742 cost.complexity = complexity;
2744 return cost;
2747 /* Adds costs COST1 and COST2. */
2749 static comp_cost
2750 add_costs (comp_cost cost1, comp_cost cost2)
2752 cost1.cost += cost2.cost;
2753 cost1.complexity += cost2.complexity;
2755 return cost1;
2757 /* Subtracts cost COST2 from COST1. */
2759 static comp_cost
2760 sub_costs (comp_cost cost1, comp_cost cost2)
2762 cost1.cost -= cost2.cost;
2763 cost1.complexity -= cost2.complexity;
2765 return cost1;
2768 /* Returns a negative number if COST1 < COST2, a positive number if
2769 COST1 > COST2, and 0 if COST1 = COST2. */
2771 static int
2772 compare_costs (comp_cost cost1, comp_cost cost2)
2774 if (cost1.cost == cost2.cost)
2775 return cost1.complexity - cost2.complexity;
2777 return cost1.cost - cost2.cost;
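/* Illustration (made-up values): compare_costs (new_cost (4, 2),
   new_cost (4, 1)) returns a positive number -- equal runtime costs
   are broken by complexity -- while add_costs (new_cost (4, 2),
   new_cost (1, 0)) yields {5, 2}. */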
2780 /* Returns true if COST is infinite. */
2782 static bool
2783 infinite_cost_p (comp_cost cost)
2785 return cost.cost == INFTY;
2788 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2789 on invariants DEPENDS_ON and that the value used in expressing it
2790 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2792 static void
2793 set_use_iv_cost (struct ivopts_data *data,
2794 struct iv_use *use, struct iv_cand *cand,
2795 comp_cost cost, bitmap depends_on, tree value,
2796 enum tree_code comp, int inv_expr_id)
2798 unsigned i, s;
2800 if (infinite_cost_p (cost))
2802 BITMAP_FREE (depends_on);
2803 return;
2806 if (data->consider_all_candidates)
2808 use->cost_map[cand->id].cand = cand;
2809 use->cost_map[cand->id].cost = cost;
2810 use->cost_map[cand->id].depends_on = depends_on;
2811 use->cost_map[cand->id].value = value;
2812 use->cost_map[cand->id].comp = comp;
2813 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2814 return;
2817 /* n_map_members is a power of two, so this computes modulo. */
2818 s = cand->id & (use->n_map_members - 1);
2819 for (i = s; i < use->n_map_members; i++)
2820 if (!use->cost_map[i].cand)
2821 goto found;
2822 for (i = 0; i < s; i++)
2823 if (!use->cost_map[i].cand)
2824 goto found;
2826 gcc_unreachable ();
2828 found:
2829 use->cost_map[i].cand = cand;
2830 use->cost_map[i].cost = cost;
2831 use->cost_map[i].depends_on = depends_on;
2832 use->cost_map[i].value = value;
2833 use->cost_map[i].comp = comp;
2834 use->cost_map[i].inv_expr_id = inv_expr_id;
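/* Illustration: with n_map_members == 8, a candidate with id 11 starts
   probing at slot 11 & 7 == 3; if slots 3 and 4 are occupied the pair is
   stored in slot 5. get_use_iv_cost below replays the same probe order,
   so hitting an empty slot proves the candidate was never recorded. */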
2837 /* Gets cost of (USE, CANDIDATE) pair. */
2839 static struct cost_pair *
2840 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2841 struct iv_cand *cand)
2843 unsigned i, s;
2844 struct cost_pair *ret;
2846 if (!cand)
2847 return NULL;
2849 if (data->consider_all_candidates)
2851 ret = use->cost_map + cand->id;
2852 if (!ret->cand)
2853 return NULL;
2855 return ret;
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s = cand->id & (use->n_map_members - 1);
2860 for (i = s; i < use->n_map_members; i++)
2861 if (use->cost_map[i].cand == cand)
2862 return use->cost_map + i;
2863 else if (use->cost_map[i].cand == NULL)
2864 return NULL;
2865 for (i = 0; i < s; i++)
2866 if (use->cost_map[i].cand == cand)
2867 return use->cost_map + i;
2868 else if (use->cost_map[i].cand == NULL)
2869 return NULL;
2871 return NULL;
2874 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2875 static rtx
2876 produce_memory_decl_rtl (tree obj, int *regno)
2878 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2879 machine_mode address_mode = targetm.addr_space.address_mode (as);
2880 rtx x;
2882 gcc_assert (obj);
2883 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2885 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2886 x = gen_rtx_SYMBOL_REF (address_mode, name);
2887 SET_SYMBOL_REF_DECL (x, obj);
2888 x = gen_rtx_MEM (DECL_MODE (obj), x);
2889 set_mem_addr_space (x, as);
2890 targetm.encode_section_info (obj, x, true);
2892 else
2894 x = gen_raw_REG (address_mode, (*regno)++);
2895 x = gen_rtx_MEM (DECL_MODE (obj), x);
2896 set_mem_addr_space (x, as);
2899 return x;
2902 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2903 walk_tree. DATA contains the actual fake register number. */
2905 static tree
2906 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2908 tree obj = NULL_TREE;
2909 rtx x = NULL_RTX;
2910 int *regno = (int *) data;
2912 switch (TREE_CODE (*expr_p))
2914 case ADDR_EXPR:
2915 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2916 handled_component_p (*expr_p);
2917 expr_p = &TREE_OPERAND (*expr_p, 0))
2918 continue;
2919 obj = *expr_p;
2920 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2921 x = produce_memory_decl_rtl (obj, regno);
2922 break;
2924 case SSA_NAME:
2925 *ws = 0;
2926 obj = SSA_NAME_VAR (*expr_p);
2927 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2928 if (!obj)
2929 return NULL_TREE;
2930 if (!DECL_RTL_SET_P (obj))
2931 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2932 break;
2934 case VAR_DECL:
2935 case PARM_DECL:
2936 case RESULT_DECL:
2937 *ws = 0;
2938 obj = *expr_p;
2940 if (DECL_RTL_SET_P (obj))
2941 break;
2943 if (DECL_MODE (obj) == BLKmode)
2944 x = produce_memory_decl_rtl (obj, regno);
2945 else
2946 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2948 break;
2950 default:
2951 break;
2954 if (x)
2956 decl_rtl_to_reset.safe_push (obj);
2957 SET_DECL_RTL (obj, x);
2960 return NULL_TREE;
2963 /* Determines cost of the computation of EXPR. */
2965 static unsigned
2966 computation_cost (tree expr, bool speed)
2968 rtx_insn *seq;
2969 rtx rslt;
2970 tree type = TREE_TYPE (expr);
2971 unsigned cost;
2972 /* Avoid using hard regs in ways which may be unsupported. */
2973 int regno = LAST_VIRTUAL_REGISTER + 1;
2974 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2975 enum node_frequency real_frequency = node->frequency;
2977 node->frequency = NODE_FREQUENCY_NORMAL;
2978 crtl->maybe_hot_insn_p = speed;
2979 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2980 start_sequence ();
2981 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2982 seq = get_insns ();
2983 end_sequence ();
2984 default_rtl_profile ();
2985 node->frequency = real_frequency;
2987 cost = seq_cost (seq, speed);
2988 if (MEM_P (rslt))
2989 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2990 TYPE_ADDR_SPACE (type), speed);
2991 else if (!REG_P (rslt))
2992 cost += set_src_cost (rslt, speed);
2994 return cost;
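/* Illustration: computation_cost (build_int_cst (integer_type_node, 2000),
   speed) expands the constant to an RTL sequence and sums the insn costs;
   force_expr_to_var_cost below uses exactly this call to calibrate its
   static integer_cost[] table. */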
2997 /* Returns variable containing the value of candidate CAND at statement AT. */
2999 static tree
3000 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3002 if (stmt_after_increment (loop, cand, stmt))
3003 return cand->var_after;
3004 else
3005 return cand->var_before;
3008 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3009 same precision that is at least as wide as the precision of TYPE, stores
3010 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3011 type of A and B. */
3013 static tree
3014 determine_common_wider_type (tree *a, tree *b)
3016 tree wider_type = NULL;
3017 tree suba, subb;
3018 tree atype = TREE_TYPE (*a);
3020 if (CONVERT_EXPR_P (*a))
3022 suba = TREE_OPERAND (*a, 0);
3023 wider_type = TREE_TYPE (suba);
3024 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3025 return atype;
3027 else
3028 return atype;
3030 if (CONVERT_EXPR_P (*b))
3032 subb = TREE_OPERAND (*b, 0);
3033 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3034 return atype;
3036 else
3037 return atype;
3039 *a = suba;
3040 *b = subb;
3041 return wider_type;
3044 /* Determines the expression by which USE is expressed from induction variable
3045 CAND at statement AT in LOOP. The expression is stored in a decomposed
3046 form into AFF. Returns false if USE cannot be expressed using CAND. */
3048 static bool
3049 get_computation_aff (struct loop *loop,
3050 struct iv_use *use, struct iv_cand *cand, gimple at,
3051 struct aff_tree *aff)
3053 tree ubase = use->iv->base;
3054 tree ustep = use->iv->step;
3055 tree cbase = cand->iv->base;
3056 tree cstep = cand->iv->step, cstep_common;
3057 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3058 tree common_type, var;
3059 tree uutype;
3060 aff_tree cbase_aff, var_aff;
3061 widest_int rat;
3063 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3065 /* We do not have enough precision to express the values of the use. */
3066 return false;
3069 var = var_at_stmt (loop, cand, at);
3070 uutype = unsigned_type_for (utype);
3072 /* If the conversion is not noop, perform it. */
3073 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3075 cstep = fold_convert (uutype, cstep);
3076 cbase = fold_convert (uutype, cbase);
3077 var = fold_convert (uutype, var);
3080 if (!constant_multiple_of (ustep, cstep, &rat))
3081 return false;
3083 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3084 type, we achieve better folding by computing their difference in this
3085 wider type, and casting the result to UUTYPE. We do not need to worry about
3086 overflows, as all the arithmetic will in the end be performed in UUTYPE
3087 anyway. */
3088 common_type = determine_common_wider_type (&ubase, &cbase);
3090 /* use = ubase - ratio * cbase + ratio * var. */
3091 tree_to_aff_combination (ubase, common_type, aff);
3092 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3093 tree_to_aff_combination (var, uutype, &var_aff);
3095 /* We need to shift the value if we are after the increment. */
3096 if (stmt_after_increment (loop, cand, at))
3098 aff_tree cstep_aff;
3100 if (common_type != uutype)
3101 cstep_common = fold_convert (common_type, cstep);
3102 else
3103 cstep_common = cstep;
3105 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3106 aff_combination_add (&cbase_aff, &cstep_aff);
3109 aff_combination_scale (&cbase_aff, -rat);
3110 aff_combination_add (aff, &cbase_aff);
3111 if (common_type != uutype)
3112 aff_combination_convert (aff, uutype);
3114 aff_combination_scale (&var_aff, rat);
3115 aff_combination_add (aff, &var_aff);
3117 return true;
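/* Illustration (made-up ivs): expressing the use {4, +, 8} from the
   candidate {0, +, 4} gives rat == 2, so the combination built above is
   ubase - 2 * cbase + 2 * var == 2 * var + 4, evaluated in the unsigned
   variant of the use type. */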
3120 /* Return the type of USE. */
3122 static tree
3123 get_use_type (struct iv_use *use)
3125 tree base_type = TREE_TYPE (use->iv->base);
3126 tree type;
3128 if (use->type == USE_ADDRESS)
3130 /* The base_type may be a void pointer. Create a pointer type based on
3131 the mem_ref instead. */
3132 type = build_pointer_type (TREE_TYPE (*use->op_p));
3133 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3134 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3136 else
3137 type = base_type;
3139 return type;
3142 /* Determines the expression by which USE is expressed from induction variable
3143 CAND at statement AT in LOOP. The computation is unshared. */
3145 static tree
3146 get_computation_at (struct loop *loop,
3147 struct iv_use *use, struct iv_cand *cand, gimple at)
3149 aff_tree aff;
3150 tree type = get_use_type (use);
3152 if (!get_computation_aff (loop, use, cand, at, &aff))
3153 return NULL_TREE;
3154 unshare_aff_combination (&aff);
3155 return fold_convert (type, aff_combination_to_tree (&aff));
3158 /* Determines the expression by which USE is expressed from induction variable
3159 CAND in LOOP. The computation is unshared. */
3161 static tree
3162 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3164 return get_computation_at (loop, use, cand, use->stmt);
3167 /* Adjust the cost COST for being in loop setup rather than loop body.
3168 If we're optimizing for space, the loop setup overhead is constant;
3169 if we're optimizing for speed, amortize it over the per-iteration cost. */
3170 static unsigned
3171 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3173 if (cost == INFTY)
3174 return cost;
3175 else if (optimize_loop_for_speed_p (data->current_loop))
3176 return cost / avg_loop_niter (data->current_loop);
3177 else
3178 return cost;
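/* Illustration: a setup cost of 20 with avg_loop_niter == 10 is accounted
   as 2 when optimizing the loop for speed (the setup executes once per
   10 iterations on average), but stays 20 when optimizing for size. */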
3181 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3182 validity for a memory reference accessing memory of mode MODE in
3183 address space AS. */
3186 bool
3187 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3188 addr_space_t as)
3190 #define MAX_RATIO 128
3191 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3192 static vec<sbitmap> valid_mult_list;
3193 sbitmap valid_mult;
3195 if (data_index >= valid_mult_list.length ())
3196 valid_mult_list.safe_grow_cleared (data_index + 1);
3198 valid_mult = valid_mult_list[data_index];
3199 if (!valid_mult)
3201 machine_mode address_mode = targetm.addr_space.address_mode (as);
3202 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3203 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3204 rtx addr, scaled;
3205 HOST_WIDE_INT i;
3207 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3208 bitmap_clear (valid_mult);
3209 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3210 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3211 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3213 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3214 if (memory_address_addr_space_p (mode, addr, as)
3215 || memory_address_addr_space_p (mode, scaled, as))
3216 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3219 if (dump_file && (dump_flags & TDF_DETAILS))
3221 fprintf (dump_file, " allowed multipliers:");
3222 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3223 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3224 fprintf (dump_file, " %d", (int) i);
3225 fprintf (dump_file, "\n");
3226 fprintf (dump_file, "\n");
3229 valid_mult_list[data_index] = valid_mult;
3232 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3233 return false;
3235 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
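/* Illustration (target-dependent): on x86 the probes above typically
   accept the ratios 1, 2, 4 and 8 -- the scales encodable in an
   addressing mode -- so only those bits end up set in the sbitmap
   cached for SImode in each address space. */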
3238 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3239 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3240 variable is omitted. Compute the cost for a memory reference that accesses
3241 a memory location of mode MEM_MODE in address space AS.
3243 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3244 size of MEM_MODE / RATIO) is available. To make this determination, we
3245 look at the size of the increment to be made, which is given in CSTEP.
3246 CSTEP may be zero if the step is unknown.
3247 STMT_AFTER_INC is true iff the statement we're looking at is after the
3248 increment of the original biv.
3250 TODO -- there must be some better way. This all is quite crude. */
3252 enum ainc_type
3254 AINC_PRE_INC, /* Pre increment. */
3255 AINC_PRE_DEC, /* Pre decrement. */
3256 AINC_POST_INC, /* Post increment. */
3257 AINC_POST_DEC, /* Post decrement. */
3258 AINC_NONE /* Also the number of auto increment types. */
3261 typedef struct address_cost_data_s
3263 HOST_WIDE_INT min_offset, max_offset;
3264 unsigned costs[2][2][2][2];
3265 unsigned ainc_costs[AINC_NONE];
3266 } *address_cost_data;
3269 static comp_cost
3270 get_address_cost (bool symbol_present, bool var_present,
3271 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3272 HOST_WIDE_INT cstep, machine_mode mem_mode,
3273 addr_space_t as, bool speed,
3274 bool stmt_after_inc, bool *may_autoinc)
3276 machine_mode address_mode = targetm.addr_space.address_mode (as);
3277 static vec<address_cost_data> address_cost_data_list;
3278 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3279 address_cost_data data;
3280 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3281 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3282 unsigned cost, acost, complexity;
3283 enum ainc_type autoinc_type;
3284 bool offset_p, ratio_p, autoinc;
3285 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3286 unsigned HOST_WIDE_INT mask;
3287 unsigned bits;
3289 if (data_index >= address_cost_data_list.length ())
3290 address_cost_data_list.safe_grow_cleared (data_index + 1);
3292 data = address_cost_data_list[data_index];
3293 if (!data)
3295 HOST_WIDE_INT i;
3296 HOST_WIDE_INT rat, off = 0;
3297 int old_cse_not_expected, width;
3298 unsigned sym_p, var_p, off_p, rat_p, add_c;
3299 rtx_insn *seq;
3300 rtx addr, base;
3301 rtx reg0, reg1;
3303 data = (address_cost_data) xcalloc (1, sizeof (*data));
3305 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3307 width = GET_MODE_BITSIZE (address_mode) - 1;
3308 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3309 width = HOST_BITS_PER_WIDE_INT - 1;
3310 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3312 for (i = width; i >= 0; i--)
3314 off = -((unsigned HOST_WIDE_INT) 1 << i);
3315 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3316 if (memory_address_addr_space_p (mem_mode, addr, as))
3317 break;
3319 data->min_offset = (i == -1? 0 : off);
3321 for (i = width; i >= 0; i--)
3323 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3324 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3325 if (memory_address_addr_space_p (mem_mode, addr, as))
3326 break;
3327 /* For some targets, like ARM THUMB1, the offset should be naturally
3328 aligned. Try an aligned offset if address_mode is not QImode. */
3329 off = (address_mode == QImode)
3330 ? 0
3331 : ((unsigned HOST_WIDE_INT) 1 << i)
3332 - GET_MODE_SIZE (address_mode);
3333 if (off > 0)
3335 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3336 if (memory_address_addr_space_p (mem_mode, addr, as))
3337 break;
3340 if (i == -1)
3341 off = 0;
3342 data->max_offset = off;
3344 if (dump_file && (dump_flags & TDF_DETAILS))
3346 fprintf (dump_file, "get_address_cost:\n");
3347 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3348 GET_MODE_NAME (mem_mode),
3349 data->min_offset);
3350 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3351 GET_MODE_NAME (mem_mode),
3352 data->max_offset);
3355 rat = 1;
3356 for (i = 2; i <= MAX_RATIO; i++)
3357 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3359 rat = i;
3360 break;
3363 /* Compute the cost of various addressing modes. */
3364 acost = 0;
3365 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3366 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3368 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3369 || USE_STORE_PRE_DECREMENT (mem_mode))
3371 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3372 has_predec[mem_mode]
3373 = memory_address_addr_space_p (mem_mode, addr, as);
3375 if (has_predec[mem_mode])
3376 data->ainc_costs[AINC_PRE_DEC]
3377 = address_cost (addr, mem_mode, as, speed);
3379 if (USE_LOAD_POST_DECREMENT (mem_mode)
3380 || USE_STORE_POST_DECREMENT (mem_mode))
3382 addr = gen_rtx_POST_DEC (address_mode, reg0);
3383 has_postdec[mem_mode]
3384 = memory_address_addr_space_p (mem_mode, addr, as);
3386 if (has_postdec[mem_mode])
3387 data->ainc_costs[AINC_POST_DEC]
3388 = address_cost (addr, mem_mode, as, speed);
3390 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3391 || USE_STORE_PRE_INCREMENT (mem_mode))
3393 addr = gen_rtx_PRE_INC (address_mode, reg0);
3394 has_preinc[mem_mode]
3395 = memory_address_addr_space_p (mem_mode, addr, as);
3397 if (has_preinc[mem_mode])
3398 data->ainc_costs[AINC_PRE_INC]
3399 = address_cost (addr, mem_mode, as, speed);
3401 if (USE_LOAD_POST_INCREMENT (mem_mode)
3402 || USE_STORE_POST_INCREMENT (mem_mode))
3404 addr = gen_rtx_POST_INC (address_mode, reg0);
3405 has_postinc[mem_mode]
3406 = memory_address_addr_space_p (mem_mode, addr, as);
3408 if (has_postinc[mem_mode])
3409 data->ainc_costs[AINC_POST_INC]
3410 = address_cost (addr, mem_mode, as, speed);
3412 for (i = 0; i < 16; i++)
3414 sym_p = i & 1;
3415 var_p = (i >> 1) & 1;
3416 off_p = (i >> 2) & 1;
3417 rat_p = (i >> 3) & 1;
3419 addr = reg0;
3420 if (rat_p)
3421 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3422 gen_int_mode (rat, address_mode));
3424 if (var_p)
3425 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3427 if (sym_p)
3429 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3430 /* ??? We can run into trouble with some backends by presenting
3431 it with symbols which haven't been properly passed through
3432 targetm.encode_section_info. By setting the local bit, we
3433 enhance the probability of things working. */
3434 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3436 if (off_p)
3437 base = gen_rtx_fmt_e (CONST, address_mode,
3438 gen_rtx_fmt_ee
3439 (PLUS, address_mode, base,
3440 gen_int_mode (off, address_mode)));
3442 else if (off_p)
3443 base = gen_int_mode (off, address_mode);
3444 else
3445 base = NULL_RTX;
3447 if (base)
3448 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3450 start_sequence ();
3451 /* To avoid splitting addressing modes, pretend that no cse will
3452 follow. */
3453 old_cse_not_expected = cse_not_expected;
3454 cse_not_expected = true;
3455 addr = memory_address_addr_space (mem_mode, addr, as);
3456 cse_not_expected = old_cse_not_expected;
3457 seq = get_insns ();
3458 end_sequence ();
3460 acost = seq_cost (seq, speed);
3461 acost += address_cost (addr, mem_mode, as, speed);
3463 if (!acost)
3464 acost = 1;
3465 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3468 /* On some targets, it is quite expensive to load symbol to a register,
3469 which makes addresses that contain symbols look much more expensive.
3470 However, the symbol will have to be loaded in any case before the
3471 loop (and quite likely we have it in register already), so it does not
3472 make much sense to penalize them too heavily. So make some final
3473 tweaks for the SYMBOL_PRESENT modes:
3475 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3476 var is cheaper, use this mode with small penalty.
3477 If VAR_PRESENT is true, try whether the mode with
3478 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3479 if this is the case, use it. */
3480 add_c = add_cost (speed, address_mode);
3481 for (i = 0; i < 8; i++)
3483 var_p = i & 1;
3484 off_p = (i >> 1) & 1;
3485 rat_p = (i >> 2) & 1;
3487 acost = data->costs[0][1][off_p][rat_p] + 1;
3488 if (var_p)
3489 acost += add_c;
3491 if (acost < data->costs[1][var_p][off_p][rat_p])
3492 data->costs[1][var_p][off_p][rat_p] = acost;
3495 if (dump_file && (dump_flags & TDF_DETAILS))
3497 fprintf (dump_file, "Address costs:\n");
3499 for (i = 0; i < 16; i++)
3501 sym_p = i & 1;
3502 var_p = (i >> 1) & 1;
3503 off_p = (i >> 2) & 1;
3504 rat_p = (i >> 3) & 1;
3506 fprintf (dump_file, " ");
3507 if (sym_p)
3508 fprintf (dump_file, "sym + ");
3509 if (var_p)
3510 fprintf (dump_file, "var + ");
3511 if (off_p)
3512 fprintf (dump_file, "cst + ");
3513 if (rat_p)
3514 fprintf (dump_file, "rat * ");
3516 acost = data->costs[sym_p][var_p][off_p][rat_p];
3517 fprintf (dump_file, "index costs %d\n", acost);
3519 if (has_predec[mem_mode] || has_postdec[mem_mode]
3520 || has_preinc[mem_mode] || has_postinc[mem_mode])
3521 fprintf (dump_file, " May include autoinc/dec\n");
3522 fprintf (dump_file, "\n");
3525 address_cost_data_list[data_index] = data;
3528 bits = GET_MODE_BITSIZE (address_mode);
3529 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3530 offset &= mask;
3531 if ((offset >> (bits - 1) & 1))
3532 offset |= ~mask;
3533 s_offset = offset;
3535 autoinc = false;
3536 autoinc_type = AINC_NONE;
3537 msize = GET_MODE_SIZE (mem_mode);
3538 autoinc_offset = offset;
3539 if (stmt_after_inc)
3540 autoinc_offset += ratio * cstep;
3541 if (symbol_present || var_present || ratio != 1)
3542 autoinc = false;
3543 else
3545 if (has_postinc[mem_mode] && autoinc_offset == 0
3546 && msize == cstep)
3547 autoinc_type = AINC_POST_INC;
3548 else if (has_postdec[mem_mode] && autoinc_offset == 0
3549 && msize == -cstep)
3550 autoinc_type = AINC_POST_DEC;
3551 else if (has_preinc[mem_mode] && autoinc_offset == msize
3552 && msize == cstep)
3553 autoinc_type = AINC_PRE_INC;
3554 else if (has_predec[mem_mode] && autoinc_offset == -msize
3555 && msize == -cstep)
3556 autoinc_type = AINC_PRE_DEC;
3558 if (autoinc_type != AINC_NONE)
3559 autoinc = true;
3562 cost = 0;
3563 offset_p = (s_offset != 0
3564 && data->min_offset <= s_offset
3565 && s_offset <= data->max_offset);
3566 ratio_p = (ratio != 1
3567 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3569 if (ratio != 1 && !ratio_p)
3570 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3572 if (s_offset && !offset_p && !symbol_present)
3573 cost += add_cost (speed, address_mode);
3575 if (may_autoinc)
3576 *may_autoinc = autoinc;
3577 if (autoinc)
3578 acost = data->ainc_costs[autoinc_type];
3579 else
3580 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3581 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3582 return new_cost (cost + acost, complexity);
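/* Illustration: for an address of shape sym + 16 + 2 * index with both
   the offset and the multiplier valid on the target, the lookup above
   reads data->costs[1][0][1][1] and the returned complexity is 3
   (symbol, offset and ratio present, no extra variable part). */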
3585 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3586 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3587 calculating the operands of EXPR. Returns true if successful, and returns
3588 the cost in COST. */
3590 static bool
3591 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3592 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3594 comp_cost res;
3595 tree op1 = TREE_OPERAND (expr, 1);
3596 tree cst = TREE_OPERAND (mult, 1);
3597 tree multop = TREE_OPERAND (mult, 0);
3598 int m = exact_log2 (int_cst_value (cst));
3599 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3600 int sa_cost;
3601 bool equal_p = false;
3603 if (!(m >= 0 && m < maxm))
3604 return false;
3606 if (operand_equal_p (op1, mult, 0))
3607 equal_p = true;
3609 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3610 ? shiftadd_cost (speed, mode, m)
3611 : (equal_p
3612 ? shiftsub1_cost (speed, mode, m)
3613 : shiftsub0_cost (speed, mode, m)));
3614 res = new_cost (sa_cost, 0);
3615 res = add_costs (res, equal_p ? cost0 : cost1);
3617 STRIP_NOPS (multop);
3618 if (!is_gimple_val (multop))
3619 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3621 *cost = res;
3622 return true;
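/* Illustration (made-up expression): for EXPR == a + b * 8, MULT is
   b * 8, so m == 3 and the result is shiftadd_cost (speed, mode, 3)
   plus the cost of the remaining operand -- modeling a single
   shift-and-add (lea-like) instruction instead of a full multiply
   followed by an add. */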
3625 /* Estimates cost of forcing expression EXPR into a variable. */
3627 static comp_cost
3628 force_expr_to_var_cost (tree expr, bool speed)
3630 static bool costs_initialized = false;
3631 static unsigned integer_cost [2];
3632 static unsigned symbol_cost [2];
3633 static unsigned address_cost [2];
3634 tree op0, op1;
3635 comp_cost cost0, cost1, cost;
3636 machine_mode mode;
3638 if (!costs_initialized)
3640 tree type = build_pointer_type (integer_type_node);
3641 tree var, addr;
3642 rtx x;
3643 int i;
3645 var = create_tmp_var_raw (integer_type_node, "test_var");
3646 TREE_STATIC (var) = 1;
3647 x = produce_memory_decl_rtl (var, NULL);
3648 SET_DECL_RTL (var, x);
3650 addr = build1 (ADDR_EXPR, type, var);
3653 for (i = 0; i < 2; i++)
3655 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3656 2000), i);
3658 symbol_cost[i] = computation_cost (addr, i) + 1;
3660 address_cost[i]
3661 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3664 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3665 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3666 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3667 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3668 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3669 fprintf (dump_file, "\n");
3673 costs_initialized = true;
3676 STRIP_NOPS (expr);
3678 if (SSA_VAR_P (expr))
3679 return no_cost;
3681 if (is_gimple_min_invariant (expr))
3683 if (TREE_CODE (expr) == INTEGER_CST)
3684 return new_cost (integer_cost [speed], 0);
3686 if (TREE_CODE (expr) == ADDR_EXPR)
3688 tree obj = TREE_OPERAND (expr, 0);
3690 if (TREE_CODE (obj) == VAR_DECL
3691 || TREE_CODE (obj) == PARM_DECL
3692 || TREE_CODE (obj) == RESULT_DECL)
3693 return new_cost (symbol_cost [speed], 0);
3696 return new_cost (address_cost [speed], 0);
3699 switch (TREE_CODE (expr))
3701 case POINTER_PLUS_EXPR:
3702 case PLUS_EXPR:
3703 case MINUS_EXPR:
3704 case MULT_EXPR:
3705 op0 = TREE_OPERAND (expr, 0);
3706 op1 = TREE_OPERAND (expr, 1);
3707 STRIP_NOPS (op0);
3708 STRIP_NOPS (op1);
3709 break;
3711 CASE_CONVERT:
3712 case NEGATE_EXPR:
3713 op0 = TREE_OPERAND (expr, 0);
3714 STRIP_NOPS (op0);
3715 op1 = NULL_TREE;
3716 break;
3718 default:
3719 /* Just an arbitrary value, FIXME. */
3720 return new_cost (target_spill_cost[speed], 0);
3723 if (op0 == NULL_TREE
3724 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3725 cost0 = no_cost;
3726 else
3727 cost0 = force_expr_to_var_cost (op0, speed);
3729 if (op1 == NULL_TREE
3730 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3731 cost1 = no_cost;
3732 else
3733 cost1 = force_expr_to_var_cost (op1, speed);
3735 mode = TYPE_MODE (TREE_TYPE (expr));
3736 switch (TREE_CODE (expr))
3738 case POINTER_PLUS_EXPR:
3739 case PLUS_EXPR:
3740 case MINUS_EXPR:
3741 case NEGATE_EXPR:
3742 cost = new_cost (add_cost (speed, mode), 0);
3743 if (TREE_CODE (expr) != NEGATE_EXPR)
3745 tree mult = NULL_TREE;
3746 comp_cost sa_cost;
3747 if (TREE_CODE (op1) == MULT_EXPR)
3748 mult = op1;
3749 else if (TREE_CODE (op0) == MULT_EXPR)
3750 mult = op0;
3752 if (mult != NULL_TREE
3753 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3754 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3755 speed, &sa_cost))
3756 return sa_cost;
3758 break;
3760 CASE_CONVERT:
3762 tree inner_mode, outer_mode;
3763 outer_mode = TREE_TYPE (expr);
3764 inner_mode = TREE_TYPE (op0);
3765 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3766 TYPE_MODE (inner_mode), speed), 0);
3768 break;
3770 case MULT_EXPR:
3771 if (cst_and_fits_in_hwi (op0))
3772 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3773 mode, speed), 0);
3774 else if (cst_and_fits_in_hwi (op1))
3775 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3776 mode, speed), 0);
3777 else
3778 return new_cost (target_spill_cost [speed], 0);
3779 break;
3781 default:
3782 gcc_unreachable ();
3785 cost = add_costs (cost, cost0);
3786 cost = add_costs (cost, cost1);
3788 /* Bound the cost by target_spill_cost. The parts of complicated
3789 computations often are either loop invariant or at least can
3790 be shared between several iv uses, so letting this grow without
3791 limits would not give reasonable results. */
3792 if (cost.cost > (int) target_spill_cost [speed])
3793 cost.cost = target_spill_cost [speed];
3795 return cost;
3798 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3799 invariants the computation depends on. */
3801 static comp_cost
3802 force_var_cost (struct ivopts_data *data,
3803 tree expr, bitmap *depends_on)
3805 if (depends_on)
3807 fd_ivopts_data = data;
3808 walk_tree (&expr, find_depends, depends_on, NULL);
3811 return force_expr_to_var_cost (expr, data->speed);
3814 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3815 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3816 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3817 invariants the computation depends on. */
3819 static comp_cost
3820 split_address_cost (struct ivopts_data *data,
3821 tree addr, bool *symbol_present, bool *var_present,
3822 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3824 tree core;
3825 HOST_WIDE_INT bitsize;
3826 HOST_WIDE_INT bitpos;
3827 tree toffset;
3828 machine_mode mode;
3829 int unsignedp, volatilep;
3831 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3832 &unsignedp, &volatilep, false);
3834 if (toffset != 0
3835 || bitpos % BITS_PER_UNIT != 0
3836 || TREE_CODE (core) != VAR_DECL)
3838 *symbol_present = false;
3839 *var_present = true;
3840 fd_ivopts_data = data;
3841 walk_tree (&addr, find_depends, depends_on, NULL);
3842 return new_cost (target_spill_cost[data->speed], 0);
3845 *offset += bitpos / BITS_PER_UNIT;
3846 if (TREE_STATIC (core)
3847 || DECL_EXTERNAL (core))
3849 *symbol_present = true;
3850 *var_present = false;
3851 return no_cost;
3854 *symbol_present = false;
3855 *var_present = true;
3856 return no_cost;
3859 /* Estimates cost of expressing difference of addresses E1 - E2 as
3860 var + symbol + offset. The value of offset is added to OFFSET,
3861 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3862 part is missing. DEPENDS_ON is a set of the invariants the computation
3863 depends on. */
3865 static comp_cost
3866 ptr_difference_cost (struct ivopts_data *data,
3867 tree e1, tree e2, bool *symbol_present, bool *var_present,
3868 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3870 HOST_WIDE_INT diff = 0;
3871 aff_tree aff_e1, aff_e2;
3872 tree type;
3874 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3876 if (ptr_difference_const (e1, e2, &diff))
3878 *offset += diff;
3879 *symbol_present = false;
3880 *var_present = false;
3881 return no_cost;
3884 if (integer_zerop (e2))
3885 return split_address_cost (data, TREE_OPERAND (e1, 0),
3886 symbol_present, var_present, offset, depends_on);
3888 *symbol_present = false;
3889 *var_present = true;
3891 type = signed_type_for (TREE_TYPE (e1));
3892 tree_to_aff_combination (e1, type, &aff_e1);
3893 tree_to_aff_combination (e2, type, &aff_e2);
3894 aff_combination_scale (&aff_e2, -1);
3895 aff_combination_add (&aff_e1, &aff_e2);
3897 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3900 /* Estimates cost of expressing difference E1 - E2 as
3901 var + symbol + offset. The value of offset is added to OFFSET,
3902 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3903 part is missing. DEPENDS_ON is a set of the invariants the computation
3904 depends on. */
3906 static comp_cost
3907 difference_cost (struct ivopts_data *data,
3908 tree e1, tree e2, bool *symbol_present, bool *var_present,
3909 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3911 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3912 unsigned HOST_WIDE_INT off1, off2;
3913 aff_tree aff_e1, aff_e2;
3914 tree type;
3916 e1 = strip_offset (e1, &off1);
3917 e2 = strip_offset (e2, &off2);
3918 *offset += off1 - off2;
3920 STRIP_NOPS (e1);
3921 STRIP_NOPS (e2);
3923 if (TREE_CODE (e1) == ADDR_EXPR)
3924 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3925 offset, depends_on);
3926 *symbol_present = false;
3928 if (operand_equal_p (e1, e2, 0))
3930 *var_present = false;
3931 return no_cost;
3934 *var_present = true;
3936 if (integer_zerop (e2))
3937 return force_var_cost (data, e1, depends_on);
3939 if (integer_zerop (e1))
3941 comp_cost cost = force_var_cost (data, e2, depends_on);
3942 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3943 return cost;
3946 type = signed_type_for (TREE_TYPE (e1));
3947 tree_to_aff_combination (e1, type, &aff_e1);
3948 tree_to_aff_combination (e2, type, &aff_e2);
3949 aff_combination_scale (&aff_e2, -1);
3950 aff_combination_add (&aff_e1, &aff_e2);
3952 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
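/* Illustration (made-up addresses, 4-byte elements): for e1 == &a[6]
   and e2 == &a[2], strip_offset and ptr_difference_const reduce the
   difference to the constant 16, which goes into *offset with neither
   a symbol nor a variable part, so no_cost is returned. */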
3955 /* Returns true if AFF1 and AFF2 are identical. */
3957 static bool
3958 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3960 unsigned i;
3962 if (aff1->n != aff2->n)
3963 return false;
3965 for (i = 0; i < aff1->n; i++)
3967 if (aff1->elts[i].coef != aff2->elts[i].coef)
3968 return false;
3970 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3971 return false;
3973 return true;
3976 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3978 static int
3979 get_expr_id (struct ivopts_data *data, tree expr)
3981 struct iv_inv_expr_ent ent;
3982 struct iv_inv_expr_ent **slot;
3984 ent.expr = expr;
3985 ent.hash = iterative_hash_expr (expr, 0);
3986 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3987 if (*slot)
3988 return (*slot)->id;
3990 *slot = XNEW (struct iv_inv_expr_ent);
3991 (*slot)->expr = expr;
3992 (*slot)->hash = ent.hash;
3993 (*slot)->id = data->inv_expr_id++;
3994 return (*slot)->id;
3997 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3998 requires a new compiler-generated temporary. Returns -1 otherwise.
3999 ADDRESS_P is a flag indicating if the expression is for address
4000 computation. */
4002 static int
4003 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4004 tree cbase, HOST_WIDE_INT ratio,
4005 bool address_p)
4007 aff_tree ubase_aff, cbase_aff;
4008 tree expr, ub, cb;
4010 STRIP_NOPS (ubase);
4011 STRIP_NOPS (cbase);
4012 ub = ubase;
4013 cb = cbase;
4015 if ((TREE_CODE (ubase) == INTEGER_CST)
4016 && (TREE_CODE (cbase) == INTEGER_CST))
4017 return -1;
4019 /* Strips the constant part. */
4020 if (TREE_CODE (ubase) == PLUS_EXPR
4021 || TREE_CODE (ubase) == MINUS_EXPR
4022 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4024 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4025 ubase = TREE_OPERAND (ubase, 0);
4028 /* Strips the constant part. */
4029 if (TREE_CODE (cbase) == PLUS_EXPR
4030 || TREE_CODE (cbase) == MINUS_EXPR
4031 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4033 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4034 cbase = TREE_OPERAND (cbase, 0);
4037 if (address_p)
4039 if (((TREE_CODE (ubase) == SSA_NAME)
4040 || (TREE_CODE (ubase) == ADDR_EXPR
4041 && is_gimple_min_invariant (ubase)))
4042 && (TREE_CODE (cbase) == INTEGER_CST))
4043 return -1;
4045 if (((TREE_CODE (cbase) == SSA_NAME)
4046 || (TREE_CODE (cbase) == ADDR_EXPR
4047 && is_gimple_min_invariant (cbase)))
4048 && (TREE_CODE (ubase) == INTEGER_CST))
4049 return -1;
4052 if (ratio == 1)
4054 if (operand_equal_p (ubase, cbase, 0))
4055 return -1;
4057 if (TREE_CODE (ubase) == ADDR_EXPR
4058 && TREE_CODE (cbase) == ADDR_EXPR)
4060 tree usym, csym;
4062 usym = TREE_OPERAND (ubase, 0);
4063 csym = TREE_OPERAND (cbase, 0);
4064 if (TREE_CODE (usym) == ARRAY_REF)
4066 tree ind = TREE_OPERAND (usym, 1);
4067 if (TREE_CODE (ind) == INTEGER_CST
4068 && tree_fits_shwi_p (ind)
4069 && tree_to_shwi (ind) == 0)
4070 usym = TREE_OPERAND (usym, 0);
4072 if (TREE_CODE (csym) == ARRAY_REF)
4074 tree ind = TREE_OPERAND (csym, 1);
4075 if (TREE_CODE (ind) == INTEGER_CST
4076 && tree_fits_shwi_p (ind)
4077 && tree_to_shwi (ind) == 0)
4078 csym = TREE_OPERAND (csym, 0);
4080 if (operand_equal_p (usym, csym, 0))
4081 return -1;
4083 /* Now do a more complex comparison. */
4084 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4085 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4086 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4087 return -1;
4090 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4091 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4093 aff_combination_scale (&cbase_aff, -1 * ratio);
4094 aff_combination_add (&ubase_aff, &cbase_aff);
4095 expr = aff_combination_to_tree (&ubase_aff);
4096 return get_expr_id (data, expr);
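/* Illustration: with ratio == 1, ubase == &a[0] and cbase == &a name the
   same object once the zero-index ARRAY_REF is stripped, so the
   operand_equal_p check above succeeds and -1 is returned -- no new
   loop-invariant temporary is needed. */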
4101 /* Determines the cost of the computation by that USE is expressed
4102 from induction variable CAND. If ADDRESS_P is true, we just need
4103 to create an address from it, otherwise we want to get it into
4104 register. A set of invariants we depend on is stored in
4105 DEPENDS_ON. AT is the statement at that the value is computed.
4106 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4107 addressing is likely. */
4109 static comp_cost
4110 get_computation_cost_at (struct ivopts_data *data,
4111 struct iv_use *use, struct iv_cand *cand,
4112 bool address_p, bitmap *depends_on, gimple at,
4113 bool *can_autoinc,
4114 int *inv_expr_id)
4116 tree ubase = use->iv->base, ustep = use->iv->step;
4117 tree cbase, cstep;
4118 tree utype = TREE_TYPE (ubase), ctype;
4119 unsigned HOST_WIDE_INT cstepi, offset = 0;
4120 HOST_WIDE_INT ratio, aratio;
4121 bool var_present, symbol_present, stmt_is_after_inc;
4122 comp_cost cost;
4123 widest_int rat;
4124 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4125 machine_mode mem_mode = (address_p
4126 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4127 : VOIDmode);
4129 *depends_on = NULL;
4131 /* Only consider real candidates. */
4132 if (!cand->iv)
4133 return infinite_cost;
4135 cbase = cand->iv->base;
4136 cstep = cand->iv->step;
4137 ctype = TREE_TYPE (cbase);
4139 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4141 /* We do not have enough precision to express the values of the use. */
4142 return infinite_cost;
4145 if (address_p
4146 || (use->iv->base_object
4147 && cand->iv->base_object
4148 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4149 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4151 /* Do not try to express address of an object with computation based
4152 on address of a different object. This may cause problems in rtl
4153 level alias analysis (that does not expect this to be happening,
4154 as this is illegal in C), and would be unlikely to be useful
4155 anyway. */
4156 if (use->iv->base_object
4157 && cand->iv->base_object
4158 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4159 return infinite_cost;
4162 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4164 /* TODO -- add direct handling of this case. */
4165 goto fallback;
4168 /* CSTEPI is removed from the offset in case the statement is after the
4169 increment. If the step is not constant, we use zero instead.
4170 This is a bit imprecise (there is the extra addition), but
4171 redundancy elimination is likely to transform the code so that
4172 it uses the value of the variable before the increment anyway,
4173 so this is not too unrealistic. */
4174 if (cst_and_fits_in_hwi (cstep))
4175 cstepi = int_cst_value (cstep);
4176 else
4177 cstepi = 0;
4179 if (!constant_multiple_of (ustep, cstep, &rat))
4180 return infinite_cost;
4182 if (wi::fits_shwi_p (rat))
4183 ratio = rat.to_shwi ();
4184 else
4185 return infinite_cost;
4187 STRIP_NOPS (cbase);
4188 ctype = TREE_TYPE (cbase);
4190 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4192 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4193 or ratio == 1, it is better to handle this like
4195 ubase - ratio * cbase + ratio * var
4197 (this also holds for ratio == -1; handling that case is a TODO). */
4199 if (cst_and_fits_in_hwi (cbase))
4201 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4202 cost = difference_cost (data,
4203 ubase, build_int_cst (utype, 0),
4204 &symbol_present, &var_present, &offset,
4205 depends_on);
4206 cost.cost /= avg_loop_niter (data->current_loop);
4208 else if (ratio == 1)
4210 tree real_cbase = cbase;
4212 /* Check to see if any adjustment is needed. */
4213 if (cstepi == 0 && stmt_is_after_inc)
4215 aff_tree real_cbase_aff;
4216 aff_tree cstep_aff;
4218 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4219 &real_cbase_aff);
4220 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4222 aff_combination_add (&real_cbase_aff, &cstep_aff);
4223 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4226 cost = difference_cost (data,
4227 ubase, real_cbase,
4228 &symbol_present, &var_present, &offset,
4229 depends_on);
4230 cost.cost /= avg_loop_niter (data->current_loop);
4232 else if (address_p
4233 && !POINTER_TYPE_P (ctype)
4234 && multiplier_allowed_in_address_p
4235 (ratio, mem_mode,
4236 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4238 cbase
4239 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4240 cost = difference_cost (data,
4241 ubase, cbase,
4242 &symbol_present, &var_present, &offset,
4243 depends_on);
4244 cost.cost /= avg_loop_niter (data->current_loop);
4246 else
4248 cost = force_var_cost (data, cbase, depends_on);
4249 cost = add_costs (cost,
4250 difference_cost (data,
4251 ubase, build_int_cst (utype, 0),
4252 &symbol_present, &var_present,
4253 &offset, depends_on));
4254 cost.cost /= avg_loop_niter (data->current_loop);
4255 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4258 if (inv_expr_id)
4260 *inv_expr_id =
4261 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4262 /* Clear depends on. */
4263 if (*inv_expr_id != -1 && depends_on && *depends_on)
4264 bitmap_clear (*depends_on);
4267 /* If we are after the increment, the value of the candidate is higher by
4268 one iteration. */
4269 if (stmt_is_after_inc)
4270 offset -= ratio * cstepi;
4272 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4273 (symbol/var1/const parts may be omitted). If we are looking for an
4274 address, find the cost of addressing this. */
4275 if (address_p)
4276 return add_costs (cost,
4277 get_address_cost (symbol_present, var_present,
4278 offset, ratio, cstepi,
4279 mem_mode,
4280 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4281 speed, stmt_is_after_inc,
4282 can_autoinc));
4284 /* Otherwise estimate the costs for computing the expression. */
4285 if (!symbol_present && !var_present && !offset)
4287 if (ratio != 1)
4288 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4289 return cost;
4292 /* Symbol + offset should be compile-time computable, so consider that
4293 they are added once to the variable, if present. */
4294 if (var_present && (symbol_present || offset))
4295 cost.cost += adjust_setup_cost (data,
4296 add_cost (speed, TYPE_MODE (ctype)));
4298 /* Having an offset does not affect the runtime cost if it is added to a
4299 symbol, but it increases complexity. */
4300 if (offset)
4301 cost.complexity++;
4303 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4305 aratio = ratio > 0 ? ratio : -ratio;
4306 if (aratio != 1)
4307 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4308 return cost;
4310 fallback:
4311 if (can_autoinc)
4312 *can_autoinc = false;
4315 /* Just get the expression, expand it and measure the cost. */
4316 tree comp = get_computation_at (data->current_loop, use, cand, at);
4318 if (!comp)
4319 return infinite_cost;
4321 if (address_p)
4322 comp = build_simple_mem_ref (comp);
4324 return new_cost (computation_cost (comp, speed), 0);
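
/* As an illustration of the decomposition above (with made-up sizes,
   not any particular target's cost tables): to express the address of
   a[i] for "int a[N]" from a candidate p that starts at &a[0] and steps
   by 4 bytes, we have ubase == cbase == &a[0] and ustep == cstep, so
   ratio == 1, the difference ubase - cbase folds away, and only the
   addressing cost remains.  Expressing the same use from the integer
   iv i (cstep == 1) gives ratio == 4, which is cheap only if
   multiplier_allowed_in_address_p accepts scaling by 4 for this mode.  */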
4328 /* Determines the cost of the computation by which USE is expressed
4329 from induction variable CAND. If ADDRESS_P is true, we just need
4330 to create an address from it, otherwise we want to get it into a
4331 register. A set of invariants we depend on is stored in
4332 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4333 autoinc addressing is likely. */
4335 static comp_cost
4336 get_computation_cost (struct ivopts_data *data,
4337 struct iv_use *use, struct iv_cand *cand,
4338 bool address_p, bitmap *depends_on,
4339 bool *can_autoinc, int *inv_expr_id)
4341 return get_computation_cost_at (data,
4342 use, cand, address_p, depends_on, use->stmt,
4343 can_autoinc, inv_expr_id);
4346 /* Determines cost of basing replacement of USE on CAND in a generic
4347 expression. */
4349 static bool
4350 determine_use_iv_cost_generic (struct ivopts_data *data,
4351 struct iv_use *use, struct iv_cand *cand)
4353 bitmap depends_on;
4354 comp_cost cost;
4355 int inv_expr_id = -1;
4357 /* The simple case first -- if we need to express the value of the preserved
4358 original biv, the cost is 0. This also prevents us from counting the
4359 cost of the increment twice -- once at this use and once in the cost of
4360 the candidate. */
4361 if (cand->pos == IP_ORIGINAL
4362 && cand->incremented_at == use->stmt)
4364 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4365 ERROR_MARK, -1);
4366 return true;
4369 cost = get_computation_cost (data, use, cand, false, &depends_on,
4370 NULL, &inv_expr_id);
4372 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4373 inv_expr_id);
4375 return !infinite_cost_p (cost);
4378 /* Determines cost of basing replacement of USE on CAND in an address. */
4380 static bool
4381 determine_use_iv_cost_address (struct ivopts_data *data,
4382 struct iv_use *use, struct iv_cand *cand)
4384 bitmap depends_on;
4385 bool can_autoinc;
4386 int inv_expr_id = -1;
4387 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4388 &can_autoinc, &inv_expr_id);
4390 if (cand->ainc_use == use)
4392 if (can_autoinc)
4393 cost.cost -= cand->cost_step;
4394 /* If we generated the candidate solely for exploiting autoincrement
4395 opportunities, and it turns out it can't be used, set the cost to
4396 infinity to make sure we ignore it. */
4397 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4398 cost = infinite_cost;
4400 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4401 inv_expr_id);
4403 return !infinite_cost_p (cost);
4406 /* Computes value of candidate CAND at position AT in iteration NITER, and
4407 stores it to VAL. */
4409 static void
4410 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4411 aff_tree *val)
4413 aff_tree step, delta, nit;
4414 struct iv *iv = cand->iv;
4415 tree type = TREE_TYPE (iv->base);
4416 tree steptype = type;
4417 if (POINTER_TYPE_P (type))
4418 steptype = sizetype;
4419 steptype = unsigned_type_for (type);
4421 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4422 aff_combination_convert (&step, steptype);
4423 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4424 aff_combination_convert (&nit, steptype);
4425 aff_combination_mult (&nit, &step, &delta);
4426 if (stmt_after_increment (loop, cand, at))
4427 aff_combination_add (&delta, &step);
4429 tree_to_aff_combination (iv->base, type, val);
4430 if (!POINTER_TYPE_P (type))
4431 aff_combination_convert (val, steptype);
4432 aff_combination_add (val, &delta);
4435 /* Returns the period of induction variable IV. */
4437 static tree
4438 iv_period (struct iv *iv)
4440 tree step = iv->step, period, type;
4441 tree pow2div;
4443 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4445 type = unsigned_type_for (TREE_TYPE (step));
4446 /* The period of the iv is lcm (step, type_range) / step - 1,
4447 i.e., N * type_range / step - 1. Since the type range is a power
4448 of two, N == step >> num_of_ending_zeros_binary (step),
4449 so the final result is
4451 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4454 pow2div = num_ending_zeros (step);
4456 period = build_low_bits_mask (type,
4457 (TYPE_PRECISION (type)
4458 - tree_to_uhwi (pow2div)));
4460 return period;
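
/* For example, for a 32-bit unsigned iv with step 12 (binary ...1100),
   num_ending_zeros is 2, so the period is (1 << 30) - 1: the iv cycles
   through 1 << 30 distinct values, so for up to (1 << 30) - 1
   increments it cannot return to a value it already had.  */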
4463 /* Returns the comparison operator used when eliminating the iv USE. */
4465 static enum tree_code
4466 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4468 struct loop *loop = data->current_loop;
4469 basic_block ex_bb;
4470 edge exit;
4472 ex_bb = gimple_bb (use->stmt);
4473 exit = EDGE_SUCC (ex_bb, 0);
4474 if (flow_bb_inside_loop_p (loop, exit->dest))
4475 exit = EDGE_SUCC (ex_bb, 1);
4477 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4480 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4481 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4482 calculation is performed in a non-wrapping type.
4484 TODO: More generally, we could test for the situation that
4485 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4486 This would require knowing the sign of OFFSET. */
4488 static bool
4489 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4491 enum tree_code code;
4492 tree e1, e2;
4493 aff_tree aff_e1, aff_e2, aff_offset;
4495 if (!nowrap_type_p (TREE_TYPE (base)))
4496 return false;
4498 base = expand_simple_operations (base);
4500 if (TREE_CODE (base) == SSA_NAME)
4502 gimple stmt = SSA_NAME_DEF_STMT (base);
4504 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4505 return false;
4507 code = gimple_assign_rhs_code (stmt);
4508 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4509 return false;
4511 e1 = gimple_assign_rhs1 (stmt);
4512 e2 = gimple_assign_rhs2 (stmt);
4514 else
4516 code = TREE_CODE (base);
4517 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4518 return false;
4519 e1 = TREE_OPERAND (base, 0);
4520 e2 = TREE_OPERAND (base, 1);
4523 /* Use affine expansion as a deeper inspection to prove the equality. */
4524 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4525 &aff_e2, &data->name_expansion_cache);
4526 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4527 &aff_offset, &data->name_expansion_cache);
4528 aff_combination_scale (&aff_offset, -1);
4529 switch (code)
4531 case PLUS_EXPR:
4532 aff_combination_add (&aff_e2, &aff_offset);
4533 if (aff_combination_zero_p (&aff_e2))
4534 return true;
4536 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4537 &aff_e1, &data->name_expansion_cache);
4538 aff_combination_add (&aff_e1, &aff_offset);
4539 return aff_combination_zero_p (&aff_e1);
4541 case POINTER_PLUS_EXPR:
4542 aff_combination_add (&aff_e2, &aff_offset);
4543 return aff_combination_zero_p (&aff_e2);
4545 default:
4546 return false;
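
/* For instance, for "p = q + len" computed in a non-wrapping pointer
   type, a query with BASE == p and OFFSET == len reaches the
   POINTER_PLUS_EXPR case above: the affine form of len cancels against
   -OFFSET, proving that p - len (== q) cannot overflow.  */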
4550 /* Tries to replace the loop exit by one formulated in terms of a LT_EXPR
4551 comparison with CAND. NITER describes the number of iterations of
4552 the loop. If successful, the comparison in COMP_P is altered accordingly.
4554 We aim to handle the following situation:
4556 sometype *base, *p;
4557 int a, b, i;
4559 i = a;
4560 p = p_0 = base + a;
4562 do
4563 {
4564 bla (*p);
4565 p++;
4566 i++;
4567 }
4568 while (i < b);
4570 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4571 We aim to optimize this to
4573 p = p_0 = base + a;
4574 do
4575 {
4576 bla (*p);
4577 p++;
4578 }
4579 while (p < p_0 - a + b);
4581 This preserves correctness, since the pointer arithmetic does not
4582 overflow. More precisely:
4584 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4585 overflow in computing it or the values of p.
4586 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4587 overflow. To prove this, we use the fact that p_0 = base + a. */
4589 static bool
4590 iv_elimination_compare_lt (struct ivopts_data *data,
4591 struct iv_cand *cand, enum tree_code *comp_p,
4592 struct tree_niter_desc *niter)
4594 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4595 struct aff_tree nit, tmpa, tmpb;
4596 enum tree_code comp;
4597 HOST_WIDE_INT step;
4599 /* We need to know that the candidate induction variable does not overflow.
4600 While more complex analysis may be used to prove this, for now just
4601 check that the variable appears in the original program and that it
4602 is computed in a type that guarantees no overflows. */
4603 cand_type = TREE_TYPE (cand->iv->base);
4604 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4605 return false;
4607 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4608 the calculation of the BOUND could overflow, making the comparison
4609 invalid. */
4610 if (!data->loop_single_exit_p)
4611 return false;
4613 /* We need to be able to decide whether candidate is increasing or decreasing
4614 in order to choose the right comparison operator. */
4615 if (!cst_and_fits_in_hwi (cand->iv->step))
4616 return false;
4617 step = int_cst_value (cand->iv->step);
4619 /* Check that the number of iterations matches the expected pattern:
4620 a + 1 > b ? 0 : b - a - 1. */
4621 mbz = niter->may_be_zero;
4622 if (TREE_CODE (mbz) == GT_EXPR)
4624 /* Handle a + 1 > b. */
4625 tree op0 = TREE_OPERAND (mbz, 0);
4626 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4628 a = TREE_OPERAND (op0, 0);
4629 b = TREE_OPERAND (mbz, 1);
4631 else
4632 return false;
4634 else if (TREE_CODE (mbz) == LT_EXPR)
4636 tree op1 = TREE_OPERAND (mbz, 1);
4638 /* Handle b < a + 1. */
4639 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4641 a = TREE_OPERAND (op1, 0);
4642 b = TREE_OPERAND (mbz, 0);
4644 else
4645 return false;
4647 else
4648 return false;
4650 /* Expected number of iterations is B - A - 1. Check that it matches
4651 the actual number, i.e., that B - A - NITER = 1. */
4652 tree_to_aff_combination (niter->niter, nit_type, &nit);
4653 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4654 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4655 aff_combination_scale (&nit, -1);
4656 aff_combination_scale (&tmpa, -1);
4657 aff_combination_add (&tmpb, &tmpa);
4658 aff_combination_add (&tmpb, &nit);
4659 if (tmpb.n != 0 || tmpb.offset != 1)
4660 return false;
4662 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4663 overflow. */
4664 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4665 cand->iv->step,
4666 fold_convert (TREE_TYPE (cand->iv->step), a));
4667 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4668 return false;
4670 /* Determine the new comparison operator. */
4671 comp = step < 0 ? GT_EXPR : LT_EXPR;
4672 if (*comp_p == NE_EXPR)
4673 *comp_p = comp;
4674 else if (*comp_p == EQ_EXPR)
4675 *comp_p = invert_tree_comparison (comp, false);
4676 else
4677 gcc_unreachable ();
4679 return true;
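
/* A worked instance of the checks above: for may_be_zero == "a + 1 > b"
   and niter == b - a - 1, the affine computation forms
   B - A - NITER == b - a - (b - a - 1) == 1, so the shape matches, and
   the remaining overflow test is on base - step * a.  For a positive
   step, a NE_EXPR exit condition then becomes LT_EXPR, and an EQ_EXPR
   one becomes its inversion, GE_EXPR.  */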
4682 /* Check whether it is possible to express the condition in USE by comparison
4683 of candidate CAND. If so, store the value compared with to BOUND, and the
4684 comparison operator to COMP. */
4686 static bool
4687 may_eliminate_iv (struct ivopts_data *data,
4688 struct iv_use *use, struct iv_cand *cand, tree *bound,
4689 enum tree_code *comp)
4691 basic_block ex_bb;
4692 edge exit;
4693 tree period;
4694 struct loop *loop = data->current_loop;
4695 aff_tree bnd;
4696 struct tree_niter_desc *desc = NULL;
4698 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4699 return false;
4701 /* For now works only for exits that dominate the loop latch.
4702 TODO: extend to other conditions inside loop body. */
4703 ex_bb = gimple_bb (use->stmt);
4704 if (use->stmt != last_stmt (ex_bb)
4705 || gimple_code (use->stmt) != GIMPLE_COND
4706 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4707 return false;
4709 exit = EDGE_SUCC (ex_bb, 0);
4710 if (flow_bb_inside_loop_p (loop, exit->dest))
4711 exit = EDGE_SUCC (ex_bb, 1);
4712 if (flow_bb_inside_loop_p (loop, exit->dest))
4713 return false;
4715 desc = niter_for_exit (data, exit);
4716 if (!desc)
4717 return false;
4719 /* Determine whether we can use the variable to test the exit condition.
4720 This is the case iff the period of the induction variable is greater
4721 than the number of iterations for which the exit condition is true. */
4722 period = iv_period (cand->iv);
4724 /* If the number of iterations is constant, compare against it directly. */
4725 if (TREE_CODE (desc->niter) == INTEGER_CST)
4727 /* See cand_value_at. */
4728 if (stmt_after_increment (loop, cand, use->stmt))
4730 if (!tree_int_cst_lt (desc->niter, period))
4731 return false;
4733 else
4735 if (tree_int_cst_lt (period, desc->niter))
4736 return false;
4740 /* If not, and if this is the only possible exit of the loop, see whether
4741 we can get a conservative estimate on the number of iterations of the
4742 entire loop and compare against that instead. */
4743 else
4745 widest_int period_value, max_niter;
4747 max_niter = desc->max;
4748 if (stmt_after_increment (loop, cand, use->stmt))
4749 max_niter += 1;
4750 period_value = wi::to_widest (period);
4751 if (wi::gtu_p (max_niter, period_value))
4753 /* See if we can take advantage of inferred loop bound information. */
4754 if (data->loop_single_exit_p)
4756 if (!max_loop_iterations (loop, &max_niter))
4757 return false;
4758 /* The loop bound is already adjusted by adding 1. */
4759 if (wi::gtu_p (max_niter, period_value))
4760 return false;
4762 else
4763 return false;
4767 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4769 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4770 aff_combination_to_tree (&bnd));
4771 *comp = iv_elimination_compare (data, use);
4773 /* It is unlikely that computing the number of iterations using division
4774 would be more profitable than keeping the original induction variable. */
4775 if (expression_expensive_p (*bound))
4776 return false;
4778 /* Sometimes it is possible to handle the situation that the number of
4779 iterations may be zero unless additional assumptions hold, by using <
4780 instead of != in the exit condition.
4782 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4783 base the exit condition on it. However, that is often too
4784 expensive. */
4785 if (!integer_zerop (desc->may_be_zero))
4786 return iv_elimination_compare_lt (data, cand, comp, desc);
4788 return true;
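
/* For example, an 8-bit unsigned candidate with step 1 has period 255;
   an exit whose niter is the constant 200 can be rewritten as a
   comparison against cand_value_at (..., 200), whereas niter == 300
   would be rejected, since the candidate would wrap and could hit the
   bound value in an earlier iteration.  */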
4791 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4792 be copied, if it is used in the loop body and DATA->body_includes_call. */
4794 static int
4795 parm_decl_cost (struct ivopts_data *data, tree bound)
4797 tree sbound = bound;
4798 STRIP_NOPS (sbound);
4800 if (TREE_CODE (sbound) == SSA_NAME
4801 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4802 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4803 && data->body_includes_call)
4804 return COSTS_N_INSNS (1);
4806 return 0;
4809 /* Determines cost of basing replacement of USE on CAND in a condition. */
4811 static bool
4812 determine_use_iv_cost_condition (struct ivopts_data *data,
4813 struct iv_use *use, struct iv_cand *cand)
4815 tree bound = NULL_TREE;
4816 struct iv *cmp_iv;
4817 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4818 comp_cost elim_cost, express_cost, cost, bound_cost;
4819 bool ok;
4820 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4821 tree *control_var, *bound_cst;
4822 enum tree_code comp = ERROR_MARK;
4824 /* Only consider real candidates. */
4825 if (!cand->iv)
4827 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4828 ERROR_MARK, -1);
4829 return false;
4832 /* Try iv elimination. */
4833 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4835 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4836 if (elim_cost.cost == 0)
4837 elim_cost.cost = parm_decl_cost (data, bound);
4838 else if (TREE_CODE (bound) == INTEGER_CST)
4839 elim_cost.cost = 0;
4840 /* If we replace a loop condition 'i < n' with 'p < base + n',
4841 depends_on_elim will have 'base' and 'n' set, which implies
4842 that both 'base' and 'n' will be live during the loop. More likely,
4843 'base + n' will be loop invariant, resulting in only one live value
4844 during the loop. So in that case we clear depends_on_elim and set
4845 elim_inv_expr_id instead. */
4846 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4848 elim_inv_expr_id = get_expr_id (data, bound);
4849 bitmap_clear (depends_on_elim);
4851 /* The bound is a loop invariant, so it will be computed
4852 only once. */
4853 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4855 else
4856 elim_cost = infinite_cost;
4858 /* Try expressing the original giv. If it is compared with an invariant,
4859 note that we cannot get rid of it. */
4860 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4861 NULL, &cmp_iv);
4862 gcc_assert (ok);
4864 /* When the condition is a comparison of the candidate IV against
4865 zero, prefer this IV.
4867 TODO: The constant that we're subtracting from the cost should
4868 be target-dependent. This information should be added to the
4869 target costs for each backend. */
4870 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4871 && integer_zerop (*bound_cst)
4872 && (operand_equal_p (*control_var, cand->var_after, 0)
4873 || operand_equal_p (*control_var, cand->var_before, 0)))
4874 elim_cost.cost -= 1;
4876 express_cost = get_computation_cost (data, use, cand, false,
4877 &depends_on_express, NULL,
4878 &express_inv_expr_id);
4879 fd_ivopts_data = data;
4880 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4882 /* Count the cost of the original bound as well. */
4883 bound_cost = force_var_cost (data, *bound_cst, NULL);
4884 if (bound_cost.cost == 0)
4885 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4886 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4887 bound_cost.cost = 0;
4888 express_cost.cost += bound_cost.cost;
4890 /* Choose the better approach, preferring the eliminated IV. */
4891 if (compare_costs (elim_cost, express_cost) <= 0)
4893 cost = elim_cost;
4894 depends_on = depends_on_elim;
4895 depends_on_elim = NULL;
4896 inv_expr_id = elim_inv_expr_id;
4898 else
4900 cost = express_cost;
4901 depends_on = depends_on_express;
4902 depends_on_express = NULL;
4903 bound = NULL_TREE;
4904 comp = ERROR_MARK;
4905 inv_expr_id = express_inv_expr_id;
4908 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4910 if (depends_on_elim)
4911 BITMAP_FREE (depends_on_elim);
4912 if (depends_on_express)
4913 BITMAP_FREE (depends_on_express);
4915 return !infinite_cost_p (cost);
4918 /* Determines cost of basing replacement of USE on CAND. Returns false
4919 if USE cannot be based on CAND. */
4921 static bool
4922 determine_use_iv_cost (struct ivopts_data *data,
4923 struct iv_use *use, struct iv_cand *cand)
4925 switch (use->type)
4927 case USE_NONLINEAR_EXPR:
4928 return determine_use_iv_cost_generic (data, use, cand);
4930 case USE_ADDRESS:
4931 return determine_use_iv_cost_address (data, use, cand);
4933 case USE_COMPARE:
4934 return determine_use_iv_cost_condition (data, use, cand);
4936 default:
4937 gcc_unreachable ();
4941 /* Return true if get_computation_cost indicates that autoincrement is
4942 a possibility for the pair of USE and CAND, false otherwise. */
4944 static bool
4945 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4946 struct iv_cand *cand)
4948 bitmap depends_on;
4949 bool can_autoinc;
4950 comp_cost cost;
4952 if (use->type != USE_ADDRESS)
4953 return false;
4955 cost = get_computation_cost (data, use, cand, true, &depends_on,
4956 &can_autoinc, NULL);
4958 BITMAP_FREE (depends_on);
4960 return !infinite_cost_p (cost) && can_autoinc;
4963 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4964 use that allows autoincrement, and set their AINC_USE if possible. */
4966 static void
4967 set_autoinc_for_original_candidates (struct ivopts_data *data)
4969 unsigned i, j;
4971 for (i = 0; i < n_iv_cands (data); i++)
4973 struct iv_cand *cand = iv_cand (data, i);
4974 struct iv_use *closest_before = NULL;
4975 struct iv_use *closest_after = NULL;
4976 if (cand->pos != IP_ORIGINAL)
4977 continue;
4979 for (j = 0; j < n_iv_uses (data); j++)
4981 struct iv_use *use = iv_use (data, j);
4982 unsigned uid = gimple_uid (use->stmt);
4984 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4985 continue;
4987 if (uid < gimple_uid (cand->incremented_at)
4988 && (closest_before == NULL
4989 || uid > gimple_uid (closest_before->stmt)))
4990 closest_before = use;
4992 if (uid > gimple_uid (cand->incremented_at)
4993 && (closest_after == NULL
4994 || uid < gimple_uid (closest_after->stmt)))
4995 closest_after = use;
4998 if (closest_before != NULL
4999 && autoinc_possible_for_pair (data, closest_before, cand))
5000 cand->ainc_use = closest_before;
5001 else if (closest_after != NULL
5002 && autoinc_possible_for_pair (data, closest_after, cand))
5003 cand->ainc_use = closest_after;
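
/* E.g. for a body like "... = *p; p++;" the load is the use closest
   before the increment of the original biv p, so it becomes
   cand->ainc_use; determine_use_iv_cost_address can then subtract
   cost_step from the pair's cost when autoincrement addressing is
   actually available.  */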
5007 /* Finds the candidates for the induction variables. */
5009 static void
5010 find_iv_candidates (struct ivopts_data *data)
5012 /* Add commonly used ivs. */
5013 add_standard_iv_candidates (data);
5015 /* Add old induction variables. */
5016 add_old_ivs_candidates (data);
5018 /* Add induction variables derived from uses. */
5019 add_derived_ivs_candidates (data);
5021 set_autoinc_for_original_candidates (data);
5023 /* Record the important candidates. */
5024 record_important_candidates (data);
5027 /* Determines costs of basing the use of the iv on an iv candidate. */
5029 static void
5030 determine_use_iv_costs (struct ivopts_data *data)
5032 unsigned i, j;
5033 struct iv_use *use;
5034 struct iv_cand *cand;
5035 bitmap to_clear = BITMAP_ALLOC (NULL);
5037 alloc_use_cost_map (data);
5039 for (i = 0; i < n_iv_uses (data); i++)
5041 use = iv_use (data, i);
5043 if (data->consider_all_candidates)
5045 for (j = 0; j < n_iv_cands (data); j++)
5047 cand = iv_cand (data, j);
5048 determine_use_iv_cost (data, use, cand);
5051 else
5053 bitmap_iterator bi;
5055 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5057 cand = iv_cand (data, j);
5058 if (!determine_use_iv_cost (data, use, cand))
5059 bitmap_set_bit (to_clear, j);
5062 /* Remove the candidates for which the cost is infinite from
5063 the list of related candidates. */
5064 bitmap_and_compl_into (use->related_cands, to_clear);
5065 bitmap_clear (to_clear);
5069 BITMAP_FREE (to_clear);
5071 if (dump_file && (dump_flags & TDF_DETAILS))
5073 fprintf (dump_file, "Use-candidate costs:\n");
5075 for (i = 0; i < n_iv_uses (data); i++)
5077 use = iv_use (data, i);
5079 fprintf (dump_file, "Use %d:\n", i);
5080 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5081 for (j = 0; j < use->n_map_members; j++)
5083 if (!use->cost_map[j].cand
5084 || infinite_cost_p (use->cost_map[j].cost))
5085 continue;
5087 fprintf (dump_file, " %d\t%d\t%d\t",
5088 use->cost_map[j].cand->id,
5089 use->cost_map[j].cost.cost,
5090 use->cost_map[j].cost.complexity);
5091 if (use->cost_map[j].depends_on)
5092 bitmap_print (dump_file,
5093 use->cost_map[j].depends_on, "","");
5094 if (use->cost_map[j].inv_expr_id != -1)
5095 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5096 fprintf (dump_file, "\n");
5099 fprintf (dump_file, "\n");
5101 fprintf (dump_file, "\n");
5105 /* Determines cost of the candidate CAND. */
5107 static void
5108 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5110 comp_cost cost_base;
5111 unsigned cost, cost_step;
5112 tree base;
5114 if (!cand->iv)
5116 cand->cost = 0;
5117 return;
5120 /* There are two costs associated with the candidate -- its increment
5121 and its initialization. The second is almost negligible for any loop
5122 that rolls enough, so we take it into account only slightly. */
5124 base = cand->iv->base;
5125 cost_base = force_var_cost (data, base, NULL);
5126 /* It will be exceptional for the iv register to happen to be initialized
5127 with the proper value at no cost. In general, there will at least be a
5128 regcopy or a const set. */
5129 if (cost_base.cost == 0)
5130 cost_base.cost = COSTS_N_INSNS (1);
5131 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5133 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5135 /* Prefer the original ivs unless we may gain something by replacing them.
5136 The reason is to make debugging simpler; so this is not relevant for
5137 artificial ivs created by other optimization passes. */
5138 if (cand->pos != IP_ORIGINAL
5139 || !SSA_NAME_VAR (cand->var_before)
5140 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5141 cost++;
5143 /* Prefer not to insert statements into the latch unless there are some
5144 already (so that we do not create unnecessary jumps). */
5145 if (cand->pos == IP_END
5146 && empty_block_p (ip_end_pos (data->current_loop)))
5147 cost++;
5149 cand->cost = cost;
5150 cand->cost_step = cost_step;
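
/* As a rough illustration with invented numbers: if forcing the base
   costs 4 and an add in the iv's mode costs 1, then for a loop expected
   to roll 10 times (and optimized for speed, so the setup cost is
   scaled down by the iteration count) the candidate cost is
   1 + 4/10 == 1 with integer division -- the increment dominates,
   matching the remark above that the initialization is almost
   negligible for loops that roll enough.  */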
5153 /* Determines costs of computation of the candidates. */
5155 static void
5156 determine_iv_costs (struct ivopts_data *data)
5158 unsigned i;
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5162 fprintf (dump_file, "Candidate costs:\n");
5163 fprintf (dump_file, " cand\tcost\n");
5166 for (i = 0; i < n_iv_cands (data); i++)
5168 struct iv_cand *cand = iv_cand (data, i);
5170 determine_iv_cost (data, cand);
5172 if (dump_file && (dump_flags & TDF_DETAILS))
5173 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5176 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, "\n");
5180 /* Calculates the cost of having SIZE induction variables. */
5182 static unsigned
5183 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5185 /* We add size to the cost, so that we prefer eliminating ivs
5186 if possible. */
5187 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5188 data->body_includes_call);
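
/* For illustration: when the loop already uses most of the target's
   registers, estimate_reg_pressure_cost charges each extra iv the
   target's register cost (or, past the limit, its spill cost), while
   with plenty of free registers it charges nothing; the "size +" term
   then still biases the search toward sets with fewer ivs.  */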
5191 /* For each size of the induction variable set determine the penalty. */
5193 static void
5194 determine_set_costs (struct ivopts_data *data)
5196 unsigned j, n;
5197 gphi *phi;
5198 gphi_iterator psi;
5199 tree op;
5200 struct loop *loop = data->current_loop;
5201 bitmap_iterator bi;
5203 if (dump_file && (dump_flags & TDF_DETAILS))
5205 fprintf (dump_file, "Global costs:\n");
5206 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5207 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5208 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5209 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5212 n = 0;
5213 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5215 phi = psi.phi ();
5216 op = PHI_RESULT (phi);
5218 if (virtual_operand_p (op))
5219 continue;
5221 if (get_iv (data, op))
5222 continue;
5224 n++;
5227 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5229 struct version_info *info = ver_info (data, j);
5231 if (info->inv_id && info->has_nonlin_use)
5232 n++;
5235 data->regs_used = n;
5236 if (dump_file && (dump_flags & TDF_DETAILS))
5237 fprintf (dump_file, " regs_used %d\n", n);
5239 if (dump_file && (dump_flags & TDF_DETAILS))
5241 fprintf (dump_file, " cost for size:\n");
5242 fprintf (dump_file, " ivs\tcost\n");
5243 for (j = 0; j <= 2 * target_avail_regs; j++)
5244 fprintf (dump_file, " %d\t%d\n", j,
5245 ivopts_global_cost_for_size (data, j));
5246 fprintf (dump_file, "\n");
5250 /* Returns true if A is a cheaper cost pair than B. */
5252 static bool
5253 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5255 int cmp;
5257 if (!a)
5258 return false;
5260 if (!b)
5261 return true;
5263 cmp = compare_costs (a->cost, b->cost);
5264 if (cmp < 0)
5265 return true;
5267 if (cmp > 0)
5268 return false;
5270 /* In case the costs are the same, prefer the cheaper candidate. */
5271 if (a->cand->cost < b->cand->cost)
5272 return true;
5274 return false;
5278 /* Returns the candidate by which USE is expressed in IVS. */
5280 static struct cost_pair *
5281 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5283 return ivs->cand_for_use[use->id];
5286 /* Computes the cost field of IVS structure. */
5288 static void
5289 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5291 comp_cost cost = ivs->cand_use_cost;
5293 cost.cost += ivs->cand_cost;
5295 cost.cost += ivopts_global_cost_for_size (data,
5296 ivs->n_regs + ivs->num_used_inv_expr);
5298 ivs->cost = cost;
5301 /* Remove the invariants in set INVS from set IVS. */
5303 static void
5304 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5306 bitmap_iterator bi;
5307 unsigned iid;
5309 if (!invs)
5310 return;
5312 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5314 ivs->n_invariant_uses[iid]--;
5315 if (ivs->n_invariant_uses[iid] == 0)
5316 ivs->n_regs--;
5320 /* Set USE not to be expressed by any candidate in IVS. */
5322 static void
5323 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5324 struct iv_use *use)
5326 unsigned uid = use->id, cid;
5327 struct cost_pair *cp;
5329 cp = ivs->cand_for_use[uid];
5330 if (!cp)
5331 return;
5332 cid = cp->cand->id;
5334 ivs->bad_uses++;
5335 ivs->cand_for_use[uid] = NULL;
5336 ivs->n_cand_uses[cid]--;
5338 if (ivs->n_cand_uses[cid] == 0)
5340 bitmap_clear_bit (ivs->cands, cid);
5341 /* Do not count the pseudocandidates. */
5342 if (cp->cand->iv)
5343 ivs->n_regs--;
5344 ivs->n_cands--;
5345 ivs->cand_cost -= cp->cand->cost;
5347 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5350 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5352 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5354 if (cp->inv_expr_id != -1)
5356 ivs->used_inv_expr[cp->inv_expr_id]--;
5357 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5358 ivs->num_used_inv_expr--;
5360 iv_ca_recount_cost (data, ivs);
5363 /* Add invariants in set INVS to set IVS. */
5365 static void
5366 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5368 bitmap_iterator bi;
5369 unsigned iid;
5371 if (!invs)
5372 return;
5374 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5376 ivs->n_invariant_uses[iid]++;
5377 if (ivs->n_invariant_uses[iid] == 1)
5378 ivs->n_regs++;
5382 /* Set cost pair for USE in set IVS to CP. */
5384 static void
5385 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5386 struct iv_use *use, struct cost_pair *cp)
5388 unsigned uid = use->id, cid;
5390 if (ivs->cand_for_use[uid] == cp)
5391 return;
5393 if (ivs->cand_for_use[uid])
5394 iv_ca_set_no_cp (data, ivs, use);
5396 if (cp)
5398 cid = cp->cand->id;
5400 ivs->bad_uses--;
5401 ivs->cand_for_use[uid] = cp;
5402 ivs->n_cand_uses[cid]++;
5403 if (ivs->n_cand_uses[cid] == 1)
5405 bitmap_set_bit (ivs->cands, cid);
5406 /* Do not count the pseudocandidates. */
5407 if (cp->cand->iv)
5408 ivs->n_regs++;
5409 ivs->n_cands++;
5410 ivs->cand_cost += cp->cand->cost;
5412 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5415 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5416 iv_ca_set_add_invariants (ivs, cp->depends_on);
5418 if (cp->inv_expr_id != -1)
5420 ivs->used_inv_expr[cp->inv_expr_id]++;
5421 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5422 ivs->num_used_inv_expr++;
5424 iv_ca_recount_cost (data, ivs);
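
/* Note that the updates above are reference counts: a candidate
   contributes its register and its cand_cost only when its use count
   goes from 0 to 1, and an invariant occupies a register only while at
   least one selected cost pair depends on it.  This makes applying and
   reverting a tentative change cheap, which the search below relies
   on.  */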
5428 /* Extend set IVS by expressing USE by some of the candidates in it
5429 if possible. Consider all important candidates if candidates in
5430 set IVS don't give any result. */
5432 static void
5433 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5434 struct iv_use *use)
5436 struct cost_pair *best_cp = NULL, *cp;
5437 bitmap_iterator bi;
5438 unsigned i;
5439 struct iv_cand *cand;
5441 gcc_assert (ivs->upto >= use->id);
5442 ivs->upto++;
5443 ivs->bad_uses++;
5445 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5447 cand = iv_cand (data, i);
5448 cp = get_use_iv_cost (data, use, cand);
5449 if (cheaper_cost_pair (cp, best_cp))
5450 best_cp = cp;
5453 if (best_cp == NULL)
5455 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5457 cand = iv_cand (data, i);
5458 cp = get_use_iv_cost (data, use, cand);
5459 if (cheaper_cost_pair (cp, best_cp))
5460 best_cp = cp;
5464 iv_ca_set_cp (data, ivs, use, best_cp);
5467 /* Get cost for assignment IVS. */
5469 static comp_cost
5470 iv_ca_cost (struct iv_ca *ivs)
5472 /* This was a conditional expression but it triggered a bug in
5473 Sun C 5.5. */
5474 if (ivs->bad_uses)
5475 return infinite_cost;
5476 else
5477 return ivs->cost;
5480 /* Returns true if all dependences of CP are among invariants in IVS. */
5482 static bool
5483 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5485 unsigned i;
5486 bitmap_iterator bi;
5488 if (!cp->depends_on)
5489 return true;
5491 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5493 if (ivs->n_invariant_uses[i] == 0)
5494 return false;
5497 return true;
5500 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5501 it before NEXT_CHANGE. */
5503 static struct iv_ca_delta *
5504 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5505 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5507 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5509 change->use = use;
5510 change->old_cp = old_cp;
5511 change->new_cp = new_cp;
5512 change->next_change = next_change;
5514 return change;
5517 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5518 are rewritten. */
5520 static struct iv_ca_delta *
5521 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5523 struct iv_ca_delta *last;
5525 if (!l2)
5526 return l1;
5528 if (!l1)
5529 return l2;
5531 for (last = l1; last->next_change; last = last->next_change)
5532 continue;
5533 last->next_change = l2;
5535 return l1;
5538 /* Reverse the list of changes DELTA, forming the inverse to it. */
5540 static struct iv_ca_delta *
5541 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5543 struct iv_ca_delta *act, *next, *prev = NULL;
5544 struct cost_pair *tmp;
5546 for (act = delta; act; act = next)
5548 next = act->next_change;
5549 act->next_change = prev;
5550 prev = act;
5552 tmp = act->old_cp;
5553 act->old_cp = act->new_cp;
5554 act->new_cp = tmp;
5557 return prev;
5560 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5561 reverted instead. */
5563 static void
5564 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5565 struct iv_ca_delta *delta, bool forward)
5567 struct cost_pair *from, *to;
5568 struct iv_ca_delta *act;
5570 if (!forward)
5571 delta = iv_ca_delta_reverse (delta);
5573 for (act = delta; act; act = act->next_change)
5575 from = act->old_cp;
5576 to = act->new_cp;
5577 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5578 iv_ca_set_cp (data, ivs, act->use, to);
5581 if (!forward)
5582 iv_ca_delta_reverse (delta);
5585 /* Returns true if CAND is used in IVS. */
5587 static bool
5588 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5590 return ivs->n_cand_uses[cand->id] > 0;
5593 /* Returns number of induction variable candidates in the set IVS. */
5595 static unsigned
5596 iv_ca_n_cands (struct iv_ca *ivs)
5598 return ivs->n_cands;
5601 /* Free the list of changes DELTA. */
5603 static void
5604 iv_ca_delta_free (struct iv_ca_delta **delta)
5606 struct iv_ca_delta *act, *next;
5608 for (act = *delta; act; act = next)
5610 next = act->next_change;
5611 free (act);
5614 *delta = NULL;
5617 /* Allocates a new iv candidate assignment. */
5619 static struct iv_ca *
5620 iv_ca_new (struct ivopts_data *data)
5622 struct iv_ca *nw = XNEW (struct iv_ca);
5624 nw->upto = 0;
5625 nw->bad_uses = 0;
5626 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5627 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5628 nw->cands = BITMAP_ALLOC (NULL);
5629 nw->n_cands = 0;
5630 nw->n_regs = 0;
5631 nw->cand_use_cost = no_cost;
5632 nw->cand_cost = 0;
5633 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5634 nw->cost = no_cost;
5635 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5636 nw->num_used_inv_expr = 0;
5638 return nw;
5641 /* Free memory occupied by the set IVS. */
5643 static void
5644 iv_ca_free (struct iv_ca **ivs)
5646 free ((*ivs)->cand_for_use);
5647 free ((*ivs)->n_cand_uses);
5648 BITMAP_FREE ((*ivs)->cands);
5649 free ((*ivs)->n_invariant_uses);
5650 free ((*ivs)->used_inv_expr);
5651 free (*ivs);
5652 *ivs = NULL;
5655 /* Dumps IVS to FILE. */
5657 static void
5658 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5660 const char *pref = " invariants ";
5661 unsigned i;
5662 comp_cost cost = iv_ca_cost (ivs);
5664 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5665 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5666 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5667 bitmap_print (file, ivs->cands, " candidates: ","\n");
5669 for (i = 0; i < ivs->upto; i++)
5671 struct iv_use *use = iv_use (data, i);
5672 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5673 if (cp)
5674 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5675 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5676 else
5677 fprintf (file, " use:%d --> ??\n", use->id);
5680 for (i = 1; i <= data->max_inv_id; i++)
5681 if (ivs->n_invariant_uses[i])
5683 fprintf (file, "%s%d", pref, i);
5684 pref = ", ";
5686 fprintf (file, "\n\n");
5689 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5690 of the new set, and store the differences in DELTA. The number of induction
5691 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
5692 is true, the function tries to find a solution with minimal iv candidates. */
5694 static comp_cost
5695 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5696 struct iv_cand *cand, struct iv_ca_delta **delta,
5697 unsigned *n_ivs, bool min_ncand)
5699 unsigned i;
5700 comp_cost cost;
5701 struct iv_use *use;
5702 struct cost_pair *old_cp, *new_cp;
5704 *delta = NULL;
5705 for (i = 0; i < ivs->upto; i++)
5707 use = iv_use (data, i);
5708 old_cp = iv_ca_cand_for_use (ivs, use);
5710 if (old_cp
5711 && old_cp->cand == cand)
5712 continue;
5714 new_cp = get_use_iv_cost (data, use, cand);
5715 if (!new_cp)
5716 continue;
5718 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5719 continue;
5721 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5722 continue;
5724 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5727 iv_ca_delta_commit (data, ivs, *delta, true);
5728 cost = iv_ca_cost (ivs);
5729 if (n_ivs)
5730 *n_ivs = iv_ca_n_cands (ivs);
5731 iv_ca_delta_commit (data, ivs, *delta, false);
5733 return cost;
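
/* The commit/measure/revert sequence above is the trial-evaluation
   idiom used throughout the search: DELTA is applied, the recomputed
   cost is read off, and the set is restored, so callers can compare
   alternatives and re-commit only the cheapest delta.  */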
5736 /* Try narrowing set IVS by removing CAND. Return the cost of
5737 the new set and store the differences in DELTA. START is
5738 the candidate with which we start narrowing. */
5740 static comp_cost
5741 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5742 struct iv_cand *cand, struct iv_cand *start,
5743 struct iv_ca_delta **delta)
5745 unsigned i, ci;
5746 struct iv_use *use;
5747 struct cost_pair *old_cp, *new_cp, *cp;
5748 bitmap_iterator bi;
5749 struct iv_cand *cnd;
5750 comp_cost cost, best_cost, acost;
5752 *delta = NULL;
5753 for (i = 0; i < n_iv_uses (data); i++)
5755 use = iv_use (data, i);
5757 old_cp = iv_ca_cand_for_use (ivs, use);
5758 if (old_cp->cand != cand)
5759 continue;
5761 best_cost = iv_ca_cost (ivs);
5762 /* Start narrowing with START. */
5763 new_cp = get_use_iv_cost (data, use, start);
5765 if (data->consider_all_candidates)
5767 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5769 if (ci == cand->id || (start && ci == start->id))
5770 continue;
5772 cnd = iv_cand (data, ci);
5774 cp = get_use_iv_cost (data, use, cnd);
5775 if (!cp)
5776 continue;
5778 iv_ca_set_cp (data, ivs, use, cp);
5779 acost = iv_ca_cost (ivs);
5781 if (compare_costs (acost, best_cost) < 0)
5783 best_cost = acost;
5784 new_cp = cp;
5788 else
5790 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5792 if (ci == cand->id || (start && ci == start->id))
5793 continue;
5795 cnd = iv_cand (data, ci);
5797 cp = get_use_iv_cost (data, use, cnd);
5798 if (!cp)
5799 continue;
5801 iv_ca_set_cp (data, ivs, use, cp);
5802 acost = iv_ca_cost (ivs);
5804 if (compare_costs (acost, best_cost) < 0)
5806 best_cost = acost;
5807 new_cp = cp;
5811 /* Restore to old cp for use. */
5812 iv_ca_set_cp (data, ivs, use, old_cp);
5814 if (!new_cp)
5816 iv_ca_delta_free (delta);
5817 return infinite_cost;
5820 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5823 iv_ca_delta_commit (data, ivs, *delta, true);
5824 cost = iv_ca_cost (ivs);
5825 iv_ca_delta_commit (data, ivs, *delta, false);
5827 return cost;
5830 /* Try optimizing the set of candidates IVS by removing candidates other
5831 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5832 differences in DELTA. */
5834 static comp_cost
5835 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5836 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5838 bitmap_iterator bi;
5839 struct iv_ca_delta *act_delta, *best_delta;
5840 unsigned i;
5841 comp_cost best_cost, acost;
5842 struct iv_cand *cand;
5844 best_delta = NULL;
5845 best_cost = iv_ca_cost (ivs);
5847 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5849 cand = iv_cand (data, i);
5851 if (cand == except_cand)
5852 continue;
5854 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5856 if (compare_costs (acost, best_cost) < 0)
5858 best_cost = acost;
5859 iv_ca_delta_free (&best_delta);
5860 best_delta = act_delta;
5862 else
5863 iv_ca_delta_free (&act_delta);
5866 if (!best_delta)
5868 *delta = NULL;
5869 return best_cost;
5872 /* Recurse to possibly remove other unnecessary ivs. */
5873 iv_ca_delta_commit (data, ivs, best_delta, true);
5874 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5875 iv_ca_delta_commit (data, ivs, best_delta, false);
5876 *delta = iv_ca_delta_join (best_delta, *delta);
5877 return best_cost;
5880 /* Check if CAND_IDX identifies a candidate other than OLD_CAND with a
5881 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5882 the corresponding cost_pair; otherwise just return BEST_CP. */
5884 static struct cost_pair*
5885 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5886 unsigned int cand_idx, struct iv_cand *old_cand,
5887 struct cost_pair *best_cp)
5889 struct iv_cand *cand;
5890 struct cost_pair *cp;
5892 gcc_assert (old_cand != NULL && best_cp != NULL);
5893 if (cand_idx == old_cand->id)
5894 return best_cp;
5896 cand = iv_cand (data, cand_idx);
5897 cp = get_use_iv_cost (data, use, cand);
5898 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5899 return cp;
5901 return best_cp;
5904 /* Try breaking a locally optimal fixed-point for IVS by replacing candidates
5905 that are used by more than one iv use. For each such candidate,
5906 this function tries to represent the iv uses under that candidate using
5907 other candidates with lower local cost, then tries to prune the new set.
5908 If the new set has a lower cost, it returns the new cost after recording
5909 the candidate replacement in the list DELTA. */
5911 static comp_cost
5912 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5913 struct iv_ca_delta **delta)
5915 bitmap_iterator bi, bj;
5916 unsigned int i, j, k;
5917 struct iv_use *use;
5918 struct iv_cand *cand;
5919 comp_cost orig_cost, acost;
5920 struct iv_ca_delta *act_delta, *tmp_delta;
5921 struct cost_pair *old_cp, *best_cp = NULL;
5923 *delta = NULL;
5924 orig_cost = iv_ca_cost (ivs);
5926 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5928 if (ivs->n_cand_uses[i] == 1
5929 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5930 continue;
5932 cand = iv_cand (data, i);
5934 act_delta = NULL;
5935 /* Represent uses under current candidate using other ones with
5936 lower local cost. */
5937 for (j = 0; j < ivs->upto; j++)
5939 use = iv_use (data, j);
5940 old_cp = iv_ca_cand_for_use (ivs, use);
5942 if (old_cp->cand != cand)
5943 continue;
5945 best_cp = old_cp;
5946 if (data->consider_all_candidates)
5947 for (k = 0; k < n_iv_cands (data); k++)
5948 best_cp = cheaper_cost_with_cand (data, use, k,
5949 old_cp->cand, best_cp);
5950 else
5951 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5952 best_cp = cheaper_cost_with_cand (data, use, k,
5953 old_cp->cand, best_cp);
5955 if (best_cp == old_cp)
5956 continue;
5958 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
5960 /* No need for further pruning. */
5961 if (!act_delta)
5962 continue;
5964 /* Prune the new candidate set. */
5965 iv_ca_delta_commit (data, ivs, act_delta, true);
5966 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
5967 iv_ca_delta_commit (data, ivs, act_delta, false);
5968 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5970 if (compare_costs (acost, orig_cost) < 0)
5972 *delta = act_delta;
5973 return acost;
5975 else
5976 iv_ca_delta_free (&act_delta);
5979 return orig_cost;
5982 /* Tries to extend the set IVS in the best possible way in order
5983 to express USE. If ORIGINALP is true, prefer candidates from
5984 the original set of IVs, otherwise favor important candidates not
5985 based on any memory object. */
5987 static bool
5988 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5989 struct iv_use *use, bool originalp)
5991 comp_cost best_cost, act_cost;
5992 unsigned i;
5993 bitmap_iterator bi;
5994 struct iv_cand *cand;
5995 struct iv_ca_delta *best_delta = NULL, *act_delta;
5996 struct cost_pair *cp;
5998 iv_ca_add_use (data, ivs, use);
5999 best_cost = iv_ca_cost (ivs);
6000 cp = iv_ca_cand_for_use (ivs, use);
6001 if (cp)
6003 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6004 iv_ca_set_no_cp (data, ivs, use);
6007 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6008 first try important candidates not based on any memory object. Only if
6009 this fails, try the specific ones. Rationale -- in loops with many
6010 variables the best choice often is to use just one generic biv. If we
6011 added here many ivs specific to the uses, the optimization algorithm later
6012 would be likely to get stuck in a local minimum, thus causing us to create
6013 too many ivs. The approach from few ivs to more seems more likely to be
6014 successful -- starting from few ivs, replacing an expensive use by a
6015 specific iv should always be a win. */
6016 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6018 cand = iv_cand (data, i);
6020 if (originalp && cand->pos !=IP_ORIGINAL)
6021 continue;
6023 if (!originalp && cand->iv->base_object != NULL_TREE)
6024 continue;
6026 if (iv_ca_cand_used_p (ivs, cand))
6027 continue;
6029 cp = get_use_iv_cost (data, use, cand);
6030 if (!cp)
6031 continue;
6033 iv_ca_set_cp (data, ivs, use, cp);
6034 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6035 true);
6036 iv_ca_set_no_cp (data, ivs, use);
6037 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6039 if (compare_costs (act_cost, best_cost) < 0)
6041 best_cost = act_cost;
6043 iv_ca_delta_free (&best_delta);
6044 best_delta = act_delta;
6046 else
6047 iv_ca_delta_free (&act_delta);
6050 if (infinite_cost_p (best_cost))
6052 for (i = 0; i < use->n_map_members; i++)
6054 cp = use->cost_map + i;
6055 cand = cp->cand;
6056 if (!cand)
6057 continue;
6059 /* Already tried this. */
6060 if (cand->important)
6062 if (originalp && cand->pos == IP_ORIGINAL)
6063 continue;
6064 if (!originalp && cand->iv->base_object == NULL_TREE)
6065 continue;
6068 if (iv_ca_cand_used_p (ivs, cand))
6069 continue;
6071 act_delta = NULL;
6072 iv_ca_set_cp (data, ivs, use, cp);
6073 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6074 iv_ca_set_no_cp (data, ivs, use);
6075 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6076 cp, act_delta);
6078 if (compare_costs (act_cost, best_cost) < 0)
6080 best_cost = act_cost;
6082 if (best_delta)
6083 iv_ca_delta_free (&best_delta);
6084 best_delta = act_delta;
6086 else
6087 iv_ca_delta_free (&act_delta);
6091 iv_ca_delta_commit (data, ivs, best_delta, true);
6092 iv_ca_delta_free (&best_delta);
6094 return !infinite_cost_p (best_cost);
6097 /* Finds an initial assignment of candidates to uses. */
6099 static struct iv_ca *
6100 get_initial_solution (struct ivopts_data *data, bool originalp)
6102 struct iv_ca *ivs = iv_ca_new (data);
6103 unsigned i;
6105 for (i = 0; i < n_iv_uses (data); i++)
6106 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6108 iv_ca_free (&ivs);
6109 return NULL;
6112 return ivs;
6115 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6116 points to a bool variable; if it is true, this function tries to break
6117 a locally optimal fixed-point by replacing candidates in IVS. */
6119 static bool
6120 try_improve_iv_set (struct ivopts_data *data,
6121 struct iv_ca *ivs, bool *try_replace_p)
6123 unsigned i, n_ivs;
6124 comp_cost acost, best_cost = iv_ca_cost (ivs);
6125 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6126 struct iv_cand *cand;
6128 /* Try extending the set of induction variables by one. */
6129 for (i = 0; i < n_iv_cands (data); i++)
6131 cand = iv_cand (data, i);
6133 if (iv_ca_cand_used_p (ivs, cand))
6134 continue;
6136 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6137 if (!act_delta)
6138 continue;
6140 /* If we successfully added the candidate and the set is small enough,
6141 try optimizing it by removing other candidates. */
6142 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6144 iv_ca_delta_commit (data, ivs, act_delta, true);
6145 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6146 iv_ca_delta_commit (data, ivs, act_delta, false);
6147 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6150 if (compare_costs (acost, best_cost) < 0)
6152 best_cost = acost;
6153 iv_ca_delta_free (&best_delta);
6154 best_delta = act_delta;
6156 else
6157 iv_ca_delta_free (&act_delta);
6160 if (!best_delta)
6162 /* Try removing the candidates from the set instead. */
6163 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6165 if (!best_delta && *try_replace_p)
6167 *try_replace_p = false;
6168 /* So far the candidate-selection algorithm tends to choose fewer IVs,
6169 so that it can handle loops that have many variables but where the
6170 best choice is often to use only one general biv. One weakness is
6171 that it cannot handle the opposite case, in which different
6172 candidates should be chosen with respect to each use. To solve this,
6173 we replace candidates in the manner described in the comment on
6174 iv_ca_replace, thus giving the general algorithm a chance
6175 to break a locally optimal fixed-point in these cases. */
6176 best_cost = iv_ca_replace (data, ivs, &best_delta);
6179 if (!best_delta)
6180 return false;
6183 iv_ca_delta_commit (data, ivs, best_delta, true);
6184 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6185 iv_ca_delta_free (&best_delta);
6186 return true;
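
/* In effect the search is hill climbing: each round either extends the
   set by one candidate (pruning afterwards if the set is small enough),
   prunes it, or as a last resort replaces candidates via iv_ca_replace;
   a delta is kept only when it strictly lowers the cost, which also
   guarantees that this loop terminates.  */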
6189 /* Attempts to find the optimal set of induction variables. We use a simple
6190 greedy heuristic: we try to replace at most one candidate in the selected
6191 solution and remove the unused ivs, as long as this improves the cost. */
6193 static struct iv_ca *
6194 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6196 struct iv_ca *set;
6197 bool try_replace_p = true;
6199 /* Get the initial solution. */
6200 set = get_initial_solution (data, originalp);
6201 if (!set)
6203 if (dump_file && (dump_flags & TDF_DETAILS))
6204 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6205 return NULL;
6208 if (dump_file && (dump_flags & TDF_DETAILS))
6210 fprintf (dump_file, "Initial set of candidates:\n");
6211 iv_ca_dump (data, dump_file, set);
6214 while (try_improve_iv_set (data, set, &try_replace_p))
6216 if (dump_file && (dump_flags & TDF_DETAILS))
6218 fprintf (dump_file, "Improved to:\n");
6219 iv_ca_dump (data, dump_file, set);
6223 return set;
6226 static struct iv_ca *
6227 find_optimal_iv_set (struct ivopts_data *data)
6229 unsigned i;
6230 struct iv_ca *set, *origset;
6231 struct iv_use *use;
6232 comp_cost cost, origcost;
6234 /* Determine the cost based on a strategy that starts with the original IVs,
6235 then try again using a strategy that prefers candidates not based
6236 on any memory object. */
6237 origset = find_optimal_iv_set_1 (data, true);
6238 set = find_optimal_iv_set_1 (data, false);
6240 if (!origset && !set)
6241 return NULL;
6243 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6244 cost = set ? iv_ca_cost (set) : infinite_cost;
6246 if (dump_file && (dump_flags & TDF_DETAILS))
6248 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6249 origcost.cost, origcost.complexity);
6250 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6251 cost.cost, cost.complexity);
6254 /* Choose the one with the best cost. */
6255 if (compare_costs (origcost, cost) <= 0)
6257 if (set)
6258 iv_ca_free (&set);
6259 set = origset;
6261 else if (origset)
6262 iv_ca_free (&origset);
6264 for (i = 0; i < n_iv_uses (data); i++)
6266 use = iv_use (data, i);
6267 use->selected = iv_ca_cand_for_use (set, use)->cand;
6270 return set;
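
/* An illustrative sketch of the cost comparison used above to pick
   between the two strategies (hypothetical toy_* names; the real
   compare_costs additionally handles infinite_cost): costs compare
   lexicographically, raw cost first, complexity as the tie-break.  */

struct toy_cost
{
  int cost;
  int complexity;
};

static int
toy_compare_costs (struct toy_cost a, struct toy_cost b)
{
  if (a.cost != b.cost)
    return a.cost - b.cost;		/* cheaper wins  */
  return a.complexity - b.complexity;	/* tie-break on complexity  */
}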
6273 /* Creates a new induction variable corresponding to CAND. */
6275 static void
6276 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6278 gimple_stmt_iterator incr_pos;
6279 tree base;
6280 bool after = false;
6282 if (!cand->iv)
6283 return;
6285 switch (cand->pos)
6287 case IP_NORMAL:
6288 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6289 break;
6291 case IP_END:
6292 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6293 after = true;
6294 break;
6296 case IP_AFTER_USE:
6297 after = true;
6298 /* fall through */
6299 case IP_BEFORE_USE:
6300 incr_pos = gsi_for_stmt (cand->incremented_at);
6301 break;
6303 case IP_ORIGINAL:
6304 /* Mark that the iv is preserved. */
6305 name_info (data, cand->var_before)->preserve_biv = true;
6306 name_info (data, cand->var_after)->preserve_biv = true;
6308 /* Rewrite the increment so that it uses var_before directly. */
6309 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6310 return;
6313 gimple_add_tmp_var (cand->var_before);
6315 base = unshare_expr (cand->iv->base);
6317 create_iv (base, unshare_expr (cand->iv->step),
6318 cand->var_before, data->current_loop,
6319 &incr_pos, after, &cand->var_before, &cand->var_after);
6322 /* Creates new induction variables described in SET. */
6324 static void
6325 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6327 unsigned i;
6328 struct iv_cand *cand;
6329 bitmap_iterator bi;
6331 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6333 cand = iv_cand (data, i);
6334 create_new_iv (data, cand);
6337 if (dump_file && (dump_flags & TDF_DETAILS))
6339 fprintf (dump_file, "\nSelected IV set: \n");
6340 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6342 cand = iv_cand (data, i);
6343 dump_cand (dump_file, cand);
6345 fprintf (dump_file, "\n");
6349 /* Rewrites USE (definition of iv used in a nonlinear expression)
6350 using candidate CAND. */
6352 static void
6353 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6354 struct iv_use *use, struct iv_cand *cand)
6356 tree comp;
6357 tree op, tgt;
6358 gassign *ass;
6359 gimple_stmt_iterator bsi;
6361 /* An important special case -- if we are asked to express the value of
6362 the original iv by itself, just exit; there is no need to
6363 introduce a new computation (that might also need casting the
6364 variable to unsigned and back). */
6365 if (cand->pos == IP_ORIGINAL
6366 && cand->incremented_at == use->stmt)
6368 enum tree_code stmt_code;
6370 gcc_assert (is_gimple_assign (use->stmt));
6371 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6373 /* Check whether we may leave the computation unchanged.
6374 This is the case only if it does not rely on other
6375 computations in the loop -- otherwise, the computation
6376 we rely upon may be removed in remove_unused_ivs,
6377 thus leading to an ICE. */
6378 stmt_code = gimple_assign_rhs_code (use->stmt);
6379 if (stmt_code == PLUS_EXPR
6380 || stmt_code == MINUS_EXPR
6381 || stmt_code == POINTER_PLUS_EXPR)
6383 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6384 op = gimple_assign_rhs2 (use->stmt);
6385 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6386 op = gimple_assign_rhs1 (use->stmt);
6387 else
6388 op = NULL_TREE;
6390 else
6391 op = NULL_TREE;
6393 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6394 return;
6397 comp = get_computation (data->current_loop, use, cand);
6398 gcc_assert (comp != NULL_TREE);
6400 switch (gimple_code (use->stmt))
6402 case GIMPLE_PHI:
6403 tgt = PHI_RESULT (use->stmt);
6405 /* If we should keep the biv, do not replace it. */
6406 if (name_info (data, tgt)->preserve_biv)
6407 return;
6409 bsi = gsi_after_labels (gimple_bb (use->stmt));
6410 break;
6412 case GIMPLE_ASSIGN:
6413 tgt = gimple_assign_lhs (use->stmt);
6414 bsi = gsi_for_stmt (use->stmt);
6415 break;
6417 default:
6418 gcc_unreachable ();
6421 if (!valid_gimple_rhs_p (comp)
6422 || (gimple_code (use->stmt) != GIMPLE_PHI
6423 /* We can't allow re-allocating the stmt as it might be pointed
6424 to still. */
6425 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6426 >= gimple_num_ops (gsi_stmt (bsi)))))
6428 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6429 true, GSI_SAME_STMT);
6430 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6432 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6433 /* As this isn't a plain copy we have to reset alignment
6434 information. */
6435 if (SSA_NAME_PTR_INFO (comp))
6436 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6440 if (gimple_code (use->stmt) == GIMPLE_PHI)
6442 ass = gimple_build_assign (tgt, comp);
6443 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6445 bsi = gsi_for_stmt (use->stmt);
6446 remove_phi_node (&bsi, false);
6448 else
6450 gimple_assign_set_rhs_from_tree (&bsi, comp);
6451 use->stmt = gsi_stmt (bsi);
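
/* A simplified standalone sketch of the arithmetic behind
   get_computation as used above (hypothetical toy_* names): if the
   candidate runs as cand = cbase + cstep * iter and the use as
   use = ubase + ustep * iter, eliminating iter expresses the use in
   terms of the candidate.  This assumes cstep divides ustep and
   ignores overflow; the real code works in unsigned types and
   handles far more cases.  */

static long
toy_use_from_cand (long cand, long cbase, long cstep,
		   long ubase, long ustep)
{
  /* use = ubase + (cand - cbase) * (ustep / cstep)  */
  return ubase + (cand - cbase) * (ustep / cstep);
}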
6455 /* Performs a peephole optimization to reorder the iv update statement with
6456 a mem ref to enable instruction combining in later phases. The mem ref uses
6457 the iv value before the update, so the reordering transformation requires
6458 adjustment of the offset. CAND is the selected IV_CAND.
6460 Example:
6462 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6463 iv2 = iv1 + 1;
6465 if (t < val) (1)
6466 goto L;
6467 goto Head;
6470 Directly propagating t over to (1) will introduce an overlapping live
6471 range, thus increasing register pressure. This peephole transforms it into:
6474 iv2 = iv1 + 1;
6475 t = MEM_REF (base, iv2, 8, 8);
6476 if (t < val)
6477 goto L;
6478 goto Head;
6481 static void
6482 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6484 tree var_after;
6485 gimple iv_update, stmt;
6486 basic_block bb;
6487 gimple_stmt_iterator gsi, gsi_iv;
6489 if (cand->pos != IP_NORMAL)
6490 return;
6492 var_after = cand->var_after;
6493 iv_update = SSA_NAME_DEF_STMT (var_after);
6495 bb = gimple_bb (iv_update);
6496 gsi = gsi_last_nondebug_bb (bb);
6497 stmt = gsi_stmt (gsi);
6499 /* Only handle conditional statements for now. */
6500 if (gimple_code (stmt) != GIMPLE_COND)
6501 return;
6503 gsi_prev_nondebug (&gsi);
6504 stmt = gsi_stmt (gsi);
6505 if (stmt != iv_update)
6506 return;
6508 gsi_prev_nondebug (&gsi);
6509 if (gsi_end_p (gsi))
6510 return;
6512 stmt = gsi_stmt (gsi);
6513 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6514 return;
6516 if (stmt != use->stmt)
6517 return;
6519 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6520 return;
6522 if (dump_file && (dump_flags & TDF_DETAILS))
6524 fprintf (dump_file, "Reordering \n");
6525 print_gimple_stmt (dump_file, iv_update, 0, 0);
6526 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6527 fprintf (dump_file, "\n");
6530 gsi = gsi_for_stmt (use->stmt);
6531 gsi_iv = gsi_for_stmt (iv_update);
6532 gsi_move_before (&gsi_iv, &gsi);
6534 cand->pos = IP_BEFORE_USE;
6535 cand->incremented_at = use->stmt;
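
/* A source-level illustration of the reordering performed above
   (hypothetical toy_* names): the iv update is moved before the
   memory reference and the index is compensated, so both loops load
   exactly the same elements.  */

static long
toy_sum_before (const long *base, unsigned n)
{
  long t = 0;
  for (unsigned iv = 0; iv < n;)
    {
      t += base[iv];	/* mem ref uses iv before the update  */
      iv = iv + 1;	/* iv update  */
    }
  return t;
}

static long
toy_sum_after (const long *base, unsigned n)
{
  long t = 0;
  for (unsigned iv = 0; iv < n;)
    {
      iv = iv + 1;	  /* iv update moved before the mem ref ...  */
      t += base[iv - 1];  /* ... with the offset adjusted to match  */
    }
  return t;
}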
6538 /* Rewrites USE (address that is an iv) using candidate CAND. */
6540 static void
6541 rewrite_use_address (struct ivopts_data *data,
6542 struct iv_use *use, struct iv_cand *cand)
6544 aff_tree aff;
6545 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6546 tree base_hint = NULL_TREE;
6547 tree ref, iv;
6548 bool ok;
6550 adjust_iv_update_pos (cand, use);
6551 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6552 gcc_assert (ok);
6553 unshare_aff_combination (&aff);
6555 /* To avoid undefined overflow problems, all IV candidates use unsigned
6556 integer types. The drawback is that this makes it impossible for
6557 create_mem_ref to distinguish an IV that is based on a memory object
6558 from one that represents simply an offset.
6560 To work around this problem, we pass a hint to create_mem_ref that
6561 indicates which variable (if any) in aff is an IV based on a memory
6562 object. Note that we only consider the candidate. If this is not
6563 based on an object, the base of the reference is in some subexpression
6564 of the use -- but these will use pointer types, so they are recognized
6565 by the create_mem_ref heuristics anyway. */
6566 if (cand->iv->base_object)
6567 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6569 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6570 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6571 reference_alias_ptr_type (*use->op_p),
6572 iv, base_hint, data->speed);
6573 copy_ref_info (ref, *use->op_p);
6574 *use->op_p = ref;
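
/* A minimal aside on the comment above about unsigned candidate
   types (hypothetical example, standard C semantics): signed
   overflow is undefined, so an iv declared as int may not legally
   wrap, while unsigned arithmetic wraps modulo 2^N by definition,
   which is what lets the pass rewrite index computations freely.  */

static unsigned
toy_wrapping_iv (unsigned start, unsigned step, unsigned iters)
{
  unsigned iv = start;
  for (unsigned i = 0; i < iters; i++)
    iv += step;		/* well defined even when it wraps  */
  return iv;
}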
6577 /* Rewrites USE (a condition in which one of the arguments is an iv)
6578 using candidate CAND. */
6580 static void
6581 rewrite_use_compare (struct ivopts_data *data,
6582 struct iv_use *use, struct iv_cand *cand)
6584 tree comp, *var_p, op, bound;
6585 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6586 enum tree_code compare;
6587 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6588 bool ok;
6590 bound = cp->value;
6591 if (bound)
6593 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6594 tree var_type = TREE_TYPE (var);
6595 gimple_seq stmts;
6597 if (dump_file && (dump_flags & TDF_DETAILS))
6599 fprintf (dump_file, "Replacing exit test: ");
6600 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6602 compare = cp->comp;
6603 bound = unshare_expr (fold_convert (var_type, bound));
6604 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6605 if (stmts)
6606 gsi_insert_seq_on_edge_immediate (
6607 loop_preheader_edge (data->current_loop),
6608 stmts);
6610 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6611 gimple_cond_set_lhs (cond_stmt, var);
6612 gimple_cond_set_code (cond_stmt, compare);
6613 gimple_cond_set_rhs (cond_stmt, op);
6614 return;
6617 /* The induction variable elimination failed; just express the original
6618 giv. */
6619 comp = get_computation (data->current_loop, use, cand);
6620 gcc_assert (comp != NULL_TREE);
6622 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6623 gcc_assert (ok);
6625 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6626 true, GSI_SAME_STMT);
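
/* A source-level illustration of the exit-test replacement performed
   above (hypothetical toy_* names): the counter comparison i < n is
   replaced by a comparison of the selected iv (here a pointer)
   against a bound computed once, which is what the code above inserts
   on the preheader edge.  */

static void
toy_clear_counted (char *a, unsigned n)
{
  char *p = a;
  for (unsigned i = 0; i < n; i++)	/* exit test on the counter  */
    *p++ = 0;
}

static void
toy_clear_bounded (char *a, unsigned n)
{
  char *bound = a + n;			/* bound computed once  */
  for (char *p = a; p < bound; p++)	/* rewritten exit test  */
    *p = 0;
}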
6629 /* Rewrites USE using candidate CAND. */
6631 static void
6632 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6634 switch (use->type)
6636 case USE_NONLINEAR_EXPR:
6637 rewrite_use_nonlinear_expr (data, use, cand);
6638 break;
6640 case USE_ADDRESS:
6641 rewrite_use_address (data, use, cand);
6642 break;
6644 case USE_COMPARE:
6645 rewrite_use_compare (data, use, cand);
6646 break;
6648 default:
6649 gcc_unreachable ();
6652 update_stmt (use->stmt);
6655 /* Rewrite the uses using the selected induction variables. */
6657 static void
6658 rewrite_uses (struct ivopts_data *data)
6660 unsigned i;
6661 struct iv_cand *cand;
6662 struct iv_use *use;
6664 for (i = 0; i < n_iv_uses (data); i++)
6666 use = iv_use (data, i);
6667 cand = use->selected;
6668 gcc_assert (cand);
6670 rewrite_use (data, use, cand);
6674 /* Removes the ivs that are not used after rewriting. */
6676 static void
6677 remove_unused_ivs (struct ivopts_data *data)
6679 unsigned j;
6680 bitmap_iterator bi;
6681 bitmap toremove = BITMAP_ALLOC (NULL);
6683 /* Figure out an order in which to release SSA DEFs so that we don't
6684 release something that we'd have to propagate into a debug stmt
6685 afterwards. */
6686 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6688 struct version_info *info;
6690 info = ver_info (data, j);
6691 if (info->iv
6692 && !integer_zerop (info->iv->step)
6693 && !info->inv_id
6694 && !info->iv->have_use_for
6695 && !info->preserve_biv)
6697 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6699 tree def = info->iv->ssa_name;
6701 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6703 imm_use_iterator imm_iter;
6704 use_operand_p use_p;
6705 gimple stmt;
6706 int count = 0;
6708 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6710 if (!gimple_debug_bind_p (stmt))
6711 continue;
6713 /* We just want to determine whether to do nothing
6714 (count == 0), to substitute the computed
6715 expression into a single use of the SSA DEF by
6716 itself (count == 1), or to use a debug temp
6717 because the SSA DEF is used multiple times or as
6718 part of a larger expression (count > 1). */
6719 count++;
6720 if (gimple_debug_bind_get_value (stmt) != def)
6721 count++;
6723 if (count > 1)
6724 BREAK_FROM_IMM_USE_STMT (imm_iter);
6727 if (!count)
6728 continue;
6730 struct iv_use dummy_use;
6731 struct iv_cand *best_cand = NULL, *cand;
6732 unsigned i, best_pref = 0, cand_pref;
6734 memset (&dummy_use, 0, sizeof (dummy_use));
6735 dummy_use.iv = info->iv;
6736 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6738 cand = iv_use (data, i)->selected;
6739 if (cand == best_cand)
6740 continue;
6741 cand_pref = operand_equal_p (cand->iv->step,
6742 info->iv->step, 0)
6743 ? 4 : 0;
6744 cand_pref
6745 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6746 == TYPE_MODE (TREE_TYPE (info->iv->base))
6747 ? 2 : 0;
6748 cand_pref
6749 += TREE_CODE (cand->iv->base) == INTEGER_CST
6750 ? 1 : 0;
6751 if (best_cand == NULL || best_pref < cand_pref)
6753 best_cand = cand;
6754 best_pref = cand_pref;
6758 if (!best_cand)
6759 continue;
6761 tree comp = get_computation_at (data->current_loop,
6762 &dummy_use, best_cand,
6763 SSA_NAME_DEF_STMT (def));
6764 if (!comp)
6765 continue;
6767 if (count > 1)
6769 tree vexpr = make_node (DEBUG_EXPR_DECL);
6770 DECL_ARTIFICIAL (vexpr) = 1;
6771 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6772 if (SSA_NAME_VAR (def))
6773 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6774 else
6775 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6776 gdebug *def_temp
6777 = gimple_build_debug_bind (vexpr, comp, NULL);
6778 gimple_stmt_iterator gsi;
6780 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6781 gsi = gsi_after_labels (gimple_bb
6782 (SSA_NAME_DEF_STMT (def)));
6783 else
6784 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6786 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6787 comp = vexpr;
6790 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6792 if (!gimple_debug_bind_p (stmt))
6793 continue;
6795 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6796 SET_USE (use_p, comp);
6798 update_stmt (stmt);
6804 release_defs_bitset (toremove);
6806 BITMAP_FREE (toremove);
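
/* An illustrative restatement of the candidate-preference scoring
   used above when expressing a removed iv in debug stmts
   (hypothetical toy_* name): matching the step is worth 4, matching
   the mode 2, and a constant base 1, so a step match alone outweighs
   any combination of the weaker criteria.  */

static unsigned
toy_cand_pref (int same_step, int same_mode, int const_base)
{
  return (same_step ? 4 : 0) + (same_mode ? 2 : 0) + (const_base ? 1 : 0);
}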
6809 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6810 for hash_map::traverse. */
6812 bool
6813 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6815 free (value);
6816 return true;
6819 /* Frees data allocated by the optimization of a single loop. */
6821 static void
6822 free_loop_data (struct ivopts_data *data)
6824 unsigned i, j;
6825 bitmap_iterator bi;
6826 tree obj;
6828 if (data->niters)
6830 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6831 delete data->niters;
6832 data->niters = NULL;
6835 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6837 struct version_info *info;
6839 info = ver_info (data, i);
6840 free (info->iv);
6841 info->iv = NULL;
6842 info->has_nonlin_use = false;
6843 info->preserve_biv = false;
6844 info->inv_id = 0;
6846 bitmap_clear (data->relevant);
6847 bitmap_clear (data->important_candidates);
6849 for (i = 0; i < n_iv_uses (data); i++)
6851 struct iv_use *use = iv_use (data, i);
6853 free (use->iv);
6854 BITMAP_FREE (use->related_cands);
6855 for (j = 0; j < use->n_map_members; j++)
6856 if (use->cost_map[j].depends_on)
6857 BITMAP_FREE (use->cost_map[j].depends_on);
6858 free (use->cost_map);
6859 free (use);
6861 data->iv_uses.truncate (0);
6863 for (i = 0; i < n_iv_cands (data); i++)
6865 struct iv_cand *cand = iv_cand (data, i);
6867 free (cand->iv);
6868 if (cand->depends_on)
6869 BITMAP_FREE (cand->depends_on);
6870 free (cand);
6872 data->iv_candidates.truncate (0);
6874 if (data->version_info_size < num_ssa_names)
6876 data->version_info_size = 2 * num_ssa_names;
6877 free (data->version_info);
6878 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6881 data->max_inv_id = 0;
6883 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6884 SET_DECL_RTL (obj, NULL_RTX);
6886 decl_rtl_to_reset.truncate (0);
6888 data->inv_expr_tab->empty ();
6889 data->inv_expr_id = 0;
6892 /* Finalizes data structures used by the iv optimization pass. */
6895 static void
6896 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6898 free_loop_data (data);
6899 free (data->version_info);
6900 BITMAP_FREE (data->relevant);
6901 BITMAP_FREE (data->important_candidates);
6903 decl_rtl_to_reset.release ();
6904 data->iv_uses.release ();
6905 data->iv_candidates.release ();
6906 delete data->inv_expr_tab;
6907 data->inv_expr_tab = NULL;
6908 free_affine_expand_cache (&data->name_expansion_cache);
6911 /* Returns true if the loop body BODY includes any function calls. */
6913 static bool
6914 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6916 gimple_stmt_iterator gsi;
6917 unsigned i;
6919 for (i = 0; i < num_nodes; i++)
6920 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6922 gimple stmt = gsi_stmt (gsi);
6923 if (is_gimple_call (stmt)
6924 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6925 return true;
6927 return false;
6930 /* Optimizes the LOOP. Returns true if anything changed. */
6932 static bool
6933 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6935 bool changed = false;
6936 struct iv_ca *iv_ca;
6937 edge exit = single_dom_exit (loop);
6938 basic_block *body;
6940 gcc_assert (!data->niters);
6941 data->current_loop = loop;
6942 data->speed = optimize_loop_for_speed_p (loop);
6944 if (dump_file && (dump_flags & TDF_DETAILS))
6946 fprintf (dump_file, "Processing loop %d\n", loop->num);
6948 if (exit)
6950 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6951 exit->src->index, exit->dest->index);
6952 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6953 fprintf (dump_file, "\n");
6956 fprintf (dump_file, "\n");
6959 body = get_loop_body (loop);
6960 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6961 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6962 free (body);
6964 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6966 /* For each ssa name determines whether it behaves as an induction variable
6967 in some loop. */
6968 if (!find_induction_variables (data))
6969 goto finish;
6971 /* Finds interesting uses (item 1). */
6972 find_interesting_uses (data);
6973 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6974 goto finish;
6976 /* Finds candidates for the induction variables (item 2). */
6977 find_iv_candidates (data);
6979 /* Calculates the costs (item 3, part 1). */
6980 determine_iv_costs (data);
6981 determine_use_iv_costs (data);
6982 determine_set_costs (data);
6984 /* Find the optimal set of induction variables (item 3, part 2). */
6985 iv_ca = find_optimal_iv_set (data);
6986 if (!iv_ca)
6987 goto finish;
6988 changed = true;
6990 /* Create the new induction variables (item 4, part 1). */
6991 create_new_ivs (data, iv_ca);
6992 iv_ca_free (&iv_ca);
6994 /* Rewrite the uses (item 4, part 2). */
6995 rewrite_uses (data);
6997 /* Remove the ivs that are unused after rewriting. */
6998 remove_unused_ivs (data);
7000 /* We have changed the structure of induction variables; it might happen
7001 that definitions in the scev database refer to some of them that were
7002 eliminated. */
7003 scev_reset ();
7005 finish:
7006 free_loop_data (data);
7008 return changed;
7011 /* Main entry point. Optimizes induction variables in loops. */
7013 void
7014 tree_ssa_iv_optimize (void)
7016 struct loop *loop;
7017 struct ivopts_data data;
7019 tree_ssa_iv_optimize_init (&data);
7021 /* Optimize the loops starting with the innermost ones. */
7022 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7024 if (dump_file && (dump_flags & TDF_DETAILS))
7025 flow_loop_dump (loop, dump_file, NULL, 1);
7027 tree_ssa_iv_optimize_loop (&data, loop);
7030 tree_ssa_iv_optimize_finalize (&data);