/* Induction variable optimizations.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
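
/* An illustrative sketch of the overall effect (an editorial example, not
   taken from this file): ivopts typically rewrites an indexed loop such as

	for (i = 0; i < n; i++)
	  a[i] = 0;

   into a strength-reduced form conceptually equivalent to

	for (p = &a[0]; p < &a[n]; p++)
	  *p = 0;

   replacing the per-iteration address computation a + i * sizeof (*a)
   with a single pointer increment.  */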
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 10000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
129 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
135 return niter;
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
	 && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
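
/* A minimal editorial illustration (not part of the original source):
   the comparison above is lexicographic on (cost, complexity), e.g.

	comp_cost (4, 1) < comp_cost (4, 2)	is true
	comp_cost (4, 9) < comp_cost (5, 0)	is true

   i.e. the runtime cost dominates and complexity only breaks ties.  */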
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariants that have to be
			   preserved.  */
  bitmap inv_exprs;	/* Loop invariant expressions.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;	/* For IP_{BEFORE,AFTER}_USE candidates, the place
				   where it is incremented.  */
  bitmap inv_vars;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness w.r.t. loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_ADDRESS)
	fprintf (file, "  Type:\tADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
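
/* An illustrative sketch (an editorial example, not from the original
   source): for EXPR = (char *) &a[i], the conversion is peeled off first
   and the ADDR_EXPR case then reduces the result to &a converted to
   ptr_type_node, so all ivs walking the same array share one base
   object; a non-pointer EXPR yields NULL_TREE.  */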
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
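
/* For illustration (an editorial example, not from the original source):
   &a is not complex since its operand is a DECL, whereas &a[0], whose
   operand is an ARRAY_REF, is; the sums and differences above propagate
   the answer, so alloc_iv below lowers such bases to affine form.  */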
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
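
/* A small illustration (an editorial example, not from the original
   source): for EXPR = a_1 * 4 + b_2 the recursion walks operands left to
   right and returns a_1, the first ssa name encountered; for an
   invariant such as the constant 16 it returns NULL.  */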
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, but because that's expensive
	 and unusual to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, but because that's expensive
     and unusual to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}
/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base; for uses of other types,
   both are zero by default.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree addr_base,
	    unsigned HOST_WIDE_INT addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);

  data->vgroups.safe_push (group);
  return group;
}
/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   A new group will be created if there is no existing group for the use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  unsigned HOST_WIDE_INT addr_offset = 0;

  /* A non-address type use is always recorded in a new group; for an
     address type use, try to find an existing compatible group first.  */
  if (type == USE_ADDRESS && iv->base_object)
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (use->type != USE_ADDRESS || !use->iv->base_object)
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->nonlin_use = use;
  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
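
/* An illustrative sketch (an editorial example, not from the original
   source): with biv i_1 and the chain

	j_2 = i_1 * 4;
	p_3 = (long) j_2;

   tracing p_3 walks through the cast to j_2, then through the
   multiplication to i_1, and returns the iv describing the biv i_1.  */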
/* Record that BIV, together with its predecessor and successor bivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
2134 /* Return true if memory reference REF with step STEP may be unaligned. */
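/* For illustration: on a strict-alignment target, a 32-bit access whose
address advances by STEP = 2 bytes per iteration has tree_ctz (step) == 1,
and (1 << 1) * BITS_PER_UNIT == 16 bits is less than the required 32-bit
alignment, so we conservatively answer true even if the starting address
happens to be aligned. */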
2136 static bool
2137 may_be_unaligned_p (tree ref, tree step)
2139 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2140 thus they are not misaligned. */
2141 if (TREE_CODE (ref) == TARGET_MEM_REF)
2142 return false;
2144 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2145 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2146 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2148 unsigned HOST_WIDE_INT bitpos;
2149 unsigned int ref_align;
2150 get_object_alignment_1 (ref, &ref_align, &bitpos);
2151 if (ref_align < align
2152 || (bitpos % align) != 0
2153 || (bitpos % BITS_PER_UNIT) != 0)
2154 return true;
2156 unsigned int trailing_zeros = tree_ctz (step);
2157 if (trailing_zeros < HOST_BITS_PER_INT
2158 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2159 return true;
2161 return false;
2164 /* Return true if EXPR may be non-addressable. */
2166 bool
2167 may_be_nonaddressable_p (tree expr)
2169 switch (TREE_CODE (expr))
2171 case TARGET_MEM_REF:
2172 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2173 target, thus they are always addressable. */
2174 return false;
2176 case MEM_REF:
2177 /* Likewise for MEM_REFs, modulo the storage order. */
2178 return REF_REVERSE_STORAGE_ORDER (expr);
2180 case BIT_FIELD_REF:
2181 if (REF_REVERSE_STORAGE_ORDER (expr))
2182 return true;
2183 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2185 case COMPONENT_REF:
2186 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2187 return true;
2188 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2189 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2191 case ARRAY_REF:
2192 case ARRAY_RANGE_REF:
2193 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2194 return true;
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2197 case VIEW_CONVERT_EXPR:
2198 /* This kind of view-conversion may wrap non-addressable objects
2199 and make them look addressable. After some processing the
2200 non-addressability may be uncovered again, causing ADDR_EXPRs
2201 of inappropriate objects to be built. */
2202 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2203 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2204 return true;
2205 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2207 CASE_CONVERT:
2208 return true;
2210 default:
2211 break;
2214 return false;
2217 /* Finds addresses in *OP_P inside STMT. */
2219 static void
2220 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2221 tree *op_p)
2223 tree base = *op_p, step = size_zero_node;
2224 struct iv *civ;
2225 struct ifs_ivopts_data ifs_ivopts_data;
2227 /* Do not play with volatile memory references. A bit too conservative,
2228 perhaps, but safe. */
2229 if (gimple_has_volatile_ops (stmt))
2230 goto fail;
2232 /* Ignore bitfields for now. Not really something terribly complicated
2233 to handle. TODO. */
2234 if (TREE_CODE (base) == BIT_FIELD_REF)
2235 goto fail;
2237 base = unshare_expr (base);
2239 if (TREE_CODE (base) == TARGET_MEM_REF)
2241 tree type = build_pointer_type (TREE_TYPE (base));
2242 tree astep;
2244 if (TMR_BASE (base)
2245 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2247 civ = get_iv (data, TMR_BASE (base));
2248 if (!civ)
2249 goto fail;
2251 TMR_BASE (base) = civ->base;
2252 step = civ->step;
2254 if (TMR_INDEX2 (base)
2255 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2257 civ = get_iv (data, TMR_INDEX2 (base));
2258 if (!civ)
2259 goto fail;
2261 TMR_INDEX2 (base) = civ->base;
2262 step = civ->step;
2264 if (TMR_INDEX (base)
2265 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2267 civ = get_iv (data, TMR_INDEX (base));
2268 if (!civ)
2269 goto fail;
2271 TMR_INDEX (base) = civ->base;
2272 astep = civ->step;
2274 if (astep)
2276 if (TMR_STEP (base))
2277 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2279 step = fold_build2 (PLUS_EXPR, type, step, astep);
2283 if (integer_zerop (step))
2284 goto fail;
2285 base = tree_mem_ref_addr (type, base);
2287 else
2289 ifs_ivopts_data.ivopts_data = data;
2290 ifs_ivopts_data.stmt = stmt;
2291 ifs_ivopts_data.step = size_zero_node;
2292 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2293 || integer_zerop (ifs_ivopts_data.step))
2294 goto fail;
2295 step = ifs_ivopts_data.step;
2297 /* Check that the base expression is addressable. This needs
2298 to be done after substituting bases of IVs into it. */
2299 if (may_be_nonaddressable_p (base))
2300 goto fail;
2302 /* Moreover, on strict alignment platforms, check that it is
2303 sufficiently aligned. */
2304 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2305 goto fail;
2307 base = build_fold_addr_expr (base);
2309 /* Substituting bases of IVs into the base expression might
2310 have caused folding opportunities. */
2311 if (TREE_CODE (base) == ADDR_EXPR)
2313 tree *ref = &TREE_OPERAND (base, 0);
2314 while (handled_component_p (*ref))
2315 ref = &TREE_OPERAND (*ref, 0);
2316 if (TREE_CODE (*ref) == MEM_REF)
2318 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2319 TREE_OPERAND (*ref, 0),
2320 TREE_OPERAND (*ref, 1));
2321 if (tem)
2322 *ref = tem;
2327 civ = alloc_iv (data, base, step);
2328 /* Fail if base object of this memory reference is unknown. */
2329 if (civ->base_object == NULL_TREE)
2330 goto fail;
2332 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2333 return;
2335 fail:
2336 for_each_index (op_p, idx_record_use, data);
2339 /* Finds and records invariants used in STMT. */
2341 static void
2342 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2344 ssa_op_iter iter;
2345 use_operand_p use_p;
2346 tree op;
2348 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2350 op = USE_FROM_PTR (use_p);
2351 record_invariant (data, op, false);
2355 /* Finds interesting uses of induction variables in the statement STMT. */
2357 static void
2358 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2360 struct iv *iv;
2361 tree op, *lhs, *rhs;
2362 ssa_op_iter iter;
2363 use_operand_p use_p;
2364 enum tree_code code;
2366 find_invariants_stmt (data, stmt);
2368 if (gimple_code (stmt) == GIMPLE_COND)
2370 find_interesting_uses_cond (data, stmt);
2371 return;
2374 if (is_gimple_assign (stmt))
2376 lhs = gimple_assign_lhs_ptr (stmt);
2377 rhs = gimple_assign_rhs1_ptr (stmt);
2379 if (TREE_CODE (*lhs) == SSA_NAME)
2381 /* If the statement defines an induction variable, the uses are not
2382 interesting by themselves. */
2384 iv = get_iv (data, *lhs);
2386 if (iv && !integer_zerop (iv->step))
2387 return;
2390 code = gimple_assign_rhs_code (stmt);
2391 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2392 && (REFERENCE_CLASS_P (*rhs)
2393 || is_gimple_val (*rhs)))
2395 if (REFERENCE_CLASS_P (*rhs))
2396 find_interesting_uses_address (data, stmt, rhs);
2397 else
2398 find_interesting_uses_op (data, *rhs);
2400 if (REFERENCE_CLASS_P (*lhs))
2401 find_interesting_uses_address (data, stmt, lhs);
2402 return;
2404 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2406 find_interesting_uses_cond (data, stmt);
2407 return;
2410 /* TODO -- we should also handle address uses of type
2412 memory = call (whatever);
2416 call (memory). */
2419 if (gimple_code (stmt) == GIMPLE_PHI
2420 && gimple_bb (stmt) == data->current_loop->header)
2422 iv = get_iv (data, PHI_RESULT (stmt));
2424 if (iv && !integer_zerop (iv->step))
2425 return;
2428 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2430 op = USE_FROM_PTR (use_p);
2432 if (TREE_CODE (op) != SSA_NAME)
2433 continue;
2435 iv = get_iv (data, op);
2436 if (!iv)
2437 continue;
2439 find_interesting_uses_op (data, op);
2443 /* Finds interesting uses of induction variables outside of loops
2444 on loop exit edge EXIT. */
2446 static void
2447 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2449 gphi *phi;
2450 gphi_iterator psi;
2451 tree def;
2453 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2455 phi = psi.phi ();
2456 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2457 if (!virtual_operand_p (def))
2458 find_interesting_uses_op (data, def);
2462 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2463 mode for memory reference represented by USE. */
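/* For illustration, on a hypothetical target whose [base + offset]
addressing accepts displacements in [-2048, 2047]: an offset of 2044
validates while 2048 is rejected. The "(plus reg offset)" template built
below is cached per (address space, mode) pair and only its offset
operand is replaced on each query. */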
2465 static GTY (()) vec<rtx, va_gc> *addr_list;
2467 static bool
2468 addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
2470 rtx reg, addr;
2471 unsigned list_index;
2472 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2473 machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2475 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2476 if (list_index >= vec_safe_length (addr_list))
2477 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2479 addr = (*addr_list)[list_index];
2480 if (!addr)
2482 addr_mode = targetm.addr_space.address_mode (as);
2483 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2484 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2485 (*addr_list)[list_index] = addr;
2487 else
2488 addr_mode = GET_MODE (addr);
2490 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2491 return (memory_address_addr_space_p (mem_mode, addr, as));
2494 /* Comparison function to sort a group's uses in ascending order of addr_offset. */
2496 static int
2497 group_compare_offset (const void *a, const void *b)
2499 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2500 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2502 if ((*u1)->addr_offset != (*u2)->addr_offset)
2503 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2504 else
2505 return 0;
2508 /* Check if small groups should be split. Return true if no group
2509 contains more than two uses with distinct addr_offsets. Return
2510 false otherwise. We want to split such groups because:
2512 1) Small groups don't have much benefit and may interfere with
2513 general candidate selection.
2514 2) The size of a problem with only small groups is usually small, and
2515 the general algorithm can handle it well.
2517 TODO -- The above claim may not hold when we want to merge memory
2518 accesses with consecutive addresses. */
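/* For illustration: a group with use offsets {0, 4, 4, 8} has three
distinct offsets and therefore makes this predicate return false, while
groups with offsets {0, 8} and {4, 4} have at most two distinct offsets
each, so the predicate returns true and the groups get split. */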
2520 static bool
2521 split_small_address_groups_p (struct ivopts_data *data)
2523 unsigned int i, j, distinct = 1;
2524 struct iv_use *pre;
2525 struct iv_group *group;
2527 for (i = 0; i < data->vgroups.length (); i++)
2529 group = data->vgroups[i];
2530 if (group->vuses.length () == 1)
2531 continue;
2533 gcc_assert (group->type == USE_ADDRESS);
2534 if (group->vuses.length () == 2)
2536 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2537 std::swap (group->vuses[0], group->vuses[1]);
2539 else
2540 group->vuses.qsort (group_compare_offset);
2542 if (distinct > 2)
2543 continue;
2545 distinct = 1;
2546 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2548 if (group->vuses[j]->addr_offset != pre->addr_offset)
2550 pre = group->vuses[j];
2551 distinct++;
2554 if (distinct > 2)
2555 break;
2559 return (distinct <= 2);
2562 /* For each group of address type uses, this function further groups
2563 these uses according to the maximum offset supported by target's
2564 [base + offset] addressing mode. */
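/* For illustration, assuming a target whose [base + offset] form covers
offsets up to 4095: address uses at base, base + 8 and base + 4096 start
out in one group; the first two stay together, while the last is moved
to a new group because its offset against the first use does not
validate. */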
2566 static void
2567 split_address_groups (struct ivopts_data *data)
2569 unsigned int i, j;
2570 /* Whether to split every group unconditionally. */
2571 bool split_p = split_small_address_groups_p (data);
2573 for (i = 0; i < data->vgroups.length (); i++)
2575 struct iv_group *new_group = NULL;
2576 struct iv_group *group = data->vgroups[i];
2577 struct iv_use *use = group->vuses[0];
2579 use->id = 0;
2580 use->group_id = group->id;
2581 if (group->vuses.length () == 1)
2582 continue;
2584 gcc_assert (group->type == USE_ADDRESS);
2586 for (j = 1; j < group->vuses.length ();)
2588 struct iv_use *next = group->vuses[j];
2589 HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
2591 /* Split the group if asked to, or if the offset against the first
2592 use can't fit in the offset part of the addressing mode. IV uses
2593 having the same offset are still kept in one group. */
2594 if (offset != 0
2595 && (split_p || !addr_offset_valid_p (use, offset)))
2597 if (!new_group)
2598 new_group = record_group (data, group->type);
2599 group->vuses.ordered_remove (j);
2600 new_group->vuses.safe_push (next);
2601 continue;
2604 next->id = j;
2605 next->group_id = group->id;
2606 j++;
2611 /* Finds uses of the induction variables that are interesting. */
2613 static void
2614 find_interesting_uses (struct ivopts_data *data)
2616 basic_block bb;
2617 gimple_stmt_iterator bsi;
2618 basic_block *body = get_loop_body (data->current_loop);
2619 unsigned i;
2620 edge e;
2622 for (i = 0; i < data->current_loop->num_nodes; i++)
2624 edge_iterator ei;
2625 bb = body[i];
2627 FOR_EACH_EDGE (e, ei, bb->succs)
2628 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2629 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2630 find_interesting_uses_outside (data, e);
2632 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2633 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2634 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2635 if (!is_gimple_debug (gsi_stmt (bsi)))
2636 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2639 split_address_groups (data);
2641 if (dump_file && (dump_flags & TDF_DETAILS))
2643 bitmap_iterator bi;
2645 fprintf (dump_file, "\n<Invariant Vars>:\n");
2646 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2648 struct version_info *info = ver_info (data, i);
2649 if (info->inv_id)
2651 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2652 print_generic_expr (dump_file, info->name, TDF_SLIM);
2653 fprintf (dump_file, "%s\n",
2654 info->has_nonlin_use ? "" : "\t(eliminable)");
2658 fprintf (dump_file, "\n<IV Groups>:\n");
2659 dump_groups (dump_file, data);
2660 fprintf (dump_file, "\n");
2663 free (body);
2666 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2667 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2668 we are at the top-level of the processed address. */
2670 static tree
2671 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2672 HOST_WIDE_INT *offset)
2674 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2675 enum tree_code code;
2676 tree type, orig_type = TREE_TYPE (expr);
2677 HOST_WIDE_INT off0, off1, st;
2678 tree orig_expr = expr;
2680 STRIP_NOPS (expr);
2682 type = TREE_TYPE (expr);
2683 code = TREE_CODE (expr);
2684 *offset = 0;
2686 switch (code)
2688 case INTEGER_CST:
2689 if (!cst_and_fits_in_hwi (expr)
2690 || integer_zerop (expr))
2691 return orig_expr;
2693 *offset = int_cst_value (expr);
2694 return build_int_cst (orig_type, 0);
2696 case POINTER_PLUS_EXPR:
2697 case PLUS_EXPR:
2698 case MINUS_EXPR:
2699 op0 = TREE_OPERAND (expr, 0);
2700 op1 = TREE_OPERAND (expr, 1);
2702 op0 = strip_offset_1 (op0, false, false, &off0);
2703 op1 = strip_offset_1 (op1, false, false, &off1);
2705 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2706 if (op0 == TREE_OPERAND (expr, 0)
2707 && op1 == TREE_OPERAND (expr, 1))
2708 return orig_expr;
2710 if (integer_zerop (op1))
2711 expr = op0;
2712 else if (integer_zerop (op0))
2714 if (code == MINUS_EXPR)
2715 expr = fold_build1 (NEGATE_EXPR, type, op1);
2716 else
2717 expr = op1;
2719 else
2720 expr = fold_build2 (code, type, op0, op1);
2722 return fold_convert (orig_type, expr);
2724 case MULT_EXPR:
2725 op1 = TREE_OPERAND (expr, 1);
2726 if (!cst_and_fits_in_hwi (op1))
2727 return orig_expr;
2729 op0 = TREE_OPERAND (expr, 0);
2730 op0 = strip_offset_1 (op0, false, false, &off0);
2731 if (op0 == TREE_OPERAND (expr, 0))
2732 return orig_expr;
2734 *offset = off0 * int_cst_value (op1);
2735 if (integer_zerop (op0))
2736 expr = op0;
2737 else
2738 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2740 return fold_convert (orig_type, expr);
2742 case ARRAY_REF:
2743 case ARRAY_RANGE_REF:
2744 if (!inside_addr)
2745 return orig_expr;
2747 step = array_ref_element_size (expr);
2748 if (!cst_and_fits_in_hwi (step))
2749 break;
2751 st = int_cst_value (step);
2752 op1 = TREE_OPERAND (expr, 1);
2753 op1 = strip_offset_1 (op1, false, false, &off1);
2754 *offset = off1 * st;
2756 if (top_compref
2757 && integer_zerop (op1))
2759 /* Strip the component reference completely. */
2760 op0 = TREE_OPERAND (expr, 0);
2761 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2762 *offset += off0;
2763 return op0;
2765 break;
2767 case COMPONENT_REF:
2769 tree field;
2771 if (!inside_addr)
2772 return orig_expr;
2774 tmp = component_ref_field_offset (expr);
2775 field = TREE_OPERAND (expr, 1);
2776 if (top_compref
2777 && cst_and_fits_in_hwi (tmp)
2778 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2780 HOST_WIDE_INT boffset, abs_off;
2782 /* Strip the component reference completely. */
2783 op0 = TREE_OPERAND (expr, 0);
2784 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2785 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2786 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2787 if (boffset < 0)
2788 abs_off = -abs_off;
2790 *offset = off0 + int_cst_value (tmp) + abs_off;
2791 return op0;
2794 break;
2796 case ADDR_EXPR:
2797 op0 = TREE_OPERAND (expr, 0);
2798 op0 = strip_offset_1 (op0, true, true, &off0);
2799 *offset += off0;
2801 if (op0 == TREE_OPERAND (expr, 0))
2802 return orig_expr;
2804 expr = build_fold_addr_expr (op0);
2805 return fold_convert (orig_type, expr);
2807 case MEM_REF:
2808 /* ??? Offset operand? */
2809 inside_addr = false;
2810 break;
2812 default:
2813 return orig_expr;
2816 /* Default handling of expressions for which we want to recurse into
2817 the first operand. */
2818 op0 = TREE_OPERAND (expr, 0);
2819 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2820 *offset += off0;
2822 if (op0 == TREE_OPERAND (expr, 0)
2823 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2824 return orig_expr;
2826 expr = copy_node (expr);
2827 TREE_OPERAND (expr, 0) = op0;
2828 if (op1)
2829 TREE_OPERAND (expr, 1) = op1;
2831 /* Inside address, we might strip the top level component references,
2832 thus changing type of the expression. Handling of ADDR_EXPR
2833 will fix that. */
2834 expr = fold_convert (orig_type, expr);
2836 return expr;
2839 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2841 static tree
2842 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2844 HOST_WIDE_INT off;
2845 tree core = strip_offset_1 (expr, false, false, &off);
2846 *offset = off;
2847 return core;
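/* For illustration: for EXPR = &a[i_2 + 3], with 4-byte array elements,
this returns &a[i_2] and stores 12 to *OFFSET; for EXPR = p_5 + 16 it
returns p_5 and stores 16. */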
2850 /* Returns variant of TYPE that can be used as base for different uses.
2851 We return unsigned type with the same precision, which avoids problems
2852 with overflows. */
2854 static tree
2855 generic_type_for (tree type)
2857 if (POINTER_TYPE_P (type))
2858 return unsigned_type_for (type);
2860 if (TYPE_UNSIGNED (type))
2861 return type;
2863 return unsigned_type_for (type);
2866 /* Private data for walk_tree. */
2868 struct walk_tree_data
2870 bitmap *inv_vars;
2871 struct ivopts_data *idata;
2874 /* Callback function for walk_tree; it records invariants and symbol
2875 references in *EXPR_P. DATA is the structure storing result info. */
2877 static tree
2878 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2880 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2881 struct version_info *info;
2883 if (TREE_CODE (*expr_p) != SSA_NAME)
2884 return NULL_TREE;
2886 info = name_info (wdata->idata, *expr_p);
2887 if (!info->inv_id || info->has_nonlin_use)
2888 return NULL_TREE;
2890 if (!*wdata->inv_vars)
2891 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2892 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2894 return NULL_TREE;
2897 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
2898 should store them. */
2900 static inline void
2901 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2903 struct walk_tree_data wdata;
2905 if (!inv_vars)
2906 return;
2908 wdata.idata = data;
2909 wdata.inv_vars = inv_vars;
2910 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2913 /* Get the entry from the invariant expr hash table for INV_EXPR. A new
2914 entry will be recorded if it doesn't exist yet. Given the two exprs:
2915 inv_expr + cst1, inv_expr + cst2
2916 it's hard to decide whether the constant part should be stripped
2917 or not. We choose not to strip it, based on the following facts:
2918 1) We need to count the ADD cost for the constant part if it's
2919 stripped, which isn't always trivial where this function is called.
2920 2) Stripping the constant away may conflict with the following loop
2921 invariant hoisting pass.
2922 3) Not stripping the constant away results in more invariant exprs,
2923 which usually leads to decisions preferring lower reg pressure. */
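/* For illustration: the invariant expressions a_5 + b_6 + 4 and
a_5 + b_6 + 8 are recorded as two distinct entries; had the constants
been stripped, both would share a single entry for a_5 + b_6 at the
price of accounting an extra addition at each use. */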
2925 static iv_inv_expr_ent *
2926 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2928 STRIP_NOPS (inv_expr);
2930 if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2931 return NULL;
2933 /* Don't strip constant part away as we used to. */
2935 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
2936 struct iv_inv_expr_ent ent;
2937 ent.expr = inv_expr;
2938 ent.hash = iterative_hash_expr (inv_expr, 0);
2939 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2941 if (!*slot)
2943 *slot = XNEW (struct iv_inv_expr_ent);
2944 (*slot)->expr = inv_expr;
2945 (*slot)->hash = ent.hash;
2946 (*slot)->id = ++data->max_inv_expr_id;
2949 return *slot;
2952 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2953 position to POS. If USE is not NULL, the candidate is set as related to
2954 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2955 replacement of the final value of the iv by a direct computation. */
2957 static struct iv_cand *
2958 add_candidate_1 (struct ivopts_data *data,
2959 tree base, tree step, bool important, enum iv_position pos,
2960 struct iv_use *use, gimple *incremented_at,
2961 struct iv *orig_iv = NULL)
2963 unsigned i;
2964 struct iv_cand *cand = NULL;
2965 tree type, orig_type;
2967 gcc_assert (base && step);
2969 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2970 live, but the ivopts code may replace a real pointer with one
2971 pointing before or after the memory block that is then adjusted
2972 into the memory block during the loop. FIXME: It would likely be
2973 better to actually force the pointer live and still use ivopts;
2974 for example, it would be enough to write the pointer into memory
2975 and keep it there until after the loop. */
2976 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2977 return NULL;
2979 /* For non-original variables, make sure their values are computed in a type
2980 that does not invoke undefined behavior on overflows (since in general,
2981 we cannot prove that these induction variables are non-wrapping). */
2982 if (pos != IP_ORIGINAL)
2984 orig_type = TREE_TYPE (base);
2985 type = generic_type_for (orig_type);
2986 if (type != orig_type)
2988 base = fold_convert (type, base);
2989 step = fold_convert (type, step);
2993 for (i = 0; i < data->vcands.length (); i++)
2995 cand = data->vcands[i];
2997 if (cand->pos != pos)
2998 continue;
3000 if (cand->incremented_at != incremented_at
3001 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3002 && cand->ainc_use != use))
3003 continue;
3005 if (operand_equal_p (base, cand->iv->base, 0)
3006 && operand_equal_p (step, cand->iv->step, 0)
3007 && (TYPE_PRECISION (TREE_TYPE (base))
3008 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3009 break;
3012 if (i == data->vcands.length ())
3014 cand = XCNEW (struct iv_cand);
3015 cand->id = i;
3016 cand->iv = alloc_iv (data, base, step);
3017 cand->pos = pos;
3018 if (pos != IP_ORIGINAL)
3020 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3021 cand->var_after = cand->var_before;
3023 cand->important = important;
3024 cand->incremented_at = incremented_at;
3025 data->vcands.safe_push (cand);
3027 if (TREE_CODE (step) != INTEGER_CST)
3028 find_inv_vars (data, &step, &cand->inv_vars);
3030 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3031 cand->ainc_use = use;
3032 else
3033 cand->ainc_use = NULL;
3035 cand->orig_iv = orig_iv;
3036 if (dump_file && (dump_flags & TDF_DETAILS))
3037 dump_cand (dump_file, cand);
3040 cand->important |= important;
3042 /* Relate candidate to the group for which it is added. */
3043 if (use)
3044 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3046 return cand;
3049 /* Returns true if incrementing the induction variable at the end of the LOOP
3050 is allowed.
3052 The purpose is to avoid splitting latch edge with a biv increment, thus
3053 creating a jump, possibly confusing other optimization passes and leaving
3054 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
3055 is not available (so we do not have a better alternative), or if the latch
3056 edge is already nonempty. */
3058 static bool
3059 allow_ip_end_pos_p (struct loop *loop)
3061 if (!ip_normal_pos (loop))
3062 return true;
3064 if (!empty_block_p (ip_end_pos (loop)))
3065 return true;
3067 return false;
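/* For illustration: if the latch block already contains real statements,
placing the increment there (IP_END) adds no new jump, so it is allowed;
if the latch is empty and the normal position before the exit test
exists, IP_END is rejected. */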
3070 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3071 Important field is set to IMPORTANT. */
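/* For illustration, on a hypothetical target with 4-byte post-increment
loads: for a use *p_7 with step 4, an IP_AFTER_USE candidate with the
use's own base is added, so the access can become a single
auto-incrementing load; with pre-increment support, an IP_BEFORE_USE
candidate is added instead, with its base biased backward by one step. */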
3073 static void
3074 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3075 bool important, struct iv_use *use)
3077 basic_block use_bb = gimple_bb (use->stmt);
3078 machine_mode mem_mode;
3079 unsigned HOST_WIDE_INT cstepi;
3081 /* If we insert the increment in any position other than the standard
3082 ones, we must ensure that it is incremented once per iteration.
3083 It must not be in an inner nested loop, or one side of an if
3084 statement. */
3085 if (use_bb->loop_father != data->current_loop
3086 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3087 || stmt_could_throw_p (use->stmt)
3088 || !cst_and_fits_in_hwi (step))
3089 return;
3091 cstepi = int_cst_value (step);
3093 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
3094 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3095 || USE_STORE_PRE_INCREMENT (mem_mode))
3096 && GET_MODE_SIZE (mem_mode) == cstepi)
3097 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3098 || USE_STORE_PRE_DECREMENT (mem_mode))
3099 && GET_MODE_SIZE (mem_mode) == -cstepi))
3101 enum tree_code code = MINUS_EXPR;
3102 tree new_base;
3103 tree new_step = step;
3105 if (POINTER_TYPE_P (TREE_TYPE (base)))
3107 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3108 code = POINTER_PLUS_EXPR;
3110 else
3111 new_step = fold_convert (TREE_TYPE (base), new_step);
3112 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3113 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3114 use->stmt);
3116 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3117 || USE_STORE_POST_INCREMENT (mem_mode))
3118 && GET_MODE_SIZE (mem_mode) == cstepi)
3119 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3120 || USE_STORE_POST_DECREMENT (mem_mode))
3121 && GET_MODE_SIZE (mem_mode) == -cstepi))
3123 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3124 use->stmt);
3128 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3129 position to POS. If USE is not NULL, the candidate is set as related to
3130 it. The candidate computation is scheduled before the exit condition and
3131 at the end of the loop. */
3133 static void
3134 add_candidate (struct ivopts_data *data,
3135 tree base, tree step, bool important, struct iv_use *use,
3136 struct iv *orig_iv = NULL)
3138 if (ip_normal_pos (data->current_loop))
3139 add_candidate_1 (data, base, step, important,
3140 IP_NORMAL, use, NULL, orig_iv);
3141 if (ip_end_pos (data->current_loop)
3142 && allow_ip_end_pos_p (data->current_loop))
3143 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3146 /* Adds standard iv candidates. */
3148 static void
3149 add_standard_iv_candidates (struct ivopts_data *data)
3151 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3153 /* The same for the long type, if it is still fast enough. */
3154 if (TYPE_PRECISION
3155 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3156 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3157 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3158 build_int_cst (long_integer_type_node, 1), true, NULL);
3160 /* The same for the long long type, if it is still fast enough. */
3161 if (TYPE_PRECISION
3162 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3163 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3164 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3165 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3169 /* Adds candidates based on the old induction variable IV. */
3171 static void
3172 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3174 gimple *phi;
3175 tree def;
3176 struct iv_cand *cand;
3178 /* Check if this biv is used in an address type use. */
3179 if (iv->no_overflow && iv->have_address_use
3180 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3181 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3183 tree base = fold_convert (sizetype, iv->base);
3184 tree step = fold_convert (sizetype, iv->step);
3186 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3187 add_candidate (data, base, step, true, NULL, iv);
3188 /* Add iv cand of the original type only if it has nonlinear use. */
3189 if (iv->nonlin_use)
3190 add_candidate (data, iv->base, iv->step, true, NULL);
3192 else
3193 add_candidate (data, iv->base, iv->step, true, NULL);
3195 /* The same, but with initial value zero. */
3196 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3197 add_candidate (data, size_int (0), iv->step, true, NULL);
3198 else
3199 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3200 iv->step, true, NULL);
3202 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3203 if (gimple_code (phi) == GIMPLE_PHI)
3205 /* Additionally record the possibility of leaving the original iv
3206 untouched. */
3207 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3208 /* Don't add candidate if it's from another PHI node because
3209 it's an affine iv appearing in the form of PEELED_CHREC. */
3210 phi = SSA_NAME_DEF_STMT (def);
3211 if (gimple_code (phi) != GIMPLE_PHI)
3213 cand = add_candidate_1 (data,
3214 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3215 SSA_NAME_DEF_STMT (def));
3216 if (cand)
3218 cand->var_before = iv->ssa_name;
3219 cand->var_after = def;
3222 else
3223 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3227 /* Adds candidates based on the old induction variables. */
3229 static void
3230 add_iv_candidate_for_bivs (struct ivopts_data *data)
3232 unsigned i;
3233 struct iv *iv;
3234 bitmap_iterator bi;
3236 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3238 iv = ver_info (data, i)->iv;
3239 if (iv && iv->biv_p && !integer_zerop (iv->step))
3240 add_iv_candidate_for_biv (data, iv);
3244 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3246 static void
3247 record_common_cand (struct ivopts_data *data, tree base,
3248 tree step, struct iv_use *use)
3250 struct iv_common_cand ent;
3251 struct iv_common_cand **slot;
3253 ent.base = base;
3254 ent.step = step;
3255 ent.hash = iterative_hash_expr (base, 0);
3256 ent.hash = iterative_hash_expr (step, ent.hash);
3258 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3259 if (*slot == NULL)
3261 *slot = new iv_common_cand ();
3262 (*slot)->base = base;
3263 (*slot)->step = step;
3264 (*slot)->uses.create (8);
3265 (*slot)->hash = ent.hash;
3266 data->iv_common_cands.safe_push ((*slot));
3269 gcc_assert (use != NULL);
3270 (*slot)->uses.safe_push (use);
3271 return;
3274 /* Comparison function used to sort common candidates. */
3276 static int
3277 common_cand_cmp (const void *p1, const void *p2)
3279 unsigned n1, n2;
3280 const struct iv_common_cand *const *const ccand1
3281 = (const struct iv_common_cand *const *)p1;
3282 const struct iv_common_cand *const *const ccand2
3283 = (const struct iv_common_cand *const *)p2;
3285 n1 = (*ccand1)->uses.length ();
3286 n2 = (*ccand2)->uses.length ();
3287 return n2 - n1;
3290 /* Adds IV candidates based on the common candidates recorded. */
3292 static void
3293 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3295 unsigned i, j;
3296 struct iv_cand *cand_1, *cand_2;
3298 data->iv_common_cands.qsort (common_cand_cmp);
3299 for (i = 0; i < data->iv_common_cands.length (); i++)
3301 struct iv_common_cand *ptr = data->iv_common_cands[i];
3303 /* Only add IV candidate if it's derived from multiple uses. */
3304 if (ptr->uses.length () <= 1)
3305 break;
3307 cand_1 = NULL;
3308 cand_2 = NULL;
3309 if (ip_normal_pos (data->current_loop))
3310 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3311 false, IP_NORMAL, NULL, NULL);
3313 if (ip_end_pos (data->current_loop)
3314 && allow_ip_end_pos_p (data->current_loop))
3315 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3316 false, IP_END, NULL, NULL);
3318 /* Bind deriving uses and the new candidates. */
3319 for (j = 0; j < ptr->uses.length (); j++)
3321 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3322 if (cand_1)
3323 bitmap_set_bit (group->related_cands, cand_1->id);
3324 if (cand_2)
3325 bitmap_set_bit (group->related_cands, cand_2->id);
3329 /* Release data since it is useless from this point. */
3330 data->iv_common_cand_tab->empty ();
3331 data->iv_common_cands.truncate (0);
3334 /* Adds candidates based on the value of USE's iv. */
3336 static void
3337 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3339 unsigned HOST_WIDE_INT offset;
3340 tree base;
3341 tree basetype;
3342 struct iv *iv = use->iv;
3344 add_candidate (data, iv->base, iv->step, false, use);
3346 /* Record common candidate for use in case it can be shared by others. */
3347 record_common_cand (data, iv->base, iv->step, use);
3349 /* Record common candidate with initial value zero. */
3350 basetype = TREE_TYPE (iv->base);
3351 if (POINTER_TYPE_P (basetype))
3352 basetype = sizetype;
3353 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3355 /* Record common candidate with constant offset stripped in base.
3356 Like the use itself, we also add candidate directly for it. */
3357 base = strip_offset (iv->base, &offset);
3358 if (offset || base != iv->base)
3360 record_common_cand (data, base, iv->step, use);
3361 add_candidate (data, base, iv->step, false, use);
3364 /* Record common candidate with base_object removed in base. */
3365 base = iv->base;
3366 STRIP_NOPS (base);
3367 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3369 tree step = iv->step;
3371 STRIP_NOPS (step);
3372 base = TREE_OPERAND (base, 1);
3373 step = fold_convert (sizetype, step);
3374 record_common_cand (data, base, step, use);
3375 /* Also record common candidate with offset stripped. */
3376 base = strip_offset (base, &offset);
3377 if (offset)
3378 record_common_cand (data, base, step, use);
3381 /* Finally, add auto-increment candidates. Make such variables
3382 important, since other iv uses with the same base object may be
3383 based on them. */
3384 if (use != NULL && use->type == USE_ADDRESS)
3385 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3388 /* Adds candidates based on the uses. */
3390 static void
3391 add_iv_candidate_for_groups (struct ivopts_data *data)
3393 unsigned i;
3395 /* Only add candidate for the first use in group. */
3396 for (i = 0; i < data->vgroups.length (); i++)
3398 struct iv_group *group = data->vgroups[i];
3400 gcc_assert (group->vuses[0] != NULL);
3401 add_iv_candidate_for_use (data, group->vuses[0]);
3403 add_iv_candidate_derived_from_uses (data);
3406 /* Record important candidates and add them to related_cands bitmaps. */
3408 static void
3409 record_important_candidates (struct ivopts_data *data)
3411 unsigned i;
3412 struct iv_group *group;
3414 for (i = 0; i < data->vcands.length (); i++)
3416 struct iv_cand *cand = data->vcands[i];
3418 if (cand->important)
3419 bitmap_set_bit (data->important_candidates, i);
3422 data->consider_all_candidates = (data->vcands.length ()
3423 <= CONSIDER_ALL_CANDIDATES_BOUND);
3425 /* Add important candidates to groups' related_cands bitmaps. */
3426 for (i = 0; i < data->vgroups.length (); i++)
3428 group = data->vgroups[i];
3429 bitmap_ior_into (group->related_cands, data->important_candidates);
3433 /* Allocates the data structure mapping the (group, candidate) pairs to costs.
3434 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3435 we allocate a simple list for every group. */
3437 static void
3438 alloc_use_cost_map (struct ivopts_data *data)
3440 unsigned i, size, s;
3442 for (i = 0; i < data->vgroups.length (); i++)
3444 struct iv_group *group = data->vgroups[i];
3446 if (data->consider_all_candidates)
3447 size = data->vcands.length ();
3448 else
3450 s = bitmap_count_bits (group->related_cands);
3452 /* Round up to a power of two, so that computing the modulus is fast. */
3453 size = s ? (1 << ceil_log2 (s)) : 1;
3456 group->n_map_members = size;
3457 group->cost_map = XCNEWVEC (struct cost_pair, size);
3461 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3462 depends on invariants INV_VARS, that the value used in expressing it is
3463 VALUE, and that in case of iv elimination the comparison operator is COMP. */
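/* For illustration: when consider_all_candidates is false, cost_map
behaves as an open-addressed hash table. With n_map_members == 8 and
cand->id == 11, probing starts at slot 11 & 7 == 3 and walks forward,
wrapping around once, to the first free slot. */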
3465 static void
3466 set_group_iv_cost (struct ivopts_data *data,
3467 struct iv_group *group, struct iv_cand *cand,
3468 comp_cost cost, bitmap inv_vars, tree value,
3469 enum tree_code comp, bitmap inv_exprs)
3471 unsigned i, s;
3473 if (cost.infinite_cost_p ())
3475 BITMAP_FREE (inv_vars);
3476 BITMAP_FREE (inv_exprs);
3477 return;
3480 if (data->consider_all_candidates)
3482 group->cost_map[cand->id].cand = cand;
3483 group->cost_map[cand->id].cost = cost;
3484 group->cost_map[cand->id].inv_vars = inv_vars;
3485 group->cost_map[cand->id].inv_exprs = inv_exprs;
3486 group->cost_map[cand->id].value = value;
3487 group->cost_map[cand->id].comp = comp;
3488 return;
3491 /* n_map_members is a power of two, so this computes modulo. */
3492 s = cand->id & (group->n_map_members - 1);
3493 for (i = s; i < group->n_map_members; i++)
3494 if (!group->cost_map[i].cand)
3495 goto found;
3496 for (i = 0; i < s; i++)
3497 if (!group->cost_map[i].cand)
3498 goto found;
3500 gcc_unreachable ();
3502 found:
3503 group->cost_map[i].cand = cand;
3504 group->cost_map[i].cost = cost;
3505 group->cost_map[i].inv_vars = inv_vars;
3506 group->cost_map[i].inv_exprs = inv_exprs;
3507 group->cost_map[i].value = value;
3508 group->cost_map[i].comp = comp;
3511 /* Gets cost of (GROUP, CAND) pair. */
3513 static struct cost_pair *
3514 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3515 struct iv_cand *cand)
3517 unsigned i, s;
3518 struct cost_pair *ret;
3520 if (!cand)
3521 return NULL;
3523 if (data->consider_all_candidates)
3525 ret = group->cost_map + cand->id;
3526 if (!ret->cand)
3527 return NULL;
3529 return ret;
3532 /* n_map_members is a power of two, so this computes modulo. */
3533 s = cand->id & (group->n_map_members - 1);
3534 for (i = s; i < group->n_map_members; i++)
3535 if (group->cost_map[i].cand == cand)
3536 return group->cost_map + i;
3537 else if (group->cost_map[i].cand == NULL)
3538 return NULL;
3539 for (i = 0; i < s; i++)
3540 if (group->cost_map[i].cand == cand)
3541 return group->cost_map + i;
3542 else if (group->cost_map[i].cand == NULL)
3543 return NULL;
3545 return NULL;
3548 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3549 static rtx
3550 produce_memory_decl_rtl (tree obj, int *regno)
3552 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3553 machine_mode address_mode = targetm.addr_space.address_mode (as);
3554 rtx x;
3556 gcc_assert (obj);
3557 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3559 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3560 x = gen_rtx_SYMBOL_REF (address_mode, name);
3561 SET_SYMBOL_REF_DECL (x, obj);
3562 x = gen_rtx_MEM (DECL_MODE (obj), x);
3563 set_mem_addr_space (x, as);
3564 targetm.encode_section_info (obj, x, true);
3566 else
3568 x = gen_raw_REG (address_mode, (*regno)++);
3569 x = gen_rtx_MEM (DECL_MODE (obj), x);
3570 set_mem_addr_space (x, as);
3573 return x;
3576 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3577 walk_tree. DATA contains the actual fake register number. */
3579 static tree
3580 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3582 tree obj = NULL_TREE;
3583 rtx x = NULL_RTX;
3584 int *regno = (int *) data;
3586 switch (TREE_CODE (*expr_p))
3588 case ADDR_EXPR:
3589 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3590 handled_component_p (*expr_p);
3591 expr_p = &TREE_OPERAND (*expr_p, 0))
3592 continue;
3593 obj = *expr_p;
3594 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3595 x = produce_memory_decl_rtl (obj, regno);
3596 break;
3598 case SSA_NAME:
3599 *ws = 0;
3600 obj = SSA_NAME_VAR (*expr_p);
3601 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3602 if (!obj)
3603 return NULL_TREE;
3604 if (!DECL_RTL_SET_P (obj))
3605 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3606 break;
3608 case VAR_DECL:
3609 case PARM_DECL:
3610 case RESULT_DECL:
3611 *ws = 0;
3612 obj = *expr_p;
3614 if (DECL_RTL_SET_P (obj))
3615 break;
3617 if (DECL_MODE (obj) == BLKmode)
3618 x = produce_memory_decl_rtl (obj, regno);
3619 else
3620 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3622 break;
3624 default:
3625 break;
3628 if (x)
3630 decl_rtl_to_reset.safe_push (obj);
3631 SET_DECL_RTL (obj, x);
3634 return NULL_TREE;
3637 /* Determines cost of the computation of EXPR. */
3639 static unsigned
3640 computation_cost (tree expr, bool speed)
3642 rtx_insn *seq;
3643 rtx rslt;
3644 tree type = TREE_TYPE (expr);
3645 unsigned cost;
3646 /* Avoid using hard regs in ways which may be unsupported. */
3647 int regno = LAST_VIRTUAL_REGISTER + 1;
3648 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3649 enum node_frequency real_frequency = node->frequency;
3651 node->frequency = NODE_FREQUENCY_NORMAL;
3652 crtl->maybe_hot_insn_p = speed;
3653 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3654 start_sequence ();
3655 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3656 seq = get_insns ();
3657 end_sequence ();
3658 default_rtl_profile ();
3659 node->frequency = real_frequency;
3661 cost = seq_cost (seq, speed);
3662 if (MEM_P (rslt))
3663 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3664 TYPE_ADDR_SPACE (type), speed);
3665 else if (!REG_P (rslt))
3666 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3668 return cost;
3671 /* Returns variable containing the value of candidate CAND at statement AT. */
3673 static tree
3674 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3676 if (stmt_after_increment (loop, cand, stmt))
3677 return cand->var_after;
3678 else
3679 return cand->var_before;
3682 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3683 same precision that is at least as wide as the precision of TYPE, stores
3684 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3685 type of A and B. */
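/* For illustration: if *A is (unsigned int) x_1 and *B is
(unsigned int) y_2, with x_1 and y_2 of a common 64-bit type, *A and *B
are replaced by x_1 and y_2 and the 64-bit type is returned, so the
subtraction below can be folded in the wider type first. */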
3687 static tree
3688 determine_common_wider_type (tree *a, tree *b)
3690 tree wider_type = NULL;
3691 tree suba, subb;
3692 tree atype = TREE_TYPE (*a);
3694 if (CONVERT_EXPR_P (*a))
3696 suba = TREE_OPERAND (*a, 0);
3697 wider_type = TREE_TYPE (suba);
3698 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3699 return atype;
3701 else
3702 return atype;
3704 if (CONVERT_EXPR_P (*b))
3706 subb = TREE_OPERAND (*b, 0);
3707 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3708 return atype;
3710 else
3711 return atype;
3713 *a = suba;
3714 *b = subb;
3715 return wider_type;
3718 /* Determines the expression by which USE is expressed from induction
3719 variable CAND at statement AT in LOOP. The expression is stored in a
3720 decomposed form, in two parts: the invariant part in AFF_INV and the
3721 variant part in AFF_VAR. Store the ratio of USE.step over CAND.step in
3722 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
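/* A worked example: with USE.iv = {base 4, step 8} and CAND.iv =
{base 0, step 4}, constant_multiple_of gives ratio 2, and the
decomposition is use = (4 - 2*0) + 2*var, i.e. AFF_INV = 4 and
AFF_VAR = 2*var, where var holds the candidate's value at AT. */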
3724 static bool
3725 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3726 struct iv_cand *cand, struct aff_tree *aff_inv,
3727 struct aff_tree *aff_var, widest_int *prat = NULL)
3729 tree ubase = use->iv->base, ustep = use->iv->step;
3730 tree cbase = cand->iv->base, cstep = cand->iv->step;
3731 tree common_type, uutype, var, cstep_common;
3732 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3733 aff_tree aff_cbase;
3734 widest_int rat;
3736 /* We must have enough precision to express the values of USE. */
3737 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3738 return false;
3740 var = var_at_stmt (loop, cand, at);
3741 uutype = unsigned_type_for (utype);
3743 /* If the conversion is not noop, perform it. */
3744 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3746 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3747 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3749 tree inner_base, inner_step, inner_type;
3750 inner_base = TREE_OPERAND (cbase, 0);
3751 if (CONVERT_EXPR_P (cstep))
3752 inner_step = TREE_OPERAND (cstep, 0);
3753 else
3754 inner_step = cstep;
3756 inner_type = TREE_TYPE (inner_base);
3757 /* If the candidate is added from a biv whose type is smaller than
3758 ctype, we know both the candidate and the biv won't overflow.
3759 In this case, it's safe to skip the conversion in the candidate.
3760 As an example, (unsigned short)((unsigned long)A) equals
3761 (unsigned short)A, if A has a type no larger than short. */
3762 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3764 cbase = inner_base;
3765 cstep = inner_step;
3768 cbase = fold_convert (uutype, cbase);
3769 cstep = fold_convert (uutype, cstep);
3770 var = fold_convert (uutype, var);
3773 /* Ratio is 1 when computing the value of biv cand by itself.
3774 We can't rely on constant_multiple_of in this case because the
3775 use is created after the original biv is selected. The call
3776 could fail because of inconsistent fold behavior. See PR68021
3777 for more information. */
3778 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3780 gcc_assert (is_gimple_assign (use->stmt));
3781 gcc_assert (use->iv->ssa_name == cand->var_after);
3782 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3783 rat = 1;
3785 else if (!constant_multiple_of (ustep, cstep, &rat))
3786 return false;
3788 if (prat)
3789 *prat = rat;
3791 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3792 type, we achieve better folding by computing their difference in this
3793 wider type, and casting the result to UUTYPE. We do not need to worry
3794 about overflows, as all the arithmetic will in the end be performed in
3795 UUTYPE anyway. */
3796 common_type = determine_common_wider_type (&ubase, &cbase);
3798 /* use = ubase - ratio * cbase + ratio * var. */
3799 tree_to_aff_combination (ubase, common_type, aff_inv);
3800 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3801 tree_to_aff_combination (var, uutype, aff_var);
3803 /* We need to shift the value if we are after the increment. */
3804 if (stmt_after_increment (loop, cand, at))
3806 aff_tree cstep_aff;
3808 if (common_type != uutype)
3809 cstep_common = fold_convert (common_type, cstep);
3810 else
3811 cstep_common = cstep;
3813 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3814 aff_combination_add (&aff_cbase, &cstep_aff);
3817 aff_combination_scale (&aff_cbase, -rat);
3818 aff_combination_add (aff_inv, &aff_cbase);
3819 if (common_type != uutype)
3820 aff_combination_convert (aff_inv, uutype);
3822 aff_combination_scale (aff_var, rat);
3823 return true;
3826 /* Determines the expression by which USE is expressed from induction variable
3827 CAND at statement AT in LOOP. The expression is stored in a decomposed
3828 form into AFF. Returns false if USE cannot be expressed using CAND. */
3830 static bool
3831 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3832 struct iv_cand *cand, struct aff_tree *aff)
3834 aff_tree aff_var;
3836 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3837 return false;
3839 aff_combination_add (aff, &aff_var);
3840 return true;
3843 /* Return the type of USE. */
3845 static tree
3846 get_use_type (struct iv_use *use)
3848 tree base_type = TREE_TYPE (use->iv->base);
3849 tree type;
3851 if (use->type == USE_ADDRESS)
3853 /* The base_type may be a void pointer. Create a pointer type based on
3854 the mem_ref instead. */
3855 type = build_pointer_type (TREE_TYPE (*use->op_p));
3856 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3857 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3859 else
3860 type = base_type;
3862 return type;
3865 /* Determines the expression by which USE is expressed from induction variable
3866 CAND at statement AT in LOOP. The computation is unshared. */
3868 static tree
3869 get_computation_at (struct loop *loop, gimple *at,
3870 struct iv_use *use, struct iv_cand *cand)
3872 aff_tree aff;
3873 tree type = get_use_type (use);
3875 if (!get_computation_aff (loop, at, use, cand, &aff))
3876 return NULL_TREE;
3877 unshare_aff_combination (&aff);
3878 return fold_convert (type, aff_combination_to_tree (&aff));
3881 /* Adjust the cost COST for being in loop setup rather than loop body.
3882 If we're optimizing for space, the loop setup overhead is constant;
3883 if we're optimizing for speed, amortize it over the average number of
3884 iterations. If ROUND_UP_P is true, the result is rounded up rather
3885 than truncated toward zero when optimizing for speed. */
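/* For illustration: with COST == 10 and an average of 4 iterations,
optimizing for speed yields 10 / 4 == 2, or (10 + 3) / 4 == 3 when
ROUND_UP_P; optimizing for size returns 10 unchanged. */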
3886 static unsigned
3887 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
3888 bool round_up_p = false)
3890 if (cost == INFTY)
3891 return cost;
3892 else if (optimize_loop_for_speed_p (data->current_loop))
3894 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
3895 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
3897 else
3898 return cost;
3901 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3902 validity for a memory reference accessing memory of mode MODE in
3903 address space AS. */
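/* For illustration: an x86-like target typically validates ratios 1, 2,
4 and 8 in scaled-index addresses, while a simple RISC target may accept
only 1. The probe below builds "reg1 * ratio + reg2" and asks the
backend once per ratio, caching the answers in a bitmap per
(address space, mode) pair. */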
3906 bool
3907 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3908 addr_space_t as)
3910 #define MAX_RATIO 128
3911 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3912 static vec<sbitmap> valid_mult_list;
3913 sbitmap valid_mult;
3915 if (data_index >= valid_mult_list.length ())
3916 valid_mult_list.safe_grow_cleared (data_index + 1);
3918 valid_mult = valid_mult_list[data_index];
3919 if (!valid_mult)
3921 machine_mode address_mode = targetm.addr_space.address_mode (as);
3922 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3923 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3924 rtx addr, scaled;
3925 HOST_WIDE_INT i;
3927 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3928 bitmap_clear (valid_mult);
3929 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3930 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3931 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3933 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3934 if (memory_address_addr_space_p (mode, addr, as)
3935 || memory_address_addr_space_p (mode, scaled, as))
3936 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3939 if (dump_file && (dump_flags & TDF_DETAILS))
3941 fprintf (dump_file, " allowed multipliers:");
3942 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3943 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3944 fprintf (dump_file, " %d", (int) i);
3945 fprintf (dump_file, "\n");
3946 fprintf (dump_file, "\n");
3949 valid_mult_list[data_index] = valid_mult;
3952 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3953 return false;
3955 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3958 /* Calculate the speed or size cost (per SPEED) of shiftadd EXPR in MODE.
3959 MULT is the EXPR operand holding the multiplication by a power of two,
3960 i.e. the shift. COST0 and COST1 are the costs for calculating the
3961 operands of EXPR. Returns true if successful, and returns the cost in COST. */
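/* For illustration: for EXPR = a_1 + b_2 * 4, MULT is b_2 * 4 and
m == 2; the result is the cheaper of an explicit shift-plus-add sequence
and a single shift-and-add instruction (an "lea"-style pattern, say),
plus the cost of computing the non-multiplied operand. */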
3963 static bool
3964 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3965 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3967 comp_cost res;
3968 tree op1 = TREE_OPERAND (expr, 1);
3969 tree cst = TREE_OPERAND (mult, 1);
3970 tree multop = TREE_OPERAND (mult, 0);
3971 int m = exact_log2 (int_cst_value (cst));
3972 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3973 int as_cost, sa_cost;
3974 bool mult_in_op1;
3976 if (!(m >= 0 && m < maxm))
3977 return false;
3979 STRIP_NOPS (op1);
3980 mult_in_op1 = operand_equal_p (op1, mult, 0);
3982 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3984 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3985 use that in preference to a shift insn followed by an add insn. */
3986 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3987 ? shiftadd_cost (speed, mode, m)
3988 : (mult_in_op1
3989 ? shiftsub1_cost (speed, mode, m)
3990 : shiftsub0_cost (speed, mode, m)));
3992 res = comp_cost (MIN (as_cost, sa_cost), 0);
3993 res += (mult_in_op1 ? cost0 : cost1);
3995 STRIP_NOPS (multop);
3996 if (!is_gimple_val (multop))
3997 res += force_expr_to_var_cost (multop, speed);
3999 *cost = res;
4000 return true;
4003 /* Estimates cost of forcing expression EXPR into a variable. */
4005 static comp_cost
4006 force_expr_to_var_cost (tree expr, bool speed)
4008 static bool costs_initialized = false;
4009 static unsigned integer_cost [2];
4010 static unsigned symbol_cost [2];
4011 static unsigned address_cost [2];
4012 tree op0, op1;
4013 comp_cost cost0, cost1, cost;
4014 machine_mode mode;
4016 if (!costs_initialized)
4018 tree type = build_pointer_type (integer_type_node);
4019 tree var, addr;
4020 rtx x;
4021 int i;
4023 var = create_tmp_var_raw (integer_type_node, "test_var");
4024 TREE_STATIC (var) = 1;
4025 x = produce_memory_decl_rtl (var, NULL);
4026 SET_DECL_RTL (var, x);
4028 addr = build1 (ADDR_EXPR, type, var);
4031 for (i = 0; i < 2; i++)
4033 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4034 2000), i);
4036 symbol_cost[i] = computation_cost (addr, i) + 1;
4038 address_cost[i]
4039 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4040 if (dump_file && (dump_flags & TDF_DETAILS))
4042 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4043 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4044 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4045 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4046 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4047 fprintf (dump_file, "\n");
4051 costs_initialized = true;
4054 STRIP_NOPS (expr);
4056 if (SSA_VAR_P (expr))
4057 return no_cost;
4059 if (is_gimple_min_invariant (expr))
4061 if (TREE_CODE (expr) == INTEGER_CST)
4062 return comp_cost (integer_cost [speed], 0);
4064 if (TREE_CODE (expr) == ADDR_EXPR)
4066 tree obj = TREE_OPERAND (expr, 0);
4068 if (VAR_P (obj)
4069 || TREE_CODE (obj) == PARM_DECL
4070 || TREE_CODE (obj) == RESULT_DECL)
4071 return comp_cost (symbol_cost [speed], 0);
4074 return comp_cost (address_cost [speed], 0);
4077 switch (TREE_CODE (expr))
4079 case POINTER_PLUS_EXPR:
4080 case PLUS_EXPR:
4081 case MINUS_EXPR:
4082 case MULT_EXPR:
4083 op0 = TREE_OPERAND (expr, 0);
4084 op1 = TREE_OPERAND (expr, 1);
4085 STRIP_NOPS (op0);
4086 STRIP_NOPS (op1);
4087 break;
4089 CASE_CONVERT:
4090 case NEGATE_EXPR:
4091 op0 = TREE_OPERAND (expr, 0);
4092 STRIP_NOPS (op0);
4093 op1 = NULL_TREE;
4094 break;
4096 default:
4097 /* Just an arbitrary value, FIXME. */
4098 return comp_cost (target_spill_cost[speed], 0);
4101 if (op0 == NULL_TREE
4102 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4103 cost0 = no_cost;
4104 else
4105 cost0 = force_expr_to_var_cost (op0, speed);
4107 if (op1 == NULL_TREE
4108 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4109 cost1 = no_cost;
4110 else
4111 cost1 = force_expr_to_var_cost (op1, speed);
4113 mode = TYPE_MODE (TREE_TYPE (expr));
4114 switch (TREE_CODE (expr))
4116 case POINTER_PLUS_EXPR:
4117 case PLUS_EXPR:
4118 case MINUS_EXPR:
4119 case NEGATE_EXPR:
4120 cost = comp_cost (add_cost (speed, mode), 0);
4121 if (TREE_CODE (expr) != NEGATE_EXPR)
4123 tree mult = NULL_TREE;
4124 comp_cost sa_cost;
4125 if (TREE_CODE (op1) == MULT_EXPR)
4126 mult = op1;
4127 else if (TREE_CODE (op0) == MULT_EXPR)
4128 mult = op0;
4130 if (mult != NULL_TREE
4131 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4132 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4133 speed, &sa_cost))
4134 return sa_cost;
4136 break;
4138 CASE_CONVERT:
4140 tree inner_mode, outer_mode;
4141 outer_mode = TREE_TYPE (expr);
4142 inner_mode = TREE_TYPE (op0);
4143 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4144 TYPE_MODE (inner_mode), speed), 0);
4146 break;
4148 case MULT_EXPR:
4149 if (cst_and_fits_in_hwi (op0))
4150 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4151 mode, speed), 0);
4152 else if (cst_and_fits_in_hwi (op1))
4153 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4154 mode, speed), 0);
4155 else
4156 return comp_cost (target_spill_cost [speed], 0);
4157 break;
4159 default:
4160 gcc_unreachable ();
4163 cost += cost0;
4164 cost += cost1;
4165 return cost;
4168 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4169 invariants the computation depends on. */
4171 static comp_cost
4172 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4174 if (!expr)
4175 return no_cost;
4177 find_inv_vars (data, &expr, inv_vars);
4178 return force_expr_to_var_cost (expr, data->speed);
4181 /* Returns the cost of an auto-modifying address expression of the shape
4182 base + offset. AINC_STEP is the step size of the address IV.
4183 AINC_OFFSET is the offset of the address expression, which has
4184 ADDR_MODE in address space AS. The memory access has MEM_MODE.
4185 SPEED indicates whether we optimize for speed or size. */
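/* For example, on a target with post-increment addressing, a load of a
   2-byte value through "*p++" has a mem_mode of e.g. HImode, AINC_STEP 2
   and AINC_OFFSET 0, which matches the AINC_POST_INC case below; whether
   that form is actually cheap remains target-specific.  */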
4187 enum ainc_type
4189 AINC_PRE_INC, /* Pre increment. */
4190 AINC_PRE_DEC, /* Pre decrement. */
4191 AINC_POST_INC, /* Post increment. */
4192 AINC_POST_DEC, /* Post decrement. */
4193 AINC_NONE /* Also the number of auto increment types. */
4196 struct ainc_cost_data
4198 unsigned costs[AINC_NONE];
4201 static comp_cost
4202 get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
4203 machine_mode addr_mode, machine_mode mem_mode,
4204 addr_space_t as, bool speed)
4206 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4207 && !USE_STORE_PRE_DECREMENT (mem_mode)
4208 && !USE_LOAD_POST_DECREMENT (mem_mode)
4209 && !USE_STORE_POST_DECREMENT (mem_mode)
4210 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4211 && !USE_STORE_PRE_INCREMENT (mem_mode)
4212 && !USE_LOAD_POST_INCREMENT (mem_mode)
4213 && !USE_STORE_POST_INCREMENT (mem_mode))
4214 return infinite_cost;
4216 static vec<ainc_cost_data *> ainc_cost_data_list;
4217 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4218 if (idx >= ainc_cost_data_list.length ())
4220 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4222 gcc_assert (nsize > idx);
4223 ainc_cost_data_list.safe_grow_cleared (nsize);
4226 ainc_cost_data *data = ainc_cost_data_list[idx];
4227 if (data == NULL)
4229 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4231 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4232 data->costs[AINC_PRE_DEC] = INFTY;
4233 data->costs[AINC_POST_DEC] = INFTY;
4234 data->costs[AINC_PRE_INC] = INFTY;
4235 data->costs[AINC_POST_INC] = INFTY;
4236 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4237 || USE_STORE_PRE_DECREMENT (mem_mode))
4239 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4241 if (memory_address_addr_space_p (mem_mode, addr, as))
4242 data->costs[AINC_PRE_DEC]
4243 = address_cost (addr, mem_mode, as, speed);
4245 if (USE_LOAD_POST_DECREMENT (mem_mode)
4246 || USE_STORE_POST_DECREMENT (mem_mode))
4248 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4250 if (memory_address_addr_space_p (mem_mode, addr, as))
4251 data->costs[AINC_POST_DEC]
4252 = address_cost (addr, mem_mode, as, speed);
4254 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4255 || USE_STORE_PRE_INCREMENT (mem_mode))
4257 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4259 if (memory_address_addr_space_p (mem_mode, addr, as))
4260 data->costs[AINC_PRE_INC]
4261 = address_cost (addr, mem_mode, as, speed);
4263 if (USE_LOAD_POST_INCREMENT (mem_mode)
4264 || USE_STORE_POST_INCREMENT (mem_mode))
4266 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4268 if (memory_address_addr_space_p (mem_mode, addr, as))
4269 data->costs[AINC_POST_INC]
4270 = address_cost (addr, mem_mode, as, speed);
4272 ainc_cost_data_list[idx] = data;
4275 HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
4276 if (ainc_offset == 0 && msize == ainc_step)
4277 return comp_cost (data->costs[AINC_POST_INC], 0);
4278 if (ainc_offset == 0 && msize == -ainc_step)
4279 return comp_cost (data->costs[AINC_POST_DEC], 0);
4280 if (ainc_offset == msize && msize == ainc_step)
4281 return comp_cost (data->costs[AINC_PRE_INC], 0);
4282 if (ainc_offset == -msize && msize == -ainc_step)
4283 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4285 return infinite_cost;
4288 /* Return the cost of computing USE's address expression by using CAND.
4289 AFF_INV and AFF_VAR represent the invariant and variant parts of
4290 the address expression, respectively. If AFF_INV is simple, store
4291 the loop invariant variables on which it depends in INV_VARS; if
4292 AFF_INV is complicated, handle it as a new invariant expression
4293 and record it in INV_EXPR. RATIO is the multiple between the
4294 steps of USE and CAND. If CAN_AUTOINC is nonnull, store a boolean
4295 value in it indicating whether this is an auto-increment address. */
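/* As an illustration, for a use like "arr[i] + off" with ARR a global
   array, the parts may come out as symbol "arr", index I, step equal to
   RATIO and constant offset OFF, i.e. the addressing mode
   "symbol + index << scale + offset", provided valid_mem_ref_p accepts
   that shape for the target; any part the target cannot encode is moved
   back into the invariant and costed separately below.  */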
4297 static comp_cost
4298 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4299 struct iv_cand *cand, aff_tree *aff_inv,
4300 aff_tree *aff_var, HOST_WIDE_INT ratio,
4301 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4302 bool *can_autoinc, bool speed)
4304 rtx addr;
4305 bool simple_inv = true;
4306 tree comp_inv = NULL_TREE, type = aff_var->type;
4307 comp_cost var_cost = no_cost, cost = no_cost;
4308 struct mem_address parts = {NULL_TREE, integer_one_node,
4309 NULL_TREE, NULL_TREE, NULL_TREE};
4310 machine_mode addr_mode = TYPE_MODE (type);
4311 machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
4312 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4314 if (!aff_combination_const_p (aff_inv))
4316 parts.index = integer_one_node;
4317 /* Addressing mode "base + index". */
4318 if (valid_mem_ref_p (mem_mode, as, &parts))
4320 parts.step = wide_int_to_tree (type, ratio);
4321 /* Addressing mode "base + index << scale". */
4322 if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
4323 parts.step = NULL_TREE;
4325 if (aff_inv->offset != 0)
4327 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4328 /* Addressing mode "base + index [<< scale] + offset". */
4329 if (!valid_mem_ref_p (mem_mode, as, &parts))
4330 parts.offset = NULL_TREE;
4331 else
4332 aff_inv->offset = 0;
4335 move_fixed_address_to_symbol (&parts, aff_inv);
4336 /* The base is a fixed address and is moved to the symbol part. */
4337 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4338 parts.base = NULL_TREE;
4340 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4341 if (parts.symbol != NULL_TREE
4342 && !valid_mem_ref_p (mem_mode, as, &parts))
4344 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4345 parts.symbol = NULL_TREE;
4346 /* Reset SIMPLE_INV since symbol address needs to be computed
4347 outside of address expression in this case. */
4348 simple_inv = false;
4350 /* The symbol part is moved back to the base part; it can't be NULL. */
4350 parts.base = integer_one_node;
4353 else
4354 parts.index = NULL_TREE;
4356 else
4358 if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
4360 HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
4361 HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();
4363 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4364 ainc_offset += ainc_step;
4365 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4366 addr_mode, mem_mode, as, speed);
4367 if (!cost.infinite_cost_p ())
4369 *can_autoinc = true;
4370 return cost;
4372 cost = no_cost;
4374 if (!aff_combination_zero_p (aff_inv))
4376 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4377 /* Addressing mode "base + offset". */
4378 if (!valid_mem_ref_p (mem_mode, as, &parts))
4379 parts.offset = NULL_TREE;
4380 else
4381 aff_inv->offset = 0;
4385 if (simple_inv)
4386 simple_inv = (aff_inv == NULL
4387 || aff_combination_const_p (aff_inv)
4388 || aff_combination_singleton_var_p (aff_inv));
4389 if (!aff_combination_zero_p (aff_inv))
4390 comp_inv = aff_combination_to_tree (aff_inv);
4391 if (comp_inv != NULL_TREE)
4392 cost = force_var_cost (data, comp_inv, inv_vars);
4393 if (ratio != 1 && parts.step == NULL_TREE)
4394 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4395 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4396 var_cost += add_cost (speed, addr_mode);
4398 if (comp_inv && inv_expr && !simple_inv)
4400 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4401 /* Clear INV_VARS; the dependence is now recorded in INV_EXPR. */
4402 if (*inv_expr != NULL && inv_vars && *inv_vars)
4403 bitmap_clear (*inv_vars);
4405 /* The cost of a small invariant expression adjusted against loop
4406 niters is usually zero, which makes it hard to differentiate from
4407 a candidate based on loop invariant variables. Secondly, the
4408 generated invariant expression may not be hoisted out of the loop
4409 by a following pass. We penalize the cost by rounding up in order
4410 to neutralize such effects. */
4411 cost.cost = adjust_setup_cost (data, cost.cost, true);
4412 cost.scratch = cost.cost;
4415 cost += var_cost;
4416 addr = addr_for_mem_ref (&parts, as, false);
4417 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4418 cost += address_cost (addr, mem_mode, as, speed);
4420 if (parts.symbol != NULL_TREE)
4421 cost.complexity += 1;
4422 if (parts.step != NULL_TREE && !integer_onep (parts.step))
4423 cost.complexity += 1;
4424 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4425 cost.complexity += 1;
4426 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4427 cost.complexity += 1;
4429 return cost;
4432 /* Scale (multiply) the computed COST (except the scratch part, which is
4433 hoisted out of the loop) by AT->frequency / header->frequency, which
4434 makes the expected cost more accurate. */
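/* For instance, with cost 12, scratch 4, bb frequency 5 and loop header
   frequency 10, the scaled cost is 4 + (12 - 4) * 5 / 10 = 8: the
   scratch (setup) part is paid once no matter how often AT executes.  */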
4436 static comp_cost
4437 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4439 int loop_freq = data->current_loop->header->frequency;
4440 int bb_freq = gimple_bb (at)->frequency;
4441 if (loop_freq != 0)
4443 gcc_assert (cost.scratch <= cost.cost);
4444 int scaled_cost
4445 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4447 if (dump_file && (dump_flags & TDF_DETAILS))
4448 fprintf (dump_file, "Scaling cost based on bb prob "
4449 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4450 1.0f * bb_freq / loop_freq, cost.cost,
4451 cost.scratch, scaled_cost, bb_freq, loop_freq);
4453 cost.cost = scaled_cost;
4456 return cost;
4459 /* Determines the cost of the computation by that USE is expressed
4460 from induction variable CAND. If ADDRESS_P is true, we just need
4461 to create an address from it, otherwise we want to get it into a
4462 register. A set of invariants we depend on is stored in INV_VARS.
4463 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4464 addressing is likely. If INV_EXPR is nonnull, record invariant
4465 expr entry in it. */
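/* For instance, a use with base B + N and step S can be expressed by a
   candidate with base B and step S at ratio 1 with invariant part N:
   the invariant is costed via force_var_cost, one addition is charged
   for combining the parts, and a constant invariant would instead be
   folded into that addition for free.  */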
4467 static comp_cost
4468 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4469 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4470 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4472 gimple *at = use->stmt;
4473 tree ubase = use->iv->base, cbase = cand->iv->base;
4474 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4475 tree comp_inv = NULL_TREE;
4476 HOST_WIDE_INT ratio, aratio;
4477 comp_cost cost;
4478 widest_int rat;
4479 aff_tree aff_inv, aff_var;
4480 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4482 if (inv_vars)
4483 *inv_vars = NULL;
4484 if (can_autoinc)
4485 *can_autoinc = false;
4486 if (inv_expr)
4487 *inv_expr = NULL;
4489 /* Check if we have enough precision to express the values of use. */
4490 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4491 return infinite_cost;
4493 if (address_p
4494 || (use->iv->base_object
4495 && cand->iv->base_object
4496 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4497 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4499 /* Do not try to express address of an object with computation based
4500 on the address of a different object. This may cause problems in
4501 rtl-level alias analysis (which does not expect this to happen,
4502 as it is illegal in C), and would be unlikely to be useful
4503 anyway. */
4504 if (use->iv->base_object
4505 && cand->iv->base_object
4506 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4507 return infinite_cost;
4510 if (!get_computation_aff_1 (data->current_loop, at, use,
4511 cand, &aff_inv, &aff_var, &rat)
4512 || !wi::fits_shwi_p (rat))
4513 return infinite_cost;
4515 ratio = rat.to_shwi ();
4516 if (address_p)
4518 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4519 inv_vars, inv_expr, can_autoinc, speed);
4520 return get_scaled_computation_cost_at (data, at, cost);
4523 bool simple_inv = (aff_combination_const_p (&aff_inv)
4524 || aff_combination_singleton_var_p (&aff_inv));
4525 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4526 aff_combination_convert (&aff_inv, signed_type);
4527 if (!aff_combination_zero_p (&aff_inv))
4528 comp_inv = aff_combination_to_tree (&aff_inv);
4530 cost = force_var_cost (data, comp_inv, inv_vars);
4531 if (comp_inv && inv_expr && !simple_inv)
4533 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4534 /* Clear INV_VARS; the dependence is now recorded in INV_EXPR. */
4535 if (*inv_expr != NULL && inv_vars && *inv_vars)
4536 bitmap_clear (*inv_vars);
4538 cost.cost = adjust_setup_cost (data, cost.cost);
4539 /* Record setup cost in scratch field. */
4540 cost.scratch = cost.cost;
4542 /* The cost of a constant integer can be covered when adding the
4543 invariant part to the variant part. */
4544 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4545 cost = no_cost;
4547 /* Need type narrowing to represent use with cand. */
4548 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4550 machine_mode outer_mode = TYPE_MODE (utype);
4551 machine_mode inner_mode = TYPE_MODE (ctype);
4552 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4555 /* Turn a + i * (-c) into a - i * c. */
4556 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4557 aratio = -ratio;
4558 else
4559 aratio = ratio;
4561 if (ratio != 1)
4562 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4564 /* TODO: We may also need to check if we can compute a + i * 4 in one
4565 instruction. */
4566 /* Need to add up the invariant and variant parts. */
4567 if (comp_inv && !integer_zerop (comp_inv))
4568 cost += add_cost (speed, TYPE_MODE (utype));
4570 return get_scaled_computation_cost_at (data, at, cost);
4573 /* Determines cost of computing the use in GROUP with CAND in a generic
4574 expression. */
4576 static bool
4577 determine_group_iv_cost_generic (struct ivopts_data *data,
4578 struct iv_group *group, struct iv_cand *cand)
4580 comp_cost cost;
4581 iv_inv_expr_ent *inv_expr = NULL;
4582 bitmap inv_vars = NULL, inv_exprs = NULL;
4583 struct iv_use *use = group->vuses[0];
4585 /* The simple case first -- if we need to express value of the preserved
4586 original biv, the cost is 0. This also prevents us from counting the
4587 cost of increment twice -- once at this use and once in the cost of
4588 the candidate. */
4589 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4590 cost = no_cost;
4591 else
4592 cost = get_computation_cost (data, use, cand, false,
4593 &inv_vars, NULL, &inv_expr);
4595 if (inv_expr)
4597 inv_exprs = BITMAP_ALLOC (NULL);
4598 bitmap_set_bit (inv_exprs, inv_expr->id);
4600 set_group_iv_cost (data, group, cand, cost, inv_vars,
4601 NULL_TREE, ERROR_MARK, inv_exprs);
4602 return !cost.infinite_cost_p ();
4605 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4607 static bool
4608 determine_group_iv_cost_address (struct ivopts_data *data,
4609 struct iv_group *group, struct iv_cand *cand)
4611 unsigned i;
4612 bitmap inv_vars = NULL, inv_exprs = NULL;
4613 bool can_autoinc;
4614 iv_inv_expr_ent *inv_expr = NULL;
4615 struct iv_use *use = group->vuses[0];
4616 comp_cost sum_cost = no_cost, cost;
4618 cost = get_computation_cost (data, use, cand, true,
4619 &inv_vars, &can_autoinc, &inv_expr);
4621 if (inv_expr)
4623 inv_exprs = BITMAP_ALLOC (NULL);
4624 bitmap_set_bit (inv_exprs, inv_expr->id);
4626 sum_cost = cost;
4627 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4629 if (can_autoinc)
4630 sum_cost -= cand->cost_step;
4631 /* If we generated the candidate solely for exploiting autoincrement
4632 opportunities, and it turns out it can't be used, set the cost to
4633 infinity to make sure we ignore it. */
4634 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4635 sum_cost = infinite_cost;
4638 /* Uses in a group can share setup code, so only add setup cost once. */
4639 cost -= cost.scratch;
4640 /* Compute and add costs for the rest of the uses in this group. */
4641 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4643 struct iv_use *next = group->vuses[i];
4645 /* TODO: We could skip computing cost for sub iv_use when it has the
4646 same cost as the first iv_use, but the cost really depends on the
4647 offset and where the iv_use is. */
4648 cost = get_computation_cost (data, next, cand, true,
4649 NULL, &can_autoinc, &inv_expr);
4650 if (inv_expr)
4652 if (!inv_exprs)
4653 inv_exprs = BITMAP_ALLOC (NULL);
4655 bitmap_set_bit (inv_exprs, inv_expr->id);
4657 sum_cost += cost;
4659 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4660 NULL_TREE, ERROR_MARK, inv_exprs);
4662 return !sum_cost.infinite_cost_p ();
4665 /* Computes value of candidate CAND at position AT in iteration NITER, and
4666 stores it to VAL. */
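/* That is, VAL is BASE + NITER * STEP, computed in an unsigned type to
   avoid undefined overflow, plus one extra STEP if AT is located after
   the increment of CAND.  */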
4668 static void
4669 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4670 aff_tree *val)
4672 aff_tree step, delta, nit;
4673 struct iv *iv = cand->iv;
4674 tree type = TREE_TYPE (iv->base);
4675 tree steptype;
4676 if (POINTER_TYPE_P (type))
4677 steptype = sizetype;
4678 else
4679 steptype = unsigned_type_for (type);
4681 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4682 aff_combination_convert (&step, steptype);
4683 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4684 aff_combination_convert (&nit, steptype);
4685 aff_combination_mult (&nit, &step, &delta);
4686 if (stmt_after_increment (loop, cand, at))
4687 aff_combination_add (&delta, &step);
4689 tree_to_aff_combination (iv->base, type, val);
4690 if (!POINTER_TYPE_P (type))
4691 aff_combination_convert (val, steptype);
4692 aff_combination_add (val, &delta);
4695 /* Returns the period of induction variable IV. */
4697 static tree
4698 iv_period (struct iv *iv)
4700 tree step = iv->step, period, type;
4701 tree pow2div;
4703 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4705 type = unsigned_type_for (TREE_TYPE (step));
4706 /* The period of the iv is lcm (step, type_range)/step - 1,
4707 i.e., N * type_range / step - 1. Since the type range is a power
4708 of two, N == step >> num_of_ending_zeros_binary (step),
4709 so the final result is
4711 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
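/* E.g. for a 32-bit unsigned IV with step 12 (two trailing zero bits),
   the period is (1 << 30) - 1: the IV takes 1 << 30 distinct values
   before it starts repeating.  */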
4714 pow2div = num_ending_zeros (step);
4716 period = build_low_bits_mask (type,
4717 (TYPE_PRECISION (type)
4718 - tree_to_uhwi (pow2div)));
4720 return period;
4723 /* Returns the comparison operator used when eliminating the iv USE. */
4725 static enum tree_code
4726 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4728 struct loop *loop = data->current_loop;
4729 basic_block ex_bb;
4730 edge exit;
4732 ex_bb = gimple_bb (use->stmt);
4733 exit = EDGE_SUCC (ex_bb, 0);
4734 if (flow_bb_inside_loop_p (loop, exit->dest))
4735 exit = EDGE_SUCC (ex_bb, 1);
4737 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4740 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4741 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4742 calculation is performed in a non-wrapping type.
4744 TODO: More generally, we could test for the situation that
4745 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4746 This would require knowing the sign of OFFSET. */
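/* E.g. if BASE expands to SOMETHING + A and OFFSET also expands to A,
   then BASE - OFFSET is just SOMETHING, so the subtraction cannot wrap
   given that the computation is done in a non-wrapping type.  */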
4748 static bool
4749 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4751 enum tree_code code;
4752 tree e1, e2;
4753 aff_tree aff_e1, aff_e2, aff_offset;
4755 if (!nowrap_type_p (TREE_TYPE (base)))
4756 return false;
4758 base = expand_simple_operations (base);
4760 if (TREE_CODE (base) == SSA_NAME)
4762 gimple *stmt = SSA_NAME_DEF_STMT (base);
4764 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4765 return false;
4767 code = gimple_assign_rhs_code (stmt);
4768 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4769 return false;
4771 e1 = gimple_assign_rhs1 (stmt);
4772 e2 = gimple_assign_rhs2 (stmt);
4774 else
4776 code = TREE_CODE (base);
4777 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4778 return false;
4779 e1 = TREE_OPERAND (base, 0);
4780 e2 = TREE_OPERAND (base, 1);
4783 /* Use affine expansion as deeper inspection to prove the equality. */
4784 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4785 &aff_e2, &data->name_expansion_cache);
4786 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4787 &aff_offset, &data->name_expansion_cache);
4788 aff_combination_scale (&aff_offset, -1);
4789 switch (code)
4791 case PLUS_EXPR:
4792 aff_combination_add (&aff_e2, &aff_offset);
4793 if (aff_combination_zero_p (&aff_e2))
4794 return true;
4796 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4797 &aff_e1, &data->name_expansion_cache);
4798 aff_combination_add (&aff_e1, &aff_offset);
4799 return aff_combination_zero_p (&aff_e1);
4801 case POINTER_PLUS_EXPR:
4802 aff_combination_add (&aff_e2, &aff_offset);
4803 return aff_combination_zero_p (&aff_e2);
4805 default:
4806 return false;
4810 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4811 comparison with CAND. NITER describes the number of iterations of
4812 the loops. If successful, the comparison in COMP_P is altered accordingly.
4814 We aim to handle the following situation:
4816 sometype *base, *p;
4817 int a, b, i;
4819 i = a;
4820 p = p_0 = base + a;
4824 bla (*p);
4825 p++;
4826 i++;
4828 while (i < b);
4830 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4831 We aim to optimize this to
4833 p = p_0 = base + a;
4836 bla (*p);
4837 p++;
4839 while (p < p_0 - a + b);
4841 This preserves correctness, since the pointer arithmetic does not
4842 overflow. More precisely:
4844 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4845 overflow in computing it or the values of p.
4846 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4847 overflow. To prove this, we use the fact that p_0 = base + a. */
4849 static bool
4850 iv_elimination_compare_lt (struct ivopts_data *data,
4851 struct iv_cand *cand, enum tree_code *comp_p,
4852 struct tree_niter_desc *niter)
4854 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4855 struct aff_tree nit, tmpa, tmpb;
4856 enum tree_code comp;
4857 HOST_WIDE_INT step;
4859 /* We need to know that the candidate induction variable does not overflow.
4860 While more complex analysis may be used to prove this, for now just
4861 check that the variable appears in the original program and that it
4862 is computed in a type that guarantees no overflows. */
4863 cand_type = TREE_TYPE (cand->iv->base);
4864 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4865 return false;
4867 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4868 the calculation of the BOUND could overflow, making the comparison
4869 invalid. */
4870 if (!data->loop_single_exit_p)
4871 return false;
4873 /* We need to be able to decide whether candidate is increasing or decreasing
4874 in order to choose the right comparison operator. */
4875 if (!cst_and_fits_in_hwi (cand->iv->step))
4876 return false;
4877 step = int_cst_value (cand->iv->step);
4879 /* Check that the number of iterations matches the expected pattern:
4880 a + 1 > b ? 0 : b - a - 1. */
4881 mbz = niter->may_be_zero;
4882 if (TREE_CODE (mbz) == GT_EXPR)
4884 /* Handle a + 1 > b. */
4885 tree op0 = TREE_OPERAND (mbz, 0);
4886 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4888 a = TREE_OPERAND (op0, 0);
4889 b = TREE_OPERAND (mbz, 1);
4891 else
4892 return false;
4894 else if (TREE_CODE (mbz) == LT_EXPR)
4896 tree op1 = TREE_OPERAND (mbz, 1);
4898 /* Handle b < a + 1. */
4899 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4901 a = TREE_OPERAND (op1, 0);
4902 b = TREE_OPERAND (mbz, 0);
4904 else
4905 return false;
4907 else
4908 return false;
4910 /* Expected number of iterations is B - A - 1. Check that it matches
4911 the actual number, i.e., that B - A - NITER = 1. */
4912 tree_to_aff_combination (niter->niter, nit_type, &nit);
4913 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4914 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4915 aff_combination_scale (&nit, -1);
4916 aff_combination_scale (&tmpa, -1);
4917 aff_combination_add (&tmpb, &tmpa);
4918 aff_combination_add (&tmpb, &nit);
4919 if (tmpb.n != 0 || tmpb.offset != 1)
4920 return false;
4922 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4923 overflow. */
4924 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4925 cand->iv->step,
4926 fold_convert (TREE_TYPE (cand->iv->step), a));
4927 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4928 return false;
4930 /* Determine the new comparison operator. */
4931 comp = step < 0 ? GT_EXPR : LT_EXPR;
4932 if (*comp_p == NE_EXPR)
4933 *comp_p = comp;
4934 else if (*comp_p == EQ_EXPR)
4935 *comp_p = invert_tree_comparison (comp, false);
4936 else
4937 gcc_unreachable ();
4939 return true;
4942 /* Check whether it is possible to express the condition in USE by comparison
4943 of candidate CAND. If so, store the value it is compared with in
4944 BOUND, and the comparison operator in COMP. */
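/* E.g. for

     for (i = 0; i < n; i++)
       a[i] = 0;

   the exit test "i < n" may be replaced by a comparison of a pointer
   candidate against BOUND == &a[n], making I eliminable, provided the
   candidate's period covers the number of iterations as verified
   below.  */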
4946 static bool
4947 may_eliminate_iv (struct ivopts_data *data,
4948 struct iv_use *use, struct iv_cand *cand, tree *bound,
4949 enum tree_code *comp)
4951 basic_block ex_bb;
4952 edge exit;
4953 tree period;
4954 struct loop *loop = data->current_loop;
4955 aff_tree bnd;
4956 struct tree_niter_desc *desc = NULL;
4958 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4959 return false;
4961 /* For now works only for exits that dominate the loop latch.
4962 TODO: extend to other conditions inside loop body. */
4963 ex_bb = gimple_bb (use->stmt);
4964 if (use->stmt != last_stmt (ex_bb)
4965 || gimple_code (use->stmt) != GIMPLE_COND
4966 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4967 return false;
4969 exit = EDGE_SUCC (ex_bb, 0);
4970 if (flow_bb_inside_loop_p (loop, exit->dest))
4971 exit = EDGE_SUCC (ex_bb, 1);
4972 if (flow_bb_inside_loop_p (loop, exit->dest))
4973 return false;
4975 desc = niter_for_exit (data, exit);
4976 if (!desc)
4977 return false;
4979 /* Determine whether we can use the variable to test the exit condition.
4980 This is the case iff the period of the induction variable is greater
4981 than the number of iterations for which the exit condition is true. */
4982 period = iv_period (cand->iv);
4984 /* If the number of iterations is constant, compare against it directly. */
4985 if (TREE_CODE (desc->niter) == INTEGER_CST)
4987 /* See cand_value_at. */
4988 if (stmt_after_increment (loop, cand, use->stmt))
4990 if (!tree_int_cst_lt (desc->niter, period))
4991 return false;
4993 else
4995 if (tree_int_cst_lt (period, desc->niter))
4996 return false;
5000 /* If not, and if this is the only possible exit of the loop, see whether
5001 we can get a conservative estimate on the number of iterations of the
5002 entire loop and compare against that instead. */
5003 else
5005 widest_int period_value, max_niter;
5007 max_niter = desc->max;
5008 if (stmt_after_increment (loop, cand, use->stmt))
5009 max_niter += 1;
5010 period_value = wi::to_widest (period);
5011 if (wi::gtu_p (max_niter, period_value))
5013 /* See if we can take advantage of inferred loop bound
5014 information. */
5015 if (data->loop_single_exit_p)
5017 if (!max_loop_iterations (loop, &max_niter))
5018 return false;
5019 /* The loop bound is already adjusted by adding 1. */
5020 if (wi::gtu_p (max_niter, period_value))
5021 return false;
5023 else
5024 return false;
5028 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5030 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5031 aff_combination_to_tree (&bnd));
5032 *comp = iv_elimination_compare (data, use);
5034 /* It is unlikely that computing the number of iterations using division
5035 would be more profitable than keeping the original induction variable. */
5036 if (expression_expensive_p (*bound))
5037 return false;
5039 /* Sometimes it is possible to handle the situation that the number of
5040 iterations may be zero unless additional assumptions hold, by using
5041 < instead of != in the exit condition.
5043 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5044 base the exit condition on it. However, that is often too
5045 expensive. */
5046 if (!integer_zerop (desc->may_be_zero))
5047 return iv_elimination_compare_lt (data, cand, comp, desc);
5049 return true;
5052 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5053 be copied if it is used in the loop body and DATA->body_includes_call. */
5055 static int
5056 parm_decl_cost (struct ivopts_data *data, tree bound)
5058 tree sbound = bound;
5059 STRIP_NOPS (sbound);
5061 if (TREE_CODE (sbound) == SSA_NAME
5062 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5063 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5064 && data->body_includes_call)
5065 return COSTS_N_INSNS (1);
5067 return 0;
5070 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5072 static bool
5073 determine_group_iv_cost_cond (struct ivopts_data *data,
5074 struct iv_group *group, struct iv_cand *cand)
5076 tree bound = NULL_TREE;
5077 struct iv *cmp_iv;
5078 bitmap inv_exprs = NULL;
5079 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5080 comp_cost elim_cost, express_cost, cost, bound_cost;
5081 bool ok;
5082 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5083 tree *control_var, *bound_cst;
5084 enum tree_code comp = ERROR_MARK;
5085 struct iv_use *use = group->vuses[0];
5087 /* Try iv elimination. */
5088 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5090 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5091 if (elim_cost.cost == 0)
5092 elim_cost.cost = parm_decl_cost (data, bound);
5093 else if (TREE_CODE (bound) == INTEGER_CST)
5094 elim_cost.cost = 0;
5095 /* If we replace a loop condition 'i < n' with 'p < base + n',
5096 inv_vars_elim will have 'base' and 'n' set, which implies that both
5097 'base' and 'n' will be live during the loop. More likely,
5098 'base + n' will be loop invariant, resulting in only one live value
5099 during the loop. So in that case we clear inv_vars_elim and set
5100 inv_expr_elim instead. */
5101 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5103 inv_expr_elim = get_loop_invariant_expr (data, bound);
5104 bitmap_clear (inv_vars_elim);
5106 /* The bound is a loop invariant, so it will only be computed
5107 once. */
5108 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5110 else
5111 elim_cost = infinite_cost;
5113 /* Try expressing the original giv. If it is compared with an invariant,
5114 note that we cannot get rid of it. */
5115 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5116 NULL, &cmp_iv);
5117 gcc_assert (ok);
5119 /* When the condition is a comparison of the candidate IV against
5120 zero, prefer this IV.
5122 TODO: The constant that we're subtracting from the cost should
5123 be target-dependent. This information should be added to the
5124 target costs for each backend. */
5125 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5126 && integer_zerop (*bound_cst)
5127 && (operand_equal_p (*control_var, cand->var_after, 0)
5128 || operand_equal_p (*control_var, cand->var_before, 0)))
5129 elim_cost -= 1;
5131 express_cost = get_computation_cost (data, use, cand, false,
5132 &inv_vars_express, NULL,
5133 &inv_expr_express);
5134 if (cmp_iv != NULL)
5135 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5137 /* Count the cost of the original bound as well. */
5138 bound_cost = force_var_cost (data, *bound_cst, NULL);
5139 if (bound_cost.cost == 0)
5140 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5141 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5142 bound_cost.cost = 0;
5143 express_cost += bound_cost;
5145 /* Choose the better approach, preferring the eliminated IV. */
5146 if (elim_cost <= express_cost)
5148 cost = elim_cost;
5149 inv_vars = inv_vars_elim;
5150 inv_vars_elim = NULL;
5151 inv_expr = inv_expr_elim;
5153 else
5155 cost = express_cost;
5156 inv_vars = inv_vars_express;
5157 inv_vars_express = NULL;
5158 bound = NULL_TREE;
5159 comp = ERROR_MARK;
5160 inv_expr = inv_expr_express;
5163 if (inv_expr)
5165 inv_exprs = BITMAP_ALLOC (NULL);
5166 bitmap_set_bit (inv_exprs, inv_expr->id);
5168 set_group_iv_cost (data, group, cand, cost,
5169 inv_vars, bound, comp, inv_exprs);
5171 if (inv_vars_elim)
5172 BITMAP_FREE (inv_vars_elim);
5173 if (inv_vars_express)
5174 BITMAP_FREE (inv_vars_express);
5176 return !cost.infinite_cost_p ();
5179 /* Determines cost of computing uses in GROUP with CAND. Returns false
5180 if the use cannot be represented with CAND. */
5182 static bool
5183 determine_group_iv_cost (struct ivopts_data *data,
5184 struct iv_group *group, struct iv_cand *cand)
5186 switch (group->type)
5188 case USE_NONLINEAR_EXPR:
5189 return determine_group_iv_cost_generic (data, group, cand);
5191 case USE_ADDRESS:
5192 return determine_group_iv_cost_address (data, group, cand);
5194 case USE_COMPARE:
5195 return determine_group_iv_cost_cond (data, group, cand);
5197 default:
5198 gcc_unreachable ();
5202 /* Return true if get_computation_cost indicates that autoincrement is
5203 a possibility for the pair of USE and CAND, false otherwise. */
5205 static bool
5206 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5207 struct iv_cand *cand)
5209 bitmap inv_vars;
5210 bool can_autoinc;
5211 comp_cost cost;
5213 if (use->type != USE_ADDRESS)
5214 return false;
5216 cost = get_computation_cost (data, use, cand, true, &inv_vars,
5217 &can_autoinc, NULL);
5219 BITMAP_FREE (inv_vars);
5221 return !cost.infinite_cost_p () && can_autoinc;
5224 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5225 use that allows autoincrement, and set their AINC_USE if possible. */
5227 static void
5228 set_autoinc_for_original_candidates (struct ivopts_data *data)
5230 unsigned i, j;
5232 for (i = 0; i < data->vcands.length (); i++)
5234 struct iv_cand *cand = data->vcands[i];
5235 struct iv_use *closest_before = NULL;
5236 struct iv_use *closest_after = NULL;
5237 if (cand->pos != IP_ORIGINAL)
5238 continue;
5240 for (j = 0; j < data->vgroups.length (); j++)
5242 struct iv_group *group = data->vgroups[j];
5243 struct iv_use *use = group->vuses[0];
5244 unsigned uid = gimple_uid (use->stmt);
5246 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5247 continue;
5249 if (uid < gimple_uid (cand->incremented_at)
5250 && (closest_before == NULL
5251 || uid > gimple_uid (closest_before->stmt)))
5252 closest_before = use;
5254 if (uid > gimple_uid (cand->incremented_at)
5255 && (closest_after == NULL
5256 || uid < gimple_uid (closest_after->stmt)))
5257 closest_after = use;
5260 if (closest_before != NULL
5261 && autoinc_possible_for_pair (data, closest_before, cand))
5262 cand->ainc_use = closest_before;
5263 else if (closest_after != NULL
5264 && autoinc_possible_for_pair (data, closest_after, cand))
5265 cand->ainc_use = closest_after;
5269 /* Finds the candidates for the induction variables. */
5271 static void
5272 find_iv_candidates (struct ivopts_data *data)
5274 /* Add commonly used ivs. */
5275 add_standard_iv_candidates (data);
5277 /* Add old induction variables. */
5278 add_iv_candidate_for_bivs (data);
5280 /* Add induction variables derived from uses. */
5281 add_iv_candidate_for_groups (data);
5283 set_autoinc_for_original_candidates (data);
5285 /* Record the important candidates. */
5286 record_important_candidates (data);
5288 if (dump_file && (dump_flags & TDF_DETAILS))
5290 unsigned i;
5292 fprintf (dump_file, "\n<Important Candidates>:\t");
5293 for (i = 0; i < data->vcands.length (); i++)
5294 if (data->vcands[i]->important)
5295 fprintf (dump_file, " %d,", data->vcands[i]->id);
5296 fprintf (dump_file, "\n");
5298 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5299 for (i = 0; i < data->vgroups.length (); i++)
5301 struct iv_group *group = data->vgroups[i];
5303 if (group->related_cands)
5305 fprintf (dump_file, " Group %d:\t", group->id);
5306 dump_bitmap (dump_file, group->related_cands);
5309 fprintf (dump_file, "\n");
5313 /* Determines the cost of computing each group of uses with each iv candidate. */
5315 static void
5316 determine_group_iv_costs (struct ivopts_data *data)
5318 unsigned i, j;
5319 struct iv_cand *cand;
5320 struct iv_group *group;
5321 bitmap to_clear = BITMAP_ALLOC (NULL);
5323 alloc_use_cost_map (data);
5325 for (i = 0; i < data->vgroups.length (); i++)
5327 group = data->vgroups[i];
5329 if (data->consider_all_candidates)
5331 for (j = 0; j < data->vcands.length (); j++)
5333 cand = data->vcands[j];
5334 determine_group_iv_cost (data, group, cand);
5337 else
5339 bitmap_iterator bi;
5341 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5343 cand = data->vcands[j];
5344 if (!determine_group_iv_cost (data, group, cand))
5345 bitmap_set_bit (to_clear, j);
5348 /* Remove the candidates for which the cost is infinite from
5349 the list of related candidates. */
5350 bitmap_and_compl_into (group->related_cands, to_clear);
5351 bitmap_clear (to_clear);
5355 BITMAP_FREE (to_clear);
5357 if (dump_file && (dump_flags & TDF_DETAILS))
5359 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5360 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5362 for (hash_table<iv_inv_expr_hasher>::iterator it
5363 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5364 ++it)
5365 list.safe_push (*it);
5367 list.qsort (sort_iv_inv_expr_ent);
5369 for (i = 0; i < list.length (); ++i)
5371 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5372 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5373 fprintf (dump_file, "\n");
5376 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5378 for (i = 0; i < data->vgroups.length (); i++)
5380 group = data->vgroups[i];
5382 fprintf (dump_file, "Group %d:\n", i);
5383 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5384 for (j = 0; j < group->n_map_members; j++)
5386 if (!group->cost_map[j].cand
5387 || group->cost_map[j].cost.infinite_cost_p ())
5388 continue;
5390 fprintf (dump_file, " %d\t%d\t%d\t",
5391 group->cost_map[j].cand->id,
5392 group->cost_map[j].cost.cost,
5393 group->cost_map[j].cost.complexity);
5394 if (!group->cost_map[j].inv_exprs
5395 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5396 fprintf (dump_file, "NIL;\t");
5397 else
5398 bitmap_print (dump_file,
5399 group->cost_map[j].inv_exprs, "", ";\t");
5400 if (!group->cost_map[j].inv_vars
5401 || bitmap_empty_p (group->cost_map[j].inv_vars))
5402 fprintf (dump_file, "NIL;\n");
5403 else
5404 bitmap_print (dump_file,
5405 group->cost_map[j].inv_vars, "", "\n");
5408 fprintf (dump_file, "\n");
5410 fprintf (dump_file, "\n");
5414 /* Determines cost of the candidate CAND. */
5416 static void
5417 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5419 comp_cost cost_base;
5420 unsigned cost, cost_step;
5421 tree base;
5423 gcc_assert (cand->iv != NULL);
5425 /* There are two costs associated with the candidate -- its increment
5426 and its initialization. The second is almost negligible for any loop
5427 that rolls enough, so we take it into account only a little. */
5429 base = cand->iv->base;
5430 cost_base = force_var_cost (data, base, NULL);
5431 /* It will be exceptional that the iv register happens to be initialized with
5432 the proper value at no cost. In general, there will at least be a regcopy
5433 or a const set. */
5434 if (cost_base.cost == 0)
5435 cost_base.cost = COSTS_N_INSNS (1);
5436 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5438 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5440 /* Prefer the original ivs unless we may gain something by replacing them;
5441 the reason is to make debugging simpler. This is not relevant for
5442 artificial ivs created by other optimization passes. */
5443 if (cand->pos != IP_ORIGINAL
5444 || !SSA_NAME_VAR (cand->var_before)
5445 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5446 cost++;
5448 /* Prefer not to insert statements into latch unless there are some
5449 already (so that we do not create unnecessary jumps). */
5450 if (cand->pos == IP_END
5451 && empty_block_p (ip_end_pos (data->current_loop)))
5452 cost++;
5454 cand->cost = cost;
5455 cand->cost_step = cost_step;
5458 /* Determines costs of computation of the candidates. */
5460 static void
5461 determine_iv_costs (struct ivopts_data *data)
5463 unsigned i;
5465 if (dump_file && (dump_flags & TDF_DETAILS))
5467 fprintf (dump_file, "<Candidate Costs>:\n");
5468 fprintf (dump_file, " cand\tcost\n");
5471 for (i = 0; i < data->vcands.length (); i++)
5473 struct iv_cand *cand = data->vcands[i];
5475 determine_iv_cost (data, cand);
5477 if (dump_file && (dump_flags & TDF_DETAILS))
5478 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5481 if (dump_file && (dump_flags & TDF_DETAILS))
5482 fprintf (dump_file, "\n");
5485 /* Calculates cost for having N_REGS registers. This number includes
5486 induction variables, invariant variables and invariant expressions. */
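/* E.g. once N_REGS exceeds the registers available for the loop, the
   estimate from estimate_reg_pressure_cost starts growing roughly by
   the spill cost per additional register; adding N_REGS itself biases
   the search towards smaller IV sets even before that point.  */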
5488 static unsigned
5489 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned n_regs)
5491 unsigned cost = estimate_reg_pressure_cost (n_regs,
5492 data->regs_used, data->speed,
5493 data->body_includes_call);
5494 /* Add n_regs to the cost, so that we prefer eliminating ivs if possible. */
5495 return n_regs + cost;
5498 /* For each size of the induction variable set determine the penalty. */
5500 static void
5501 determine_set_costs (struct ivopts_data *data)
5503 unsigned j, n;
5504 gphi *phi;
5505 gphi_iterator psi;
5506 tree op;
5507 struct loop *loop = data->current_loop;
5508 bitmap_iterator bi;
5510 if (dump_file && (dump_flags & TDF_DETAILS))
5512 fprintf (dump_file, "<Global Costs>:\n");
5513 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5514 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5515 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5516 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5519 n = 0;
5520 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5522 phi = psi.phi ();
5523 op = PHI_RESULT (phi);
5525 if (virtual_operand_p (op))
5526 continue;
5528 if (get_iv (data, op))
5529 continue;
5531 n++;
5534 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5536 struct version_info *info = ver_info (data, j);
5538 if (info->inv_id && info->has_nonlin_use)
5539 n++;
5542 data->regs_used = n;
5543 if (dump_file && (dump_flags & TDF_DETAILS))
5544 fprintf (dump_file, " regs_used %d\n", n);
5546 if (dump_file && (dump_flags & TDF_DETAILS))
5548 fprintf (dump_file, " cost for size:\n");
5549 fprintf (dump_file, " ivs\tcost\n");
5550 for (j = 0; j <= 2 * target_avail_regs; j++)
5551 fprintf (dump_file, " %d\t%d\n", j,
5552 ivopts_global_cost_for_size (data, j));
5553 fprintf (dump_file, "\n");
5557 /* Returns true if A is a cheaper cost pair than B. */
5559 static bool
5560 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5562 if (!a)
5563 return false;
5565 if (!b)
5566 return true;
5568 if (a->cost < b->cost)
5569 return true;
5571 if (b->cost < a->cost)
5572 return false;
5574 /* In case the costs are the same, prefer the cheaper candidate. */
5575 if (a->cand->cost < b->cand->cost)
5576 return true;
5578 return false;
5582 /* Returns the cost pair by which GROUP is expressed in IVS. */
5584 static struct cost_pair *
5585 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5587 return ivs->cand_for_group[group->id];
5590 /* Computes the cost field of IVS structure. */
5592 static void
5593 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5595 comp_cost cost = ivs->cand_use_cost;
5597 cost += ivs->cand_cost;
5598 cost += ivopts_global_cost_for_size (data, ivs->n_invs + ivs->n_cands);
5599 ivs->cost = cost;
5602 /* Remove uses of the invariants in set INVS by decreasing their counters
5603 in N_INV_USES, updating IVS->n_invs accordingly. */
5605 static void
5606 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5608 bitmap_iterator bi;
5609 unsigned iid;
5611 if (!invs)
5612 return;
5614 gcc_assert (n_inv_uses != NULL);
5615 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5617 n_inv_uses[iid]--;
5618 if (n_inv_uses[iid] == 0)
5619 ivs->n_invs--;
5623 /* Set GROUP not to be expressed by any candidate in IVS. */
5625 static void
5626 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5627 struct iv_group *group)
5629 unsigned gid = group->id, cid;
5630 struct cost_pair *cp;
5632 cp = ivs->cand_for_group[gid];
5633 if (!cp)
5634 return;
5635 cid = cp->cand->id;
5637 ivs->bad_groups++;
5638 ivs->cand_for_group[gid] = NULL;
5639 ivs->n_cand_uses[cid]--;
5641 if (ivs->n_cand_uses[cid] == 0)
5643 bitmap_clear_bit (ivs->cands, cid);
5644 ivs->n_cands--;
5645 ivs->cand_cost -= cp->cand->cost;
5646 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5649 ivs->cand_use_cost -= cp->cost;
5650 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5651 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5652 iv_ca_recount_cost (data, ivs);
5655 /* Add uses of the invariants in set INVS by increasing their counters in
5656 N_INV_USES, updating IVS->n_invs accordingly. */
5658 static void
5659 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5661 bitmap_iterator bi;
5662 unsigned iid;
5664 if (!invs)
5665 return;
5667 gcc_assert (n_inv_uses != NULL);
5668 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5670 n_inv_uses[iid]++;
5671 if (n_inv_uses[iid] == 1)
5672 ivs->n_invs++;
5676 /* Set cost pair for GROUP in set IVS to CP. */
5678 static void
5679 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5680 struct iv_group *group, struct cost_pair *cp)
5682 unsigned gid = group->id, cid;
5684 if (ivs->cand_for_group[gid] == cp)
5685 return;
5687 if (ivs->cand_for_group[gid])
5688 iv_ca_set_no_cp (data, ivs, group);
5690 if (cp)
5692 cid = cp->cand->id;
5694 ivs->bad_groups--;
5695 ivs->cand_for_group[gid] = cp;
5696 ivs->n_cand_uses[cid]++;
5697 if (ivs->n_cand_uses[cid] == 1)
5699 bitmap_set_bit (ivs->cands, cid);
5700 ivs->n_cands++;
5701 ivs->cand_cost += cp->cand->cost;
5702 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5705 ivs->cand_use_cost += cp->cost;
5706 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5707 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5708 iv_ca_recount_cost (data, ivs);
5712 /* Extend set IVS by expressing GROUP by some of the candidates in it
5713 if possible. Consider all important candidates if the candidates in
5714 set IVS don't give any result. */
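/* E.g. while building the initial assignment group by group, a group
   that no already-chosen candidate can express falls back to the
   important candidates; if even those fail, the group stays unassigned
   and the cost of the whole assignment is infinite.  */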
5716 static void
5717 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_group *group)
5720 struct cost_pair *best_cp = NULL, *cp;
5721 bitmap_iterator bi;
5722 unsigned i;
5723 struct iv_cand *cand;
5725 gcc_assert (ivs->upto >= group->id);
5726 ivs->upto++;
5727 ivs->bad_groups++;
5729 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5731 cand = data->vcands[i];
5732 cp = get_group_iv_cost (data, group, cand);
5733 if (cheaper_cost_pair (cp, best_cp))
5734 best_cp = cp;
5737 if (best_cp == NULL)
5739 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5741 cand = data->vcands[i];
5742 cp = get_group_iv_cost (data, group, cand);
5743 if (cheaper_cost_pair (cp, best_cp))
5744 best_cp = cp;
5748 iv_ca_set_cp (data, ivs, group, best_cp);
5751 /* Get cost for assignment IVS. */
5753 static comp_cost
5754 iv_ca_cost (struct iv_ca *ivs)
5756 /* This was a conditional expression but it triggered a bug in
5757 Sun C 5.5. */
5758 if (ivs->bad_groups)
5759 return infinite_cost;
5760 else
5761 return ivs->cost;
5764 /* Returns true if applying NEW_CP to GROUP for IVS introduces more
5765 invariants than OLD_CP. */
5767 static bool
5768 iv_ca_more_deps (struct ivopts_data *data, struct iv_ca *ivs,
5769 struct iv_group *group, struct cost_pair *old_cp,
5770 struct cost_pair *new_cp)
5772 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5773 unsigned old_n_invs = ivs->n_invs;
5774 iv_ca_set_cp (data, ivs, group, new_cp);
5775 unsigned new_n_invs = ivs->n_invs;
5776 iv_ca_set_cp (data, ivs, group, old_cp);
5778 return (new_n_invs > old_n_invs);
5781 /* Creates a change expressing GROUP by NEW_CP instead of OLD_CP and chains
5782 it before NEXT. */
5784 static struct iv_ca_delta *
5785 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5786 struct cost_pair *new_cp, struct iv_ca_delta *next)
5788 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5790 change->group = group;
5791 change->old_cp = old_cp;
5792 change->new_cp = new_cp;
5793 change->next = next;
5795 return change;
5798 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5799 are rewritten. */
5801 static struct iv_ca_delta *
5802 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5804 struct iv_ca_delta *last;
5806 if (!l2)
5807 return l1;
5809 if (!l1)
5810 return l2;
5812 for (last = l1; last->next; last = last->next)
5813 continue;
5814 last->next = l2;
5816 return l1;
5819 /* Reverse the list of changes DELTA, forming the inverse of it. */
5821 static struct iv_ca_delta *
5822 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5824 struct iv_ca_delta *act, *next, *prev = NULL;
5826 for (act = delta; act; act = next)
5828 next = act->next;
5829 act->next = prev;
5830 prev = act;
5832 std::swap (act->old_cp, act->new_cp);
5835 return prev;
5838 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5839 reverted instead. */
5841 static void
5842 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5843 struct iv_ca_delta *delta, bool forward)
5845 struct cost_pair *from, *to;
5846 struct iv_ca_delta *act;
5848 if (!forward)
5849 delta = iv_ca_delta_reverse (delta);
5851 for (act = delta; act; act = act->next)
5853 from = act->old_cp;
5854 to = act->new_cp;
5855 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5856 iv_ca_set_cp (data, ivs, act->group, to);
5859 if (!forward)
5860 iv_ca_delta_reverse (delta);
5863 /* Returns true if CAND is used in IVS. */
5865 static bool
5866 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5868 return ivs->n_cand_uses[cand->id] > 0;
5871 /* Returns number of induction variable candidates in the set IVS. */
5873 static unsigned
5874 iv_ca_n_cands (struct iv_ca *ivs)
5876 return ivs->n_cands;
5879 /* Free the list of changes DELTA. */
5881 static void
5882 iv_ca_delta_free (struct iv_ca_delta **delta)
5884 struct iv_ca_delta *act, *next;
5886 for (act = *delta; act; act = next)
5888 next = act->next;
5889 free (act);
5892 *delta = NULL;
5895 /* Allocates a new iv candidate assignment. */
5897 static struct iv_ca *
5898 iv_ca_new (struct ivopts_data *data)
5900 struct iv_ca *nw = XNEW (struct iv_ca);
5902 nw->upto = 0;
5903 nw->bad_groups = 0;
5904 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
5905 data->vgroups.length ());
5906 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
5907 nw->cands = BITMAP_ALLOC (NULL);
5908 nw->n_cands = 0;
5909 nw->n_invs = 0;
5910 nw->cand_use_cost = no_cost;
5911 nw->cand_cost = 0;
5912 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
5913 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
5914 nw->cost = no_cost;
5916 return nw;
5919 /* Free memory occupied by the set IVS. */
5921 static void
5922 iv_ca_free (struct iv_ca **ivs)
5924 free ((*ivs)->cand_for_group);
5925 free ((*ivs)->n_cand_uses);
5926 BITMAP_FREE ((*ivs)->cands);
5927 free ((*ivs)->n_inv_var_uses);
5928 free ((*ivs)->n_inv_expr_uses);
5929 free (*ivs);
5930 *ivs = NULL;
5933 /* Dumps IVS to FILE. */
5935 static void
5936 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5938 unsigned i;
5939 comp_cost cost = iv_ca_cost (ivs);
5941 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
5942 cost.complexity);
5943 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
5944 ivs->cand_cost, ivs->cand_use_cost.cost,
5945 ivs->cand_use_cost.complexity);
5946 bitmap_print (file, ivs->cands, " candidates: ","\n");
5948 for (i = 0; i < ivs->upto; i++)
5950 struct iv_group *group = data->vgroups[i];
5951 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
5952 if (cp)
5953 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
5954 group->id, cp->cand->id, cp->cost.cost,
5955 cp->cost.complexity);
5956 else
5957 fprintf (file, " group:%d --> ??\n", group->id);
5960 const char *pref = "";
5961 fprintf (file, " invariant variables: ");
5962 for (i = 1; i <= data->max_inv_var_id; i++)
5963 if (ivs->n_inv_var_uses[i])
5965 fprintf (file, "%s%d", pref, i);
5966 pref = ", ";
5969 pref = "";
5970 fprintf (file, "\n invariant expressions: ");
5971 for (i = 1; i <= data->max_inv_expr_id; i++)
5972 if (ivs->n_inv_expr_uses[i])
5974 fprintf (file, "%s%d", pref, i);
5975 pref = ", ";
5978 fprintf (file, "\n\n");
5981 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5982 of the new set, and store the differences in DELTA. The number of
5983 induction variables in the new set is stored to N_IVS. When MIN_NCAND is
5984 true, the function tries to find a solution with minimal iv candidates. */
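/* A typical caller uses this as a what-if query, e.g.

     acost = iv_ca_extend (data, ivs, cand, &delta, NULL, false);
     if (acost < iv_ca_cost (ivs))
       iv_ca_delta_commit (data, ivs, delta, true);
     iv_ca_delta_free (&delta);

   which is a sketch of the pattern used elsewhere in this file: the
   function itself commits the deltas only temporarily to measure the
   cost and rolls them back before returning.  */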
5986 static comp_cost
5987 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5988 struct iv_cand *cand, struct iv_ca_delta **delta,
5989 unsigned *n_ivs, bool min_ncand)
5991 unsigned i;
5992 comp_cost cost;
5993 struct iv_group *group;
5994 struct cost_pair *old_cp, *new_cp;
5996 *delta = NULL;
5997 for (i = 0; i < ivs->upto; i++)
5999 group = data->vgroups[i];
6000 old_cp = iv_ca_cand_for_group (ivs, group);
6002 if (old_cp
6003 && old_cp->cand == cand)
6004 continue;
6006 new_cp = get_group_iv_cost (data, group, cand);
6007 if (!new_cp)
6008 continue;
6010 if (!min_ncand && iv_ca_more_deps (data, ivs, group, old_cp, new_cp))
6011 continue;
6013 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6014 continue;
6016 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6019 iv_ca_delta_commit (data, ivs, *delta, true);
6020 cost = iv_ca_cost (ivs);
6021 if (n_ivs)
6022 *n_ivs = iv_ca_n_cands (ivs);
6023 iv_ca_delta_commit (data, ivs, *delta, false);
6025 return cost;
6028 /* Try narrowing set IVS by removing CAND. Return the cost of
6029 the new set and store the differences in DELTA. START is
6030 the candidate with which we start narrowing. */
6032 static comp_cost
6033 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6034 struct iv_cand *cand, struct iv_cand *start,
6035 struct iv_ca_delta **delta)
6037 unsigned i, ci;
6038 struct iv_group *group;
6039 struct cost_pair *old_cp, *new_cp, *cp;
6040 bitmap_iterator bi;
6041 struct iv_cand *cnd;
6042 comp_cost cost, best_cost, acost;
6044 *delta = NULL;
6045 for (i = 0; i < data->vgroups.length (); i++)
6047 group = data->vgroups[i];
6049 old_cp = iv_ca_cand_for_group (ivs, group);
6050 if (old_cp->cand != cand)
6051 continue;
6053 best_cost = iv_ca_cost (ivs);
6054 /* Start narrowing with START. */
6055 new_cp = get_group_iv_cost (data, group, start);
6057 if (data->consider_all_candidates)
6059 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6061 if (ci == cand->id || (start && ci == start->id))
6062 continue;
6064 cnd = data->vcands[ci];
6066 cp = get_group_iv_cost (data, group, cnd);
6067 if (!cp)
6068 continue;
6070 iv_ca_set_cp (data, ivs, group, cp);
6071 acost = iv_ca_cost (ivs);
6073 if (acost < best_cost)
6075 best_cost = acost;
6076 new_cp = cp;
6080 else
6082 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6084 if (ci == cand->id || (start && ci == start->id))
6085 continue;
6087 cnd = data->vcands[ci];
6089 cp = get_group_iv_cost (data, group, cnd);
6090 if (!cp)
6091 continue;
6093 iv_ca_set_cp (data, ivs, group, cp);
6094 acost = iv_ca_cost (ivs);
6096 if (acost < best_cost)
6098 best_cost = acost;
6099 new_cp = cp;
6103 /* Restore the old cost pair for this group. */
6104 iv_ca_set_cp (data, ivs, group, old_cp);
6106 if (!new_cp)
6108 iv_ca_delta_free (delta);
6109 return infinite_cost;
6112 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6115 iv_ca_delta_commit (data, ivs, *delta, true);
6116 cost = iv_ca_cost (ivs);
6117 iv_ca_delta_commit (data, ivs, *delta, false);
6119 return cost;
6122 /* Try optimizing the set of candidates IVS by removing candidates other
6123 than EXCEPT_CAND from it. Return the cost of the new set, and store
6124 differences in DELTA. */
6126 static comp_cost
6127 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6128 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6130 bitmap_iterator bi;
6131 struct iv_ca_delta *act_delta, *best_delta;
6132 unsigned i;
6133 comp_cost best_cost, acost;
6134 struct iv_cand *cand;
6136 best_delta = NULL;
6137 best_cost = iv_ca_cost (ivs);
6139 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6141 cand = data->vcands[i];
6143 if (cand == except_cand)
6144 continue;
6146 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6148 if (acost < best_cost)
6150 best_cost = acost;
6151 iv_ca_delta_free (&best_delta);
6152 best_delta = act_delta;
6154 else
6155 iv_ca_delta_free (&act_delta);
6158 if (!best_delta)
6160 *delta = NULL;
6161 return best_cost;
6164 /* Recurse to possibly remove other unnecessary ivs. */
6165 iv_ca_delta_commit (data, ivs, best_delta, true);
6166 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6167 iv_ca_delta_commit (data, ivs, best_delta, false);
6168 *delta = iv_ca_delta_join (best_delta, *delta);
6169 return best_cost;
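/* Worked example for the recursion (candidate numbers are made up):
   with IVS = {c1, c2, c3}, suppose narrowing first moves the only group
   of c2 over to c3.  Only after c2 is gone may the groups of c1 also be
   representable by c3 cheaply enough, so the recursive call can remove
   c1 as well, leaving {c3}.  A single non-recursive pass would keep c1,
   since c1 was not removable while c2 was still in the set.  */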
6172 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6173 cheaper local cost for GROUP than BEST_CP. If so, return a pointer
6174 to the corresponding cost_pair; otherwise just return BEST_CP. */
6176 static struct cost_pair*
6177 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6178 unsigned int cand_idx, struct iv_cand *old_cand,
6179 struct cost_pair *best_cp)
6181 struct iv_cand *cand;
6182 struct cost_pair *cp;
6184 gcc_assert (old_cand != NULL && best_cp != NULL);
6185 if (cand_idx == old_cand->id)
6186 return best_cp;
6188 cand = data->vcands[cand_idx];
6189 cp = get_group_iv_cost (data, group, cand);
6190 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6191 return cp;
6193 return best_cp;
6196 /* Try breaking the locally optimal fixed point for IVS by replacing
6197 candidates that are used by more than one iv use. For each such
6198 candidate, this function tries to represent the iv uses under that
6199 candidate using other candidates with lower local cost, then tries
6200 to prune the new set. If the new set has lower cost, it returns the
6201 new cost after recording the candidate replacement in list DELTA. */
6203 static comp_cost
6204 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6205 struct iv_ca_delta **delta)
6207 bitmap_iterator bi, bj;
6208 unsigned int i, j, k;
6209 struct iv_cand *cand;
6210 comp_cost orig_cost, acost;
6211 struct iv_ca_delta *act_delta, *tmp_delta;
6212 struct cost_pair *old_cp, *best_cp = NULL;
6214 *delta = NULL;
6215 orig_cost = iv_ca_cost (ivs);
6217 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6219 if (ivs->n_cand_uses[i] == 1
6220 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6221 continue;
6223 cand = data->vcands[i];
6225 act_delta = NULL;
6226 /* Represent uses under current candidate using other ones with
6227 lower local cost. */
6228 for (j = 0; j < ivs->upto; j++)
6230 struct iv_group *group = data->vgroups[j];
6231 old_cp = iv_ca_cand_for_group (ivs, group);
6233 if (old_cp->cand != cand)
6234 continue;
6236 best_cp = old_cp;
6237 if (data->consider_all_candidates)
6238 for (k = 0; k < data->vcands.length (); k++)
6239 best_cp = cheaper_cost_with_cand (data, group, k,
6240 old_cp->cand, best_cp);
6241 else
6242 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6243 best_cp = cheaper_cost_with_cand (data, group, k,
6244 old_cp->cand, best_cp);
6246 if (best_cp == old_cp)
6247 continue;
6249 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6251 /* No need for further pruning. */
6252 if (!act_delta)
6253 continue;
6255 /* Prune the new candidate set. */
6256 iv_ca_delta_commit (data, ivs, act_delta, true);
6257 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6258 iv_ca_delta_commit (data, ivs, act_delta, false);
6259 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6261 if (acost < orig_cost)
6263 *delta = act_delta;
6264 return acost;
6266 else
6267 iv_ca_delta_free (&act_delta);
6270 return orig_cost;
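/* Worked example (hypothetical): let candidate c1 serve groups g1 and
   g2 in a locally optimal set.  Neither extending nor pruning improves
   the cost, because eliminating c1 requires changing both groups at
   once.  Rerouting g1 to c2 and g2 to c3 first, each with lower local
   cost, and then pruning can reach the cheaper set {c2, c3}, which a
   single extend or narrow step cannot.  */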
6273 /* Tries to extend the set IVS in the best possible way in order to
6274 express the GROUP. If ORIGINALP is true, prefer candidates from
6275 the original set of IVs, otherwise favor important candidates not
6276 based on any memory object. */
6278 static bool
6279 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6280 struct iv_group *group, bool originalp)
6282 comp_cost best_cost, act_cost;
6283 unsigned i;
6284 bitmap_iterator bi;
6285 struct iv_cand *cand;
6286 struct iv_ca_delta *best_delta = NULL, *act_delta;
6287 struct cost_pair *cp;
6289 iv_ca_add_group (data, ivs, group);
6290 best_cost = iv_ca_cost (ivs);
6291 cp = iv_ca_cand_for_group (ivs, group);
6292 if (cp)
6294 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6295 iv_ca_set_no_cp (data, ivs, group);
6298 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6299 first try important candidates not based on any memory object. Only if
6300 this fails, try the specific ones. Rationale -- in loops with many
6301 variables the best choice often is to use just one generic biv. If we
6302 added here many ivs specific to the uses, the optimization algorithm later
6303 would be likely to get stuck in a local minimum, thus causing us to create
6304 too many ivs. The approach from few ivs to more seems more likely to be
6305 successful -- starting from few ivs, replacing an expensive use by a
6306 specific iv should always be a win. */
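  /* For example (hypothetical), for the uses a[i], b[i] and i < n a
     single generic biv for i can serve all groups; seeding the set with
     a separate pointer iv per array use instead tends to trap the
     search in a local minimum with three ivs.  */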
6307 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6309 cand = data->vcands[i];
6311 if (originalp && cand->pos != IP_ORIGINAL)
6312 continue;
6314 if (!originalp && cand->iv->base_object != NULL_TREE)
6315 continue;
6317 if (iv_ca_cand_used_p (ivs, cand))
6318 continue;
6320 cp = get_group_iv_cost (data, group, cand);
6321 if (!cp)
6322 continue;
6324 iv_ca_set_cp (data, ivs, group, cp);
6325 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6326 true);
6327 iv_ca_set_no_cp (data, ivs, group);
6328 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6330 if (act_cost < best_cost)
6332 best_cost = act_cost;
6334 iv_ca_delta_free (&best_delta);
6335 best_delta = act_delta;
6337 else
6338 iv_ca_delta_free (&act_delta);
6341 if (best_cost.infinite_cost_p ())
6343 for (i = 0; i < group->n_map_members; i++)
6345 cp = group->cost_map + i;
6346 cand = cp->cand;
6347 if (!cand)
6348 continue;
6350 /* Already tried this. */
6351 if (cand->important)
6353 if (originalp && cand->pos == IP_ORIGINAL)
6354 continue;
6355 if (!originalp && cand->iv->base_object == NULL_TREE)
6356 continue;
6359 if (iv_ca_cand_used_p (ivs, cand))
6360 continue;
6362 act_delta = NULL;
6363 iv_ca_set_cp (data, ivs, group, cp);
6364 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6365 iv_ca_set_no_cp (data, ivs, group);
6366 act_delta = iv_ca_delta_add (group,
6367 iv_ca_cand_for_group (ivs, group),
6368 cp, act_delta);
6370 if (act_cost < best_cost)
6372 best_cost = act_cost;
6374 if (best_delta)
6375 iv_ca_delta_free (&best_delta);
6376 best_delta = act_delta;
6378 else
6379 iv_ca_delta_free (&act_delta);
6383 iv_ca_delta_commit (data, ivs, best_delta, true);
6384 iv_ca_delta_free (&best_delta);
6386 return !best_cost.infinite_cost_p ();
6389 /* Finds an initial assignment of candidates to uses. */
6391 static struct iv_ca *
6392 get_initial_solution (struct ivopts_data *data, bool originalp)
6394 unsigned i;
6395 struct iv_ca *ivs = iv_ca_new (data);
6397 for (i = 0; i < data->vgroups.length (); i++)
6398 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6400 iv_ca_free (&ivs);
6401 return NULL;
6404 return ivs;
6407 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6408 points to a bool variable; if it is true, this function tries to break
6409 the locally optimal fixed point by replacing candidates in IVS. */
6411 static bool
6412 try_improve_iv_set (struct ivopts_data *data,
6413 struct iv_ca *ivs, bool *try_replace_p)
6415 unsigned i, n_ivs;
6416 comp_cost acost, best_cost = iv_ca_cost (ivs);
6417 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6418 struct iv_cand *cand;
6420 /* Try extending the set of induction variables by one. */
6421 for (i = 0; i < data->vcands.length (); i++)
6423 cand = data->vcands[i];
6425 if (iv_ca_cand_used_p (ivs, cand))
6426 continue;
6428 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6429 if (!act_delta)
6430 continue;
6432 /* If we successfully added the candidate and the set is small enough,
6433 try optimizing it by removing other candidates. */
6434 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6436 iv_ca_delta_commit (data, ivs, act_delta, true);
6437 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6438 iv_ca_delta_commit (data, ivs, act_delta, false);
6439 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6442 if (acost < best_cost)
6444 best_cost = acost;
6445 iv_ca_delta_free (&best_delta);
6446 best_delta = act_delta;
6448 else
6449 iv_ca_delta_free (&act_delta);
6452 if (!best_delta)
6454 /* Try removing the candidates from the set instead. */
6455 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6457 if (!best_delta && *try_replace_p)
6459 *try_replace_p = false;
6460 /* So far the candidate selection algorithm tends to choose few IVs,
6461 so that it can handle loops that have many variables but for
6462 which the best choice is often to use only one general biv. One
6463 weakness is that it cannot handle the opposite case, in which
6464 different candidates should be chosen for different uses. To
6465 solve this problem, we replace candidates as described in the
6466 comment of iv_ca_replace, thus giving the general algorithm a
6467 chance to break the locally optimal fixed point in these cases. */
6468 best_cost = iv_ca_replace (data, ivs, &best_delta);
6471 if (!best_delta)
6472 return false;
6475 iv_ca_delta_commit (data, ivs, best_delta, true);
6476 gcc_assert (best_cost == iv_ca_cost (ivs));
6477 iv_ca_delta_free (&best_delta);
6478 return true;
6481 /* Attempts to find the optimal set of induction variables. We use a
6482 simple greedy heuristic: we try to replace at most one candidate in the
6483 selected solution and remove the unused ivs while this improves the cost. */
6485 static struct iv_ca *
6486 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6488 struct iv_ca *set;
6489 bool try_replace_p = true;
6491 /* Get the initial solution. */
6492 set = get_initial_solution (data, originalp);
6493 if (!set)
6495 if (dump_file && (dump_flags & TDF_DETAILS))
6496 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6497 return NULL;
6500 if (dump_file && (dump_flags & TDF_DETAILS))
6502 fprintf (dump_file, "Initial set of candidates:\n");
6503 iv_ca_dump (data, dump_file, set);
6506 while (try_improve_iv_set (data, set, &try_replace_p))
6508 if (dump_file && (dump_flags & TDF_DETAILS))
6510 fprintf (dump_file, "Improved to:\n");
6511 iv_ca_dump (data, dump_file, set);
6515 return set;
6518 static struct iv_ca *
6519 find_optimal_iv_set (struct ivopts_data *data)
6521 unsigned i;
6522 comp_cost cost, origcost;
6523 struct iv_ca *set, *origset;
6525 /* Determine the cost based on a strategy that starts with the original
6526 IVs, and try again using a strategy that prefers candidates not
6527 based on any memory object. */
6528 origset = find_optimal_iv_set_1 (data, true);
6529 set = find_optimal_iv_set_1 (data, false);
6531 if (!origset && !set)
6532 return NULL;
6534 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6535 cost = set ? iv_ca_cost (set) : infinite_cost;
6537 if (dump_file && (dump_flags & TDF_DETAILS))
6539 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6540 origcost.cost, origcost.complexity);
6541 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6542 cost.cost, cost.complexity);
6545 /* Choose the one with the best cost. */
6546 if (origcost <= cost)
6548 if (set)
6549 iv_ca_free (&set);
6550 set = origset;
6552 else if (origset)
6553 iv_ca_free (&origset);
6555 for (i = 0; i < data->vgroups.length (); i++)
6557 struct iv_group *group = data->vgroups[i];
6558 group->selected = iv_ca_cand_for_group (set, group)->cand;
6561 return set;
6564 /* Creates a new induction variable corresponding to CAND. */
6566 static void
6567 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6569 gimple_stmt_iterator incr_pos;
6570 tree base;
6571 struct iv_use *use;
6572 struct iv_group *group;
6573 bool after = false;
6575 gcc_assert (cand->iv != NULL);
6577 switch (cand->pos)
6579 case IP_NORMAL:
6580 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6581 break;
6583 case IP_END:
6584 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6585 after = true;
6586 break;
6588 case IP_AFTER_USE:
6589 after = true;
6590 /* fall through */
6591 case IP_BEFORE_USE:
6592 incr_pos = gsi_for_stmt (cand->incremented_at);
6593 break;
6595 case IP_ORIGINAL:
6596 /* Mark that the iv is preserved. */
6597 name_info (data, cand->var_before)->preserve_biv = true;
6598 name_info (data, cand->var_after)->preserve_biv = true;
6600 /* Rewrite the increment so that it uses var_before directly. */
6601 use = find_interesting_uses_op (data, cand->var_after);
6602 group = data->vgroups[use->group_id];
6603 group->selected = cand;
6604 return;
6607 gimple_add_tmp_var (cand->var_before);
6609 base = unshare_expr (cand->iv->base);
6611 create_iv (base, unshare_expr (cand->iv->step),
6612 cand->var_before, data->current_loop,
6613 &incr_pos, after, &cand->var_before, &cand->var_after);
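/* Placement sketch (illustrative GIMPLE, names made up): for IP_NORMAL
   the increment is emitted immediately before the exit test, e.g.

     ivtmp.5 = ivtmp.4 + 4;
     if (ivtmp.5 < _bound)
       goto <loop>;

   so that the exit condition can use the incremented value.  */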
6616 /* Creates new induction variables described in SET. */
6618 static void
6619 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6621 unsigned i;
6622 struct iv_cand *cand;
6623 bitmap_iterator bi;
6625 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6627 cand = data->vcands[i];
6628 create_new_iv (data, cand);
6631 if (dump_file && (dump_flags & TDF_DETAILS))
6633 fprintf (dump_file, "Selected IV set for loop %d",
6634 data->current_loop->num);
6635 if (data->loop_loc != UNKNOWN_LOCATION)
6636 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6637 LOCATION_LINE (data->loop_loc));
6638 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6639 avg_loop_niter (data->current_loop));
6640 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6641 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6643 cand = data->vcands[i];
6644 dump_cand (dump_file, cand);
6646 fprintf (dump_file, "\n");
6650 /* Rewrites USE (definition of iv used in a nonlinear expression)
6651 using candidate CAND. */
6653 static void
6654 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6655 struct iv_use *use, struct iv_cand *cand)
6657 tree comp;
6658 tree tgt;
6659 gassign *ass;
6660 gimple_stmt_iterator bsi;
6662 /* An important special case -- if we are asked to express value of
6663 the original iv by itself, just exit; there is no need to
6664 introduce a new computation (that might also need casting the
6665 variable to unsigned and back). */
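  /* E.g. (illustrative): when USE is the original increment

       i_1 = i_0 + 1;

     and CAND is the original biv incremented by that very statement, we
     keep it as is instead of re-expressing it through a cast to
     unsigned and back.  */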
6666 if (cand->pos == IP_ORIGINAL
6667 && cand->incremented_at == use->stmt)
6669 tree op = NULL_TREE;
6670 enum tree_code stmt_code;
6672 gcc_assert (is_gimple_assign (use->stmt));
6673 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6675 /* Check whether we may leave the computation unchanged.
6676 This is the case only if it does not rely on other
6677 computations in the loop -- otherwise, the computation
6678 we rely upon may be removed in remove_unused_ivs,
6679 thus leading to ICE. */
6680 stmt_code = gimple_assign_rhs_code (use->stmt);
6681 if (stmt_code == PLUS_EXPR
6682 || stmt_code == MINUS_EXPR
6683 || stmt_code == POINTER_PLUS_EXPR)
6685 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6686 op = gimple_assign_rhs2 (use->stmt);
6687 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6688 op = gimple_assign_rhs1 (use->stmt);
6691 if (op != NULL_TREE)
6693 if (expr_invariant_in_loop_p (data->current_loop, op))
6694 return;
6695 if (TREE_CODE (op) == SSA_NAME)
6697 struct iv *iv = get_iv (data, op);
6698 if (iv != NULL && integer_zerop (iv->step))
6699 return;
6704 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
6705 gcc_assert (comp != NULL_TREE);
6707 switch (gimple_code (use->stmt))
6709 case GIMPLE_PHI:
6710 tgt = PHI_RESULT (use->stmt);
6712 /* If we should keep the biv, do not replace it. */
6713 if (name_info (data, tgt)->preserve_biv)
6714 return;
6716 bsi = gsi_after_labels (gimple_bb (use->stmt));
6717 break;
6719 case GIMPLE_ASSIGN:
6720 tgt = gimple_assign_lhs (use->stmt);
6721 bsi = gsi_for_stmt (use->stmt);
6722 break;
6724 default:
6725 gcc_unreachable ();
6728 if (!valid_gimple_rhs_p (comp)
6729 || (gimple_code (use->stmt) != GIMPLE_PHI
6730 /* We can't allow re-allocating the stmt as it might still be
6731 pointed to. */
6732 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6733 >= gimple_num_ops (gsi_stmt (bsi)))))
6735 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6736 true, GSI_SAME_STMT);
6737 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6739 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6740 /* As this isn't a plain copy we have to reset alignment
6741 information. */
6742 if (SSA_NAME_PTR_INFO (comp))
6743 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6747 if (gimple_code (use->stmt) == GIMPLE_PHI)
6749 ass = gimple_build_assign (tgt, comp);
6750 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6752 bsi = gsi_for_stmt (use->stmt);
6753 remove_phi_node (&bsi, false);
6755 else
6757 gimple_assign_set_rhs_from_tree (&bsi, comp);
6758 use->stmt = gsi_stmt (bsi);
6762 /* Performs a peephole optimization to reorder the iv update statement with
6763 a mem ref to enable instruction combining in later phases. The mem ref uses
6764 the iv value before the update, so the reordering transformation requires
6765 adjustment of the offset. CAND is the selected IV_CAND.
6767 Example:
6769 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6770 iv2 = iv1 + 1;
6772 if (t < val) (1)
6773 goto L;
6774 goto Head;
6777 directly propagating t over to (1) will introduce an overlapping live
6778 range, thus increasing register pressure. This peephole transforms it into:
6781 iv2 = iv1 + 1;
6782 t = MEM_REF (base, iv2, 8, 8);
6783 if (t < val)
6784 goto L;
6785 goto Head;
6788 static void
6789 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6791 tree var_after;
6792 gimple *iv_update, *stmt;
6793 basic_block bb;
6794 gimple_stmt_iterator gsi, gsi_iv;
6796 if (cand->pos != IP_NORMAL)
6797 return;
6799 var_after = cand->var_after;
6800 iv_update = SSA_NAME_DEF_STMT (var_after);
6802 bb = gimple_bb (iv_update);
6803 gsi = gsi_last_nondebug_bb (bb);
6804 stmt = gsi_stmt (gsi);
6806 /* Only handle conditional statements for now. */
6807 if (gimple_code (stmt) != GIMPLE_COND)
6808 return;
6810 gsi_prev_nondebug (&gsi);
6811 stmt = gsi_stmt (gsi);
6812 if (stmt != iv_update)
6813 return;
6815 gsi_prev_nondebug (&gsi);
6816 if (gsi_end_p (gsi))
6817 return;
6819 stmt = gsi_stmt (gsi);
6820 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6821 return;
6823 if (stmt != use->stmt)
6824 return;
6826 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6827 return;
6829 if (dump_file && (dump_flags & TDF_DETAILS))
6831 fprintf (dump_file, "Reordering \n");
6832 print_gimple_stmt (dump_file, iv_update, 0, 0);
6833 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6834 fprintf (dump_file, "\n");
6837 gsi = gsi_for_stmt (use->stmt);
6838 gsi_iv = gsi_for_stmt (iv_update);
6839 gsi_move_before (&gsi_iv, &gsi);
6841 cand->pos = IP_BEFORE_USE;
6842 cand->incremented_at = use->stmt;
6845 /* Rewrites USE (address that is an iv) using candidate CAND. */
6847 static void
6848 rewrite_use_address (struct ivopts_data *data,
6849 struct iv_use *use, struct iv_cand *cand)
6851 aff_tree aff;
6852 bool ok;
6854 adjust_iv_update_pos (cand, use);
6855 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
6856 gcc_assert (ok);
6857 unshare_aff_combination (&aff);
6859 /* To avoid undefined overflow problems, all IV candidates use unsigned
6860 integer types. The drawback is that this makes it impossible for
6861 create_mem_ref to distinguish an IV that is based on a memory object
6862 from one that represents simply an offset.
6864 To work around this problem, we pass a hint to create_mem_ref that
6865 indicates which variable (if any) in aff is an IV based on a memory
6866 object. Note that we only consider the candidate. If this is not
6867 based on an object, the base of the reference is in some subexpression
6868 of the use -- but these will use pointer types, so they are recognized
6869 by the create_mem_ref heuristics anyway. */
6870 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
6871 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
6872 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6873 tree type = TREE_TYPE (*use->op_p);
6874 unsigned int align = get_object_alignment (*use->op_p);
6875 if (align != TYPE_ALIGN (type))
6876 type = build_aligned_type (type, align);
6878 tree ref = create_mem_ref (&bsi, type, &aff,
6879 reference_alias_ptr_type (*use->op_p),
6880 iv, base_hint, data->speed);
6882 copy_ref_info (ref, *use->op_p);
6883 *use->op_p = ref;
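/* Illustrative effect of the hint: for a dereference of a pointer iv
   based on array A, BASE_HINT lets create_mem_ref keep A as the base of
   the resulting TARGET_MEM_REF instead of treating the whole unsigned
   address computation as an opaque offset, which usually yields a
   better addressing mode.  */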
6886 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6887 candidate CAND. */
6889 static void
6890 rewrite_use_compare (struct ivopts_data *data,
6891 struct iv_use *use, struct iv_cand *cand)
6893 tree comp, *var_p, op, bound;
6894 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6895 enum tree_code compare;
6896 struct iv_group *group = data->vgroups[use->group_id];
6897 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
6898 bool ok;
6900 bound = cp->value;
6901 if (bound)
6903 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6904 tree var_type = TREE_TYPE (var);
6905 gimple_seq stmts;
6907 if (dump_file && (dump_flags & TDF_DETAILS))
6909 fprintf (dump_file, "Replacing exit test: ");
6910 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6912 compare = cp->comp;
6913 bound = unshare_expr (fold_convert (var_type, bound));
6914 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6915 if (stmts)
6916 gsi_insert_seq_on_edge_immediate (
6917 loop_preheader_edge (data->current_loop),
6918 stmts);
6920 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6921 gimple_cond_set_lhs (cond_stmt, var);
6922 gimple_cond_set_code (cond_stmt, compare);
6923 gimple_cond_set_rhs (cond_stmt, op);
6924 return;
6927 /* The induction variable elimination failed; just express the original
6928 giv. */
6929 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
6930 gcc_assert (comp != NULL_TREE);
6932 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6933 gcc_assert (ok);
6935 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6936 true, GSI_SAME_STMT);
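/* Illustrative example (made-up SSA names): an exit test

     if (i_1 < n_2)

   served by a candidate ivtmp.3 stepping by 4 may become

     if (ivtmp.3 < _bound)

   with _bound = n_2 * 4 computed once on the preheader edge; this is
   the classic strength-reduced exit test.  */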
6939 /* Rewrite the groups using the selected induction variables. */
6941 static void
6942 rewrite_groups (struct ivopts_data *data)
6944 unsigned i, j;
6946 for (i = 0; i < data->vgroups.length (); i++)
6948 struct iv_group *group = data->vgroups[i];
6949 struct iv_cand *cand = group->selected;
6951 gcc_assert (cand);
6953 if (group->type == USE_NONLINEAR_EXPR)
6955 for (j = 0; j < group->vuses.length (); j++)
6957 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
6958 update_stmt (group->vuses[j]->stmt);
6961 else if (group->type == USE_ADDRESS)
6963 for (j = 0; j < group->vuses.length (); j++)
6965 rewrite_use_address (data, group->vuses[j], cand);
6966 update_stmt (group->vuses[j]->stmt);
6969 else
6971 gcc_assert (group->type == USE_COMPARE);
6973 for (j = 0; j < group->vuses.length (); j++)
6975 rewrite_use_compare (data, group->vuses[j], cand);
6976 update_stmt (group->vuses[j]->stmt);
6982 /* Removes the ivs that are not used after rewriting. */
6984 static void
6985 remove_unused_ivs (struct ivopts_data *data)
6987 unsigned j;
6988 bitmap_iterator bi;
6989 bitmap toremove = BITMAP_ALLOC (NULL);
6991 /* Figure out an order in which to release SSA DEFs so that we don't
6992 release something that we'd have to propagate into a debug stmt
6993 afterwards. */
6994 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6996 struct version_info *info;
6998 info = ver_info (data, j);
6999 if (info->iv
7000 && !integer_zerop (info->iv->step)
7001 && !info->inv_id
7002 && !info->iv->nonlin_use
7003 && !info->preserve_biv)
7005 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7007 tree def = info->iv->ssa_name;
7009 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7011 imm_use_iterator imm_iter;
7012 use_operand_p use_p;
7013 gimple *stmt;
7014 int count = 0;
7016 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7018 if (!gimple_debug_bind_p (stmt))
7019 continue;
7021 /* We just want to determine whether to do nothing
7022 (count == 0), to substitute the computed
7023 expression into a single use of the SSA DEF by
7024 itself (count == 1), or to use a debug temp
7025 because the SSA DEF is used multiple times or as
7026 part of a larger expression (count > 1). */
7027 count++;
7028 if (gimple_debug_bind_get_value (stmt) != def)
7029 count++;
7031 if (count > 1)
7032 BREAK_FROM_IMM_USE_STMT (imm_iter);
7035 if (!count)
7036 continue;
7038 struct iv_use dummy_use;
7039 struct iv_cand *best_cand = NULL, *cand;
7040 unsigned i, best_pref = 0, cand_pref;
7042 memset (&dummy_use, 0, sizeof (dummy_use));
7043 dummy_use.iv = info->iv;
7044 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7046 cand = data->vgroups[i]->selected;
7047 if (cand == best_cand)
7048 continue;
7049 cand_pref = operand_equal_p (cand->iv->step,
7050 info->iv->step, 0)
7051 ? 4 : 0;
7052 cand_pref
7053 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7054 == TYPE_MODE (TREE_TYPE (info->iv->base))
7055 ? 2 : 0;
7056 cand_pref
7057 += TREE_CODE (cand->iv->base) == INTEGER_CST
7058 ? 1 : 0;
7059 if (best_cand == NULL || best_pref < cand_pref)
7061 best_cand = cand;
7062 best_pref = cand_pref;
7066 if (!best_cand)
7067 continue;
7069 tree comp = get_computation_at (data->current_loop,
7070 SSA_NAME_DEF_STMT (def),
7071 &dummy_use, best_cand);
7072 if (!comp)
7073 continue;
7075 if (count > 1)
7077 tree vexpr = make_node (DEBUG_EXPR_DECL);
7078 DECL_ARTIFICIAL (vexpr) = 1;
7079 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7080 if (SSA_NAME_VAR (def))
7081 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7082 else
7083 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7084 gdebug *def_temp
7085 = gimple_build_debug_bind (vexpr, comp, NULL);
7086 gimple_stmt_iterator gsi;
7088 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7089 gsi = gsi_after_labels (gimple_bb
7090 (SSA_NAME_DEF_STMT (def)));
7091 else
7092 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7094 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7095 comp = vexpr;
7098 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7100 if (!gimple_debug_bind_p (stmt))
7101 continue;
7103 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7104 SET_USE (use_p, comp);
7106 update_stmt (stmt);
7112 release_defs_bitset (toremove);
7114 BITMAP_FREE (toremove);
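/* Illustrative example (made-up names): if counter i is removed in
   favor of a pointer iv p with the same step, a debug bind

     # DEBUG i => i_3

   can be rewritten as

     # DEBUG i => (int) (p_7 - &a[0])

   so the value of i stays available to the debugger even though no code
   computes it anymore.  */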
7117 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7118 for hash_map::traverse. */
7120 bool
7121 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7123 free (value);
7124 return true;
7127 /* Frees data allocated by the optimization of a single loop. */
7129 static void
7130 free_loop_data (struct ivopts_data *data)
7132 unsigned i, j;
7133 bitmap_iterator bi;
7134 tree obj;
7136 if (data->niters)
7138 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7139 delete data->niters;
7140 data->niters = NULL;
7143 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7145 struct version_info *info;
7147 info = ver_info (data, i);
7148 info->iv = NULL;
7149 info->has_nonlin_use = false;
7150 info->preserve_biv = false;
7151 info->inv_id = 0;
7153 bitmap_clear (data->relevant);
7154 bitmap_clear (data->important_candidates);
7156 for (i = 0; i < data->vgroups.length (); i++)
7158 struct iv_group *group = data->vgroups[i];
7160 for (j = 0; j < group->vuses.length (); j++)
7161 free (group->vuses[j]);
7162 group->vuses.release ();
7164 BITMAP_FREE (group->related_cands);
7165 for (j = 0; j < group->n_map_members; j++)
7167 if (group->cost_map[j].inv_vars)
7168 BITMAP_FREE (group->cost_map[j].inv_vars);
7169 if (group->cost_map[j].inv_exprs)
7170 BITMAP_FREE (group->cost_map[j].inv_exprs);
7173 free (group->cost_map);
7174 free (group);
7176 data->vgroups.truncate (0);
7178 for (i = 0; i < data->vcands.length (); i++)
7180 struct iv_cand *cand = data->vcands[i];
7182 if (cand->inv_vars)
7183 BITMAP_FREE (cand->inv_vars);
7184 free (cand);
7186 data->vcands.truncate (0);
7188 if (data->version_info_size < num_ssa_names)
7190 data->version_info_size = 2 * num_ssa_names;
7191 free (data->version_info);
7192 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7195 data->max_inv_var_id = 0;
7196 data->max_inv_expr_id = 0;
7198 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7199 SET_DECL_RTL (obj, NULL_RTX);
7201 decl_rtl_to_reset.truncate (0);
7203 data->inv_expr_tab->empty ();
7205 data->iv_common_cand_tab->empty ();
7206 data->iv_common_cands.truncate (0);
7209 /* Finalizes the data structures used by the iv optimization pass,
7210 stored in DATA. */
7212 static void
7213 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7215 free_loop_data (data);
7216 free (data->version_info);
7217 BITMAP_FREE (data->relevant);
7218 BITMAP_FREE (data->important_candidates);
7220 decl_rtl_to_reset.release ();
7221 data->vgroups.release ();
7222 data->vcands.release ();
7223 delete data->inv_expr_tab;
7224 data->inv_expr_tab = NULL;
7225 free_affine_expand_cache (&data->name_expansion_cache);
7226 delete data->iv_common_cand_tab;
7227 data->iv_common_cand_tab = NULL;
7228 data->iv_common_cands.release ();
7229 obstack_free (&data->iv_obstack, NULL);
7232 /* Returns true if the loop body BODY includes any function calls. */
7234 static bool
7235 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7237 gimple_stmt_iterator gsi;
7238 unsigned i;
7240 for (i = 0; i < num_nodes; i++)
7241 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7243 gimple *stmt = gsi_stmt (gsi);
7244 if (is_gimple_call (stmt)
7245 && !gimple_call_internal_p (stmt)
7246 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7247 return true;
7249 return false;
7252 /* Optimizes the LOOP. Returns true if anything changed. */
7254 static bool
7255 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7257 bool changed = false;
7258 struct iv_ca *iv_ca;
7259 edge exit = single_dom_exit (loop);
7260 basic_block *body;
7262 gcc_assert (!data->niters);
7263 data->current_loop = loop;
7264 data->loop_loc = find_loop_location (loop);
7265 data->speed = optimize_loop_for_speed_p (loop);
7267 if (dump_file && (dump_flags & TDF_DETAILS))
7269 fprintf (dump_file, "Processing loop %d", loop->num);
7270 if (data->loop_loc != UNKNOWN_LOCATION)
7271 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7272 LOCATION_LINE (data->loop_loc));
7273 fprintf (dump_file, "\n");
7275 if (exit)
7277 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7278 exit->src->index, exit->dest->index);
7279 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7280 fprintf (dump_file, "\n");
7283 fprintf (dump_file, "\n");
7286 body = get_loop_body (loop);
7287 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7288 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7289 free (body);
7291 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7293 /* For each ssa name determines whether it behaves as an induction variable
7294 in some loop. */
7295 if (!find_induction_variables (data))
7296 goto finish;
7298 /* Finds interesting uses (item 1). */
7299 find_interesting_uses (data);
7300 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7301 goto finish;
7303 /* Finds candidates for the induction variables (item 2). */
7304 find_iv_candidates (data);
7306 /* Calculates the costs (item 3, part 1). */
7307 determine_iv_costs (data);
7308 determine_group_iv_costs (data);
7309 determine_set_costs (data);
7311 /* Find the optimal set of induction variables (item 3, part 2). */
7312 iv_ca = find_optimal_iv_set (data);
7313 if (!iv_ca)
7314 goto finish;
7315 changed = true;
7317 /* Create the new induction variables (item 4, part 1). */
7318 create_new_ivs (data, iv_ca);
7319 iv_ca_free (&iv_ca);
7321 /* Rewrite the uses (item 4, part 2). */
7322 rewrite_groups (data);
7324 /* Remove the ivs that are unused after rewriting. */
7325 remove_unused_ivs (data);
7327 /* We have changed the structure of induction variables; it might happen
7328 that definitions in the scev database refer to some of them that were
7329 eliminated. */
7330 scev_reset ();
7332 finish:
7333 free_loop_data (data);
7335 return changed;
7338 /* Main entry point. Optimizes induction variables in loops. */
7340 void
7341 tree_ssa_iv_optimize (void)
7343 struct loop *loop;
7344 struct ivopts_data data;
7346 tree_ssa_iv_optimize_init (&data);
7348 /* Optimize the loops starting with the innermost ones. */
7349 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7351 if (dump_file && (dump_flags & TDF_DETAILS))
7352 flow_loop_dump (loop, dump_file, NULL, 1);
7354 tree_ssa_iv_optimize_loop (&data, loop);
7357 tree_ssa_iv_optimize_finalize (&data);
7360 #include "gt-tree-ssa-loop-ivopts.h"