gcc/tree-ssa-loop-ivopts.cc
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop optimization,
71 to try to make the selected IV candidate set optimal. The process of doloop
72 support includes:
74 1) Analyze whether the current loop will be transformed into a doloop, find
75 and mark its compare type IV use as a doloop use (iv_group field doloop_p),
76 and set the flag doloop_use_p of ivopts_data to notify subsequent processing
77 about doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 The target hook predict_doloop_p can be used for target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1}
81 and set the doloop_p flag of the iv_cand; its step cost is set to zero and,
82 like a biv, it incurs no extra cost. For cost determination between the
83 doloop IV cand and an IV use, the target hooks doloop_cost_for_generic and
84 doloop_cost_for_address are provided to add extra costs for generic type and
85 address type IV uses. Zero cost is assigned to the pair between the doloop
86 IV cand and the doloop IV use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will proceed as usual, picking up the doloop dedicated
90 IV if profitable. */
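/* As an illustrative sketch (hypothetical example, not taken from any
   testcase), a counted loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   may conceptually be rewritten by this pass into something like

     for (p = a; p < a + n; p++)
       *p = 0;

   i.e. the multiplication hidden in the a[i] addressing is strength-reduced
   into a pointer increment, and the original counter may be eliminated when
   it has no other uses.  The actual result depends on the target addressing
   modes and on the cost model implemented below.  */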
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
143 /* The infinite cost. */
144 #define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
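/* E.g. (illustrative): if the profile-based estimate is 3 iterations, 3 is
   returned unchanged; without such an estimate the likely maximum is used
   instead, capped at param_avg_loop_niter.  */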
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to which the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use this iv appears in, if any. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
192 /* Types of uses. */
193 enum use_type
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts costs COST1 and COST2. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
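/* Worked example (illustrative only): comp_cost (4, 1) < comp_cost (5, 0)
   because the runtime cost field is compared first, while
   comp_cost (4, 1) < comp_cost (4, 2) holds only because the complexity
   field breaks the tie between equal costs.  */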
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64_pod addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
430 /* The costs wrto the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
440 /* The position where the iv is computed. */
441 enum iv_position
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique identifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variables and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
703 /* Difference of two iv candidate assignments. */
705 struct iv_ca_delta
707 /* Changed group. */
708 struct iv_group *group;
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
713 /* A new assignment. */
714 class cost_pair *new_cp;
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
720 /* Bound on the number of candidates below which all candidates are considered. */
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
728 #define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
731 /* If there are at most this number of ivs in the set, always try removing
732 unnecessary ivs from the set. */
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for which the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_nondebug_stmt (bb);
943 /* Returns true if STMT is after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT is after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 if (!number_of_iterations_exit (data->current_loop,
1040 exit, desc, true)
1041 || contains_abnormal_ssa_name_p (desc->niter))
1043 XDELETE (desc);
1044 desc = NULL;
1046 data->niters->put (exit, desc);
1048 else
1049 desc = *slot;
1051 return desc;
1054 /* Returns the structure describing number of iterations determined from
1055 single dominating exit of DATA->current_loop, or NULL if something
1056 goes wrong. */
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1061 edge exit = single_dom_exit (data->current_loop);
1063 if (!exit)
1064 return NULL;
1066 return niter_for_exit (data, exit);
1069 /* Initializes data structures used by the iv optimization pass, stored
1070 in DATA. */
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1075 data->version_info_size = 2 * num_ssa_names;
1076 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077 data->relevant = BITMAP_ALLOC (NULL);
1078 data->important_candidates = BITMAP_ALLOC (NULL);
1079 data->max_inv_var_id = 0;
1080 data->max_inv_expr_id = 0;
1081 data->niters = NULL;
1082 data->vgroups.create (20);
1083 data->vcands.create (20);
1084 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085 data->name_expansion_cache = NULL;
1086 data->base_object_map = NULL;
1087 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088 data->iv_common_cands.create (20);
1089 decl_rtl_to_reset.create (20);
1090 gcc_obstack_init (&data->iv_obstack);
1093 /* walk_tree callback for determine_base_object. */
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1098 tree_code code = TREE_CODE (*tp);
1099 tree obj = NULL_TREE;
1100 if (code == ADDR_EXPR)
1102 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103 if (!base)
1104 obj = *tp;
1105 else if (TREE_CODE (base) != MEM_REF)
1106 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1108 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109 obj = fold_convert (ptr_type_node, *tp);
1111 if (!obj)
1113 if (!EXPR_P (*tp))
1114 *walk_subtrees = 0;
1116 return NULL_TREE;
1118 /* Record special node for multiple base objects and stop. */
1119 if (*static_cast<tree *> (wdata))
1121 *static_cast<tree *> (wdata) = integer_zero_node;
1122 return integer_zero_node;
1124 /* Record the base object and continue looking. */
1125 *static_cast<tree *> (wdata) = obj;
1126 return NULL_TREE;
1129 /* Returns a memory object to which EXPR points, with caching. Returns NULL if
1130 we are able to determine that it does not point to any such object; returns
1131 integer_zero_node specifically if EXPR contains multiple base objects. */
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1136 tree *slot, obj = NULL_TREE;
1137 if (data->base_object_map)
1139 if ((slot = data->base_object_map->get(expr)) != NULL)
1140 return *slot;
1142 else
1143 data->base_object_map = new hash_map<tree, tree>;
1145 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146 data->base_object_map->put (expr, obj);
1147 return obj;
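/* Illustrative examples of the mapping above (assumed, not exhaustive): for
   EXPR being &a[i] the recorded base object is the address of "a"; for a
   pointer SSA name "p" the base object is "p" converted to ptr_type_node;
   when EXPR mixes two different base objects, integer_zero_node is recorded;
   and for a plain integer expression NULL_TREE is returned.  */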
1150 /* Return true if address expression with non-DECL_P operand appears
1151 in EXPR. */
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1156 bool res = false;
1158 STRIP_NOPS (expr);
1159 switch (TREE_CODE (expr))
1161 case POINTER_PLUS_EXPR:
1162 case PLUS_EXPR:
1163 case MINUS_EXPR:
1164 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166 break;
1168 case ADDR_EXPR:
1169 return (!DECL_P (TREE_OPERAND (expr, 0)));
1171 default:
1172 return false;
1175 return res;
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183 bool no_overflow = false)
1185 tree expr = base;
1186 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187 sizeof (struct iv));
1188 gcc_assert (step != NULL_TREE);
1190 /* Lower address expression in base except ones with DECL_P as operand.
1191 By doing this:
1192 1) More accurate cost can be computed for address expressions;
1193 2) Duplicate candidates won't be created for bases in different
1194 forms, like &a[0] and &a. */
1195 STRIP_NOPS (expr);
1196 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197 || contain_complex_addr_expr (expr))
1199 aff_tree comb;
1200 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204 iv->base = base;
1205 iv->base_object = determine_base_object (data, base);
1206 iv->step = step;
1207 iv->biv_p = false;
1208 iv->nonlin_use = NULL;
1209 iv->ssa_name = NULL_TREE;
1210 if (!no_overflow
1211 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212 base, step))
1213 no_overflow = true;
1214 iv->no_overflow = no_overflow;
1215 iv->have_address_use = false;
1217 return iv;
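/* For instance (a sketch of the intent of the lowering above, not a verbatim
   dump): bases written as &a[0] and &a end up in the same affine form, so a
   single candidate serves both instead of two duplicates, and address costs
   are computed on the lowered form.  */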
1220 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1221 doesn't overflow. */
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225 bool no_overflow)
1227 struct version_info *info = name_info (data, iv);
1229 gcc_assert (!info->iv);
1231 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232 info->iv = alloc_iv (data, base, step, no_overflow);
1233 info->iv->ssa_name = iv;
1236 /* Finds induction variable declaration for VAR. */
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1241 basic_block bb;
1242 tree type = TREE_TYPE (var);
1244 if (!POINTER_TYPE_P (type)
1245 && !INTEGRAL_TYPE_P (type))
1246 return NULL;
1248 if (!name_info (data, var)->iv)
1250 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1252 if (!bb
1253 || !flow_bb_inside_loop_p (data->current_loop, bb))
1255 if (POINTER_TYPE_P (type))
1256 type = sizetype;
1257 set_iv (data, var, var, build_int_cst (type, 0), true);
1261 return name_info (data, var)->iv;
1264 /* Return the first non-invariant ssa var found in EXPR. */
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1269 int i, n;
1270 tree tmp;
1271 enum tree_code code;
1273 if (!expr || is_gimple_min_invariant (expr))
1274 return NULL;
1276 code = TREE_CODE (expr);
1277 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1279 n = TREE_OPERAND_LENGTH (expr);
1280 for (i = 0; i < n; i++)
1282 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1284 if (tmp)
1285 return tmp;
1288 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291 /* Finds basic ivs. */
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1296 gphi *phi;
1297 affine_iv iv;
1298 tree step, type, base, stop;
1299 bool found = false;
1300 class loop *loop = data->current_loop;
1301 gphi_iterator psi;
1303 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1305 phi = psi.phi ();
1307 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308 continue;
1310 if (virtual_operand_p (PHI_RESULT (phi)))
1311 continue;
1313 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314 continue;
1316 if (integer_zerop (iv.step))
1317 continue;
1319 step = iv.step;
1320 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321 /* Stop expanding the iv base at the first ssa var referred to by the iv
1322 step. Ideally we should stop at any ssa var, but since that is expensive
1323 and such cases are unusual, we just do it on the first one.
1325 See PR64705 for the rationale. */
1326 stop = extract_single_var_from_expr (step);
1327 base = expand_simple_operations (base, stop);
1328 if (contains_abnormal_ssa_name_p (base)
1329 || contains_abnormal_ssa_name_p (step))
1330 continue;
1332 type = TREE_TYPE (PHI_RESULT (phi));
1333 base = fold_convert (type, base);
1334 if (step)
1336 if (POINTER_TYPE_P (type))
1337 step = convert_to_ptrofftype (step);
1338 else
1339 step = fold_convert (type, step);
1342 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343 found = true;
1346 return found;
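/* For example (illustrative GIMPLE): a loop header PHI such as

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1;

   is recorded by find_bivs as an iv with base 0 and step 1; mark_bivs later
   flags both i_1 and i_2 as bivs.  */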
1349 /* Marks basic ivs. */
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1354 gphi *phi;
1355 gimple *def;
1356 tree var;
1357 struct iv *iv, *incr_iv;
1358 class loop *loop = data->current_loop;
1359 basic_block incr_bb;
1360 gphi_iterator psi;
1362 data->bivs_not_used_in_addr = 0;
1363 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1365 phi = psi.phi ();
1367 iv = get_iv (data, PHI_RESULT (phi));
1368 if (!iv)
1369 continue;
1371 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372 def = SSA_NAME_DEF_STMT (var);
1373 /* Don't mark iv peeled from other one as biv. */
1374 if (def
1375 && gimple_code (def) == GIMPLE_PHI
1376 && gimple_bb (def) == loop->header)
1377 continue;
1379 incr_iv = get_iv (data, var);
1380 if (!incr_iv)
1381 continue;
1383 /* If the increment is in the subloop, ignore it. */
1384 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385 if (incr_bb->loop_father != data->current_loop
1386 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387 continue;
1389 iv->biv_p = true;
1390 incr_iv->biv_p = true;
1391 if (iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1393 if (incr_iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399 parameters to IV. */
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1404 tree lhs, stop;
1405 class loop *loop = data->current_loop;
1407 iv->base = NULL_TREE;
1408 iv->step = NULL_TREE;
1410 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411 return false;
1413 lhs = gimple_assign_lhs (stmt);
1414 if (TREE_CODE (lhs) != SSA_NAME)
1415 return false;
1417 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418 return false;
1420 /* Stop expanding the iv base at the first ssa var referred to by the iv
1421 step. Ideally we should stop at any ssa var, but since that is expensive
1422 and such cases are unusual, we just do it on the first one.
1424 See PR64705 for the rationale. */
1425 stop = extract_single_var_from_expr (iv->step);
1426 iv->base = expand_simple_operations (iv->base, stop);
1427 if (contains_abnormal_ssa_name_p (iv->base)
1428 || contains_abnormal_ssa_name_p (iv->step))
1429 return false;
1431 /* If STMT could throw, then do not consider STMT as defining a GIV.
1432 While this will suppress optimizations, we cannot safely delete this
1433 GIV and associated statements, even if it appears it is not used. */
1434 if (stmt_could_throw_p (cfun, stmt))
1435 return false;
1437 return true;
1440 /* Finds general ivs in statement STMT. */
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1445 affine_iv iv;
1447 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448 return;
1450 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1453 /* Finds general ivs in basic block BB. */
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1458 gimple_stmt_iterator bsi;
1460 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461 find_givs_in_stmt (data, gsi_stmt (bsi));
1464 /* Finds general ivs. */
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1469 class loop *loop = data->current_loop;
1470 unsigned i;
1472 for (i = 0; i < loop->num_nodes; i++)
1473 find_givs_in_bb (data, body[i]);
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477 variable and if so, its initial value and step. */
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1482 unsigned i;
1483 bitmap_iterator bi;
1485 if (!find_bivs (data))
1486 return false;
1488 find_givs (data, body);
1489 mark_bivs (data);
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1493 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495 if (niter)
1497 fprintf (dump_file, " number of iterations ");
1498 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 if (!integer_zerop (niter->may_be_zero))
1501 fprintf (dump_file, "; zero if ");
1502 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 fprintf (dump_file, "\n");
1507 fprintf (dump_file, "\n<Induction Vars>:\n");
1508 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 struct version_info *info = ver_info (data, i);
1511 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1516 return true;
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521 is the const offset stripped from IV base and MEM_TYPE is the type
1522 of the memory being addressed. For uses of other types, ADDR_BASE
1523 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 gimple *stmt, enum use_type type, tree mem_type,
1528 tree addr_base, poly_uint64 addr_offset)
1530 struct iv_use *use = XCNEW (struct iv_use);
1532 use->id = group->vuses.length ();
1533 use->group_id = group->id;
1534 use->type = type;
1535 use->mem_type = mem_type;
1536 use->iv = iv;
1537 use->stmt = stmt;
1538 use->op_p = use_p;
1539 use->addr_base = addr_base;
1540 use->addr_offset = addr_offset;
1542 group->vuses.safe_push (use);
1543 return use;
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547 NONLINEAR_USE is true if the invariant is used in a way we do not
1548 handle specially. */
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 basic_block bb;
1554 struct version_info *info;
1556 if (TREE_CODE (op) != SSA_NAME
1557 || virtual_operand_p (op))
1558 return;
1560 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561 if (bb
1562 && flow_bb_inside_loop_p (data->current_loop, bb))
1563 return;
1565 info = name_info (data, op);
1566 info->name = op;
1567 info->has_nonlin_use |= nonlinear_use;
1568 if (!info->inv_id)
1569 info->inv_id = ++data->max_inv_var_id;
1570 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573 /* Record a group of TYPE. */
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1578 struct iv_group *group = XCNEW (struct iv_group);
1580 group->id = data->vgroups.length ();
1581 group->type = type;
1582 group->related_cands = BITMAP_ALLOC (NULL);
1583 group->vuses.create (1);
1584 group->doloop_p = false;
1586 data->vgroups.safe_push (group);
1587 return group;
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591 New group will be created if there is no existing group for the use.
1592 MEM_TYPE is the type of memory being addressed, or NULL if this
1593 isn't an address reference. */
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 struct iv *iv, gimple *stmt, enum use_type type,
1598 tree mem_type)
1600 tree addr_base = NULL;
1601 struct iv_group *group = NULL;
1602 poly_uint64 addr_offset = 0;
1604 /* A non address type use is recorded in a new group below; an address type use may join an existing compatible group. */
1605 if (address_p (type))
1607 unsigned int i;
1609 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1610 tree addr_toffset;
1611 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1612 addr_offset = int_cst_value (addr_toffset);
1613 for (i = 0; i < data->vgroups.length (); i++)
1615 struct iv_use *use;
1617 group = data->vgroups[i];
1618 use = group->vuses[0];
1619 if (!address_p (use->type))
1620 continue;
1622 /* Check if it has the same stripped base and step. */
1623 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1624 && operand_equal_p (iv->step, use->iv->step, 0)
1625 && operand_equal_p (addr_base, use->addr_base, 0))
1626 break;
1628 if (i == data->vgroups.length ())
1629 group = NULL;
1632 if (!group)
1633 group = record_group (data, type);
1635 return record_use (group, use_p, iv, stmt, type, mem_type,
1636 addr_base, addr_offset);
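/* For example (illustrative): address uses a[i], a[i + 1] and a[i + 4] in the
   same loop share the stripped base and step, so they end up in one group
   with addr_offset 0, 1 and 4 element sizes respectively, while an access to
   a different object such as b[i] starts a new group.  */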
1639 /* Checks whether the use OP is interesting and if so, records it. */
1641 static struct iv_use *
1642 find_interesting_uses_op (struct ivopts_data *data, tree op)
1644 struct iv *iv;
1645 gimple *stmt;
1646 struct iv_use *use;
1648 if (TREE_CODE (op) != SSA_NAME)
1649 return NULL;
1651 iv = get_iv (data, op);
1652 if (!iv)
1653 return NULL;
1655 if (iv->nonlin_use)
1657 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1658 return iv->nonlin_use;
1661 if (integer_zerop (iv->step))
1663 record_invariant (data, op, true);
1664 return NULL;
1667 stmt = SSA_NAME_DEF_STMT (op);
1668 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1670 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1671 iv->nonlin_use = use;
1672 return use;
1675 /* Indicate how compare type iv_use can be handled. */
1676 enum comp_iv_rewrite
1678 COMP_IV_NA,
1679 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1680 COMP_IV_EXPR,
1681 /* We may rewrite compare type iv_uses on both sides of comparison by
1682 expressing value of each iv_use. */
1683 COMP_IV_EXPR_2,
1684 /* We may rewrite compare type iv_use by expressing value of the iv_use
1685 or by eliminating it with other iv_cand. */
1686 COMP_IV_ELIM
1689 /* Given a condition in statement STMT, checks whether it is a compare
1690 of an induction variable and an invariant. If this is the case,
1691 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1692 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1693 induction variable descriptions, and the matching comp_iv_rewrite value
1694 is returned. If this is not the case, CONTROL_VAR and BOUND are set to
1695 the arguments of the condition and COMP_IV_NA is returned. */
1697 static enum comp_iv_rewrite
1698 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1699 tree **control_var, tree **bound,
1700 struct iv **iv_var, struct iv **iv_bound)
1702 /* The objects returned when COND has constant operands. */
1703 static struct iv const_iv;
1704 static tree zero;
1705 tree *op0 = &zero, *op1 = &zero;
1706 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1707 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1709 if (gimple_code (stmt) == GIMPLE_COND)
1711 gcond *cond_stmt = as_a <gcond *> (stmt);
1712 op0 = gimple_cond_lhs_ptr (cond_stmt);
1713 op1 = gimple_cond_rhs_ptr (cond_stmt);
1715 else
1717 op0 = gimple_assign_rhs1_ptr (stmt);
1718 op1 = gimple_assign_rhs2_ptr (stmt);
1721 zero = integer_zero_node;
1722 const_iv.step = integer_zero_node;
1724 if (TREE_CODE (*op0) == SSA_NAME)
1725 iv0 = get_iv (data, *op0);
1726 if (TREE_CODE (*op1) == SSA_NAME)
1727 iv1 = get_iv (data, *op1);
1729 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1730 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1732 rewrite_type = COMP_IV_EXPR_2;
1733 goto end;
1736 /* If neither side of the comparison is an IV. */
1737 if ((!iv0 || integer_zerop (iv0->step))
1738 && (!iv1 || integer_zerop (iv1->step)))
1739 goto end;
1741 /* Control variable may be on the other side. */
1742 if (!iv0 || integer_zerop (iv0->step))
1744 std::swap (op0, op1);
1745 std::swap (iv0, iv1);
1747 /* If one side is IV and the other side isn't loop invariant. */
1748 if (!iv1)
1749 rewrite_type = COMP_IV_EXPR;
1750 /* If one side is IV and the other side is loop invariant. */
1751 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1752 rewrite_type = COMP_IV_ELIM;
1754 end:
1755 if (control_var)
1756 *control_var = op0;
1757 if (iv_var)
1758 *iv_var = iv0;
1759 if (bound)
1760 *bound = op1;
1761 if (iv_bound)
1762 *iv_bound = iv1;
1764 return rewrite_type;
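/* Classification examples for the function above (illustrative): for a
   condition i < n where i is an iv and n is a loop invariant, COMP_IV_ELIM
   is returned with i as the control variable; for i < j with both ivs,
   COMP_IV_EXPR_2; for i < x where x is defined inside the loop but is not an
   iv, COMP_IV_EXPR; and for a comparison of two invariants, COMP_IV_NA.  */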
1767 /* Checks whether the condition in STMT is interesting and if so,
1768 records it. */
1770 static void
1771 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1773 tree *var_p, *bound_p;
1774 struct iv *var_iv, *bound_iv;
1775 enum comp_iv_rewrite ret;
1777 ret = extract_cond_operands (data, stmt,
1778 &var_p, &bound_p, &var_iv, &bound_iv);
1779 if (ret == COMP_IV_NA)
1781 find_interesting_uses_op (data, *var_p);
1782 find_interesting_uses_op (data, *bound_p);
1783 return;
1786 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1787 /* Record compare type iv_use for iv on the other side of comparison. */
1788 if (ret == COMP_IV_EXPR_2)
1789 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1792 /* Returns the outermost loop EXPR is obviously invariant in
1793 relative to the loop LOOP, i.e. if all its operands are defined
1794 outside of the returned loop. Returns NULL if EXPR is not
1795 even obviously invariant in LOOP. */
1797 class loop *
1798 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1800 basic_block def_bb;
1801 unsigned i, len;
1803 if (is_gimple_min_invariant (expr))
1804 return current_loops->tree_root;
1806 if (TREE_CODE (expr) == SSA_NAME)
1808 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1809 if (def_bb)
1811 if (flow_bb_inside_loop_p (loop, def_bb))
1812 return NULL;
1813 return superloop_at_depth (loop,
1814 loop_depth (def_bb->loop_father) + 1);
1817 return current_loops->tree_root;
1820 if (!EXPR_P (expr))
1821 return NULL;
1823 unsigned maxdepth = 0;
1824 len = TREE_OPERAND_LENGTH (expr);
1825 for (i = 0; i < len; i++)
1827 class loop *ivloop;
1828 if (!TREE_OPERAND (expr, i))
1829 continue;
1831 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1832 if (!ivloop)
1833 return NULL;
1834 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837 return superloop_at_depth (loop, maxdepth);
1840 /* Returns true if expression EXPR is obviously invariant in LOOP,
1841 i.e. if all its operands are defined outside of the LOOP. LOOP
1842 should not be the function body. */
1844 bool
1845 expr_invariant_in_loop_p (class loop *loop, tree expr)
1847 basic_block def_bb;
1848 unsigned i, len;
1850 gcc_assert (loop_depth (loop) > 0);
1852 if (is_gimple_min_invariant (expr))
1853 return true;
1855 if (TREE_CODE (expr) == SSA_NAME)
1857 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1858 if (def_bb
1859 && flow_bb_inside_loop_p (loop, def_bb))
1860 return false;
1862 return true;
1865 if (!EXPR_P (expr))
1866 return false;
1868 len = TREE_OPERAND_LENGTH (expr);
1869 for (i = 0; i < len; i++)
1870 if (TREE_OPERAND (expr, i)
1871 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1872 return false;
1874 return true;
1877 /* Given expression EXPR which computes inductive values with respect
1878 to loop recorded in DATA, this function returns biv from which EXPR
1879 is derived by tracing definition chains of ssa variables in EXPR. */
1881 static struct iv*
1882 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1884 struct iv *iv;
1885 unsigned i, n;
1886 tree e2, e1;
1887 enum tree_code code;
1888 gimple *stmt;
1890 if (expr == NULL_TREE)
1891 return NULL;
1893 if (is_gimple_min_invariant (expr))
1894 return NULL;
1896 code = TREE_CODE (expr);
1897 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1899 n = TREE_OPERAND_LENGTH (expr);
1900 for (i = 0; i < n; i++)
1902 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1903 if (iv)
1904 return iv;
1908 /* Stop if it's not ssa name. */
1909 if (code != SSA_NAME)
1910 return NULL;
1912 iv = get_iv (data, expr);
1913 if (!iv || integer_zerop (iv->step))
1914 return NULL;
1915 else if (iv->biv_p)
1916 return iv;
1918 stmt = SSA_NAME_DEF_STMT (expr);
1919 if (gphi *phi = dyn_cast <gphi *> (stmt))
1921 ssa_op_iter iter;
1922 use_operand_p use_p;
1923 basic_block phi_bb = gimple_bb (phi);
1925 /* Skip loop header PHI that doesn't define biv. */
1926 if (phi_bb->loop_father == data->current_loop)
1927 return NULL;
1929 if (virtual_operand_p (gimple_phi_result (phi)))
1930 return NULL;
1932 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1934 tree use = USE_FROM_PTR (use_p);
1935 iv = find_deriving_biv_for_expr (data, use);
1936 if (iv)
1937 return iv;
1939 return NULL;
1941 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1942 return NULL;
1944 e1 = gimple_assign_rhs1 (stmt);
1945 code = gimple_assign_rhs_code (stmt);
1946 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1947 return find_deriving_biv_for_expr (data, e1);
1949 switch (code)
1951 case MULT_EXPR:
1952 case PLUS_EXPR:
1953 case MINUS_EXPR:
1954 case POINTER_PLUS_EXPR:
1955 /* Increments, decrements and multiplications by a constant
1956 are simple. */
1957 e2 = gimple_assign_rhs2 (stmt);
1958 iv = find_deriving_biv_for_expr (data, e2);
1959 if (iv)
1960 return iv;
1961 gcc_fallthrough ();
1963 CASE_CONVERT:
1964 /* Casts are simple. */
1965 return find_deriving_biv_for_expr (data, e1);
1967 default:
1968 break;
1971 return NULL;
1974 /* Record BIV, its predecessor and successor that they are used in
1975 address type uses. */
1977 static void
1978 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1980 unsigned i;
1981 tree type, base_1, base_2;
1982 bitmap_iterator bi;
1984 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1985 || biv->have_address_use || !biv->no_overflow)
1986 return;
1988 type = TREE_TYPE (biv->base);
1989 if (!INTEGRAL_TYPE_P (type))
1990 return;
1992 biv->have_address_use = true;
1993 data->bivs_not_used_in_addr--;
1994 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1995 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1997 struct iv *iv = ver_info (data, i)->iv;
1999 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2000 || iv->have_address_use || !iv->no_overflow)
2001 continue;
2003 if (type != TREE_TYPE (iv->base)
2004 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2005 continue;
2007 if (!operand_equal_p (biv->step, iv->step, 0))
2008 continue;
2010 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2011 if (operand_equal_p (base_1, iv->base, 0)
2012 || operand_equal_p (base_2, biv->base, 0))
2014 iv->have_address_use = true;
2015 data->bivs_not_used_in_addr--;
2020 /* Cumulates the steps of indices into DATA and replaces their values with the
2021 initial ones. Returns false when the value of the index cannot be determined.
2022 Callback for for_each_index. */
2024 struct ifs_ivopts_data
2026 struct ivopts_data *ivopts_data;
2027 gimple *stmt;
2028 tree step;
2031 static bool
2032 idx_find_step (tree base, tree *idx, void *data)
2034 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2035 struct iv *iv;
2036 bool use_overflow_semantics = false;
2037 tree step, iv_base, iv_step, lbound, off;
2038 class loop *loop = dta->ivopts_data->current_loop;
2040 /* If base is a component ref, require that the offset of the reference
2041 be invariant. */
2042 if (TREE_CODE (base) == COMPONENT_REF)
2044 off = component_ref_field_offset (base);
2045 return expr_invariant_in_loop_p (loop, off);
2048 /* If base is array, first check whether we will be able to move the
2049 reference out of the loop (in order to take its address in strength
2050 reduction). In order for this to work we need both lower bound
2051 and step to be loop invariants. */
2052 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054 /* Moreover, for a range, the size needs to be invariant as well. */
2055 if (TREE_CODE (base) == ARRAY_RANGE_REF
2056 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2057 return false;
2059 step = array_ref_element_size (base);
2060 lbound = array_ref_low_bound (base);
2062 if (!expr_invariant_in_loop_p (loop, step)
2063 || !expr_invariant_in_loop_p (loop, lbound))
2064 return false;
2067 if (TREE_CODE (*idx) != SSA_NAME)
2068 return true;
2070 iv = get_iv (dta->ivopts_data, *idx);
2071 if (!iv)
2072 return false;
2074 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2075 *&x[0], which is not folded and does not trigger the
2076 ARRAY_REF path below. */
2077 *idx = iv->base;
2079 if (integer_zerop (iv->step))
2080 return true;
2082 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2084 step = array_ref_element_size (base);
2086 /* We only handle addresses whose step is an integer constant. */
2087 if (TREE_CODE (step) != INTEGER_CST)
2088 return false;
2090 else
2091 /* The step for pointer arithmetic is already 1 byte. */
2092 step = size_one_node;
2094 iv_base = iv->base;
2095 iv_step = iv->step;
2096 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2097 use_overflow_semantics = true;
2099 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2100 sizetype, &iv_base, &iv_step, dta->stmt,
2101 use_overflow_semantics))
2103 /* The index might wrap. */
2104 return false;
2107 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2108 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2110 if (dta->ivopts_data->bivs_not_used_in_addr)
2112 if (!iv->biv_p)
2113 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2115 record_biv_for_address_use (dta->ivopts_data, iv);
2117 return true;
2120 /* Records use in index IDX. Callback for for_each_index. The ivopts data
2121 object is passed to it in DATA. */
2123 static bool
2124 idx_record_use (tree base, tree *idx,
2125 void *vdata)
2127 struct ivopts_data *data = (struct ivopts_data *) vdata;
2128 find_interesting_uses_op (data, *idx);
2129 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2131 if (TREE_OPERAND (base, 2))
2132 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2133 if (TREE_OPERAND (base, 3))
2134 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2136 return true;
2139 /* If we can prove that TOP = cst * BOT for some constant cst,
2140 store cst to MUL and return true. Otherwise return false.
2141 The returned value is always sign-extended, regardless of the
2142 signedness of TOP and BOT. */
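/* For illustration: TOP = n * 8 and BOT = n yields *MUL = 8; TOP = 12 and
   BOT = 4 yields *MUL = 3; TOP = 10 and BOT = 4 fails because 10 is not a
   multiple of 4.  */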
2144 static bool
2145 constant_multiple_of (tree top, tree bot, widest_int *mul)
2147 tree mby;
2148 enum tree_code code;
2149 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2150 widest_int res, p0, p1;
2152 STRIP_NOPS (top);
2153 STRIP_NOPS (bot);
2155 if (operand_equal_p (top, bot, 0))
2157 *mul = 1;
2158 return true;
2161 code = TREE_CODE (top);
2162 switch (code)
2164 case MULT_EXPR:
2165 mby = TREE_OPERAND (top, 1);
2166 if (TREE_CODE (mby) != INTEGER_CST)
2167 return false;
2169 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2170 return false;
2172 *mul = wi::sext (res * wi::to_widest (mby), precision);
2173 return true;
2175 case PLUS_EXPR:
2176 case MINUS_EXPR:
2177 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2178 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2179 return false;
2181 if (code == MINUS_EXPR)
2182 p1 = -p1;
2183 *mul = wi::sext (p0 + p1, precision);
2184 return true;
2186 case INTEGER_CST:
2187 if (TREE_CODE (bot) != INTEGER_CST)
2188 return false;
2190 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2191 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2192 if (p1 == 0)
2193 return false;
2194 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2195 return res == 0;
2197 default:
2198 if (POLY_INT_CST_P (top)
2199 && POLY_INT_CST_P (bot)
2200 && constant_multiple_p (wi::to_poly_widest (top),
2201 wi::to_poly_widest (bot), mul))
2202 return true;
2204 return false;
2208 /* Return true if memory reference REF with step STEP may be unaligned. */
2210 static bool
2211 may_be_unaligned_p (tree ref, tree step)
2213 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2214 thus they are not misaligned. */
2215 if (TREE_CODE (ref) == TARGET_MEM_REF)
2216 return false;
2218 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2219 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2220 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2222 unsigned HOST_WIDE_INT bitpos;
2223 unsigned int ref_align;
2224 get_object_alignment_1 (ref, &ref_align, &bitpos);
2225 if (ref_align < align
2226 || (bitpos % align) != 0
2227 || (bitpos % BITS_PER_UNIT) != 0)
2228 return true;
2230 unsigned int trailing_zeros = tree_ctz (step);
2231 if (trailing_zeros < HOST_BITS_PER_INT
2232 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2233 return true;
2235 return false;
2238 /* Return true if EXPR may be non-addressable. */
2240 bool
2241 may_be_nonaddressable_p (tree expr)
2243 switch (TREE_CODE (expr))
2245 case VAR_DECL:
2246 /* Check if it's a register variable. */
2247 return DECL_HARD_REGISTER (expr);
2249 case TARGET_MEM_REF:
2250 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2251 target, thus they are always addressable. */
2252 return false;
2254 case MEM_REF:
2255 /* Likewise for MEM_REFs, modulo the storage order. */
2256 return REF_REVERSE_STORAGE_ORDER (expr);
2258 case BIT_FIELD_REF:
2259 if (REF_REVERSE_STORAGE_ORDER (expr))
2260 return true;
2261 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 case COMPONENT_REF:
2264 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2265 return true;
2266 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2267 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 case ARRAY_REF:
2270 case ARRAY_RANGE_REF:
2271 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2272 return true;
2273 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275 case VIEW_CONVERT_EXPR:
2276 /* This kind of view-conversion may wrap non-addressable objects
2277 and make them look addressable. After some processing the
2278 non-addressability may be uncovered again, causing ADDR_EXPRs
2279 of inappropriate objects to be built. */
2280 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2281 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2282 return true;
2283 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2285 CASE_CONVERT:
2286 return true;
2288 default:
2289 break;
2292 return false;
2295 /* Finds addresses in *OP_P inside STMT. */
2297 static void
2298 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2299 tree *op_p)
2301 tree base = *op_p, step = size_zero_node;
2302 struct iv *civ;
2303 struct ifs_ivopts_data ifs_ivopts_data;
2305 /* Do not play with volatile memory references. A bit too conservative,
2306 perhaps, but safe. */
2307 if (gimple_has_volatile_ops (stmt))
2308 goto fail;
2310 /* Ignore bitfields for now. Not really something terribly complicated
2311 to handle. TODO. */
2312 if (TREE_CODE (base) == BIT_FIELD_REF)
2313 goto fail;
2315 base = unshare_expr (base);
2317 if (TREE_CODE (base) == TARGET_MEM_REF)
2319 tree type = build_pointer_type (TREE_TYPE (base));
2320 tree astep;
2322 if (TMR_BASE (base)
2323 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2325 civ = get_iv (data, TMR_BASE (base));
2326 if (!civ)
2327 goto fail;
2329 TMR_BASE (base) = civ->base;
2330 step = civ->step;
2332 if (TMR_INDEX2 (base)
2333 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2335 civ = get_iv (data, TMR_INDEX2 (base));
2336 if (!civ)
2337 goto fail;
2339 TMR_INDEX2 (base) = civ->base;
2340 step = civ->step;
2342 if (TMR_INDEX (base)
2343 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2345 civ = get_iv (data, TMR_INDEX (base));
2346 if (!civ)
2347 goto fail;
2349 TMR_INDEX (base) = civ->base;
2350 astep = civ->step;
2352 if (astep)
2354 if (TMR_STEP (base))
2355 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2357 step = fold_build2 (PLUS_EXPR, type, step, astep);
2361 if (integer_zerop (step))
2362 goto fail;
2363 base = tree_mem_ref_addr (type, base);
2365 else
2367 ifs_ivopts_data.ivopts_data = data;
2368 ifs_ivopts_data.stmt = stmt;
2369 ifs_ivopts_data.step = size_zero_node;
2370 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2371 || integer_zerop (ifs_ivopts_data.step))
2372 goto fail;
2373 step = ifs_ivopts_data.step;
2375 /* Check that the base expression is addressable. This needs
2376 to be done after substituting bases of IVs into it. */
2377 if (may_be_nonaddressable_p (base))
2378 goto fail;
2380 /* Moreover, on strict alignment platforms, check that it is
2381 sufficiently aligned. */
2382 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2383 goto fail;
2385 base = build_fold_addr_expr (base);
2387 /* Substituting bases of IVs into the base expression might
2388 have caused folding opportunities. */
2389 if (TREE_CODE (base) == ADDR_EXPR)
2391 tree *ref = &TREE_OPERAND (base, 0);
2392 while (handled_component_p (*ref))
2393 ref = &TREE_OPERAND (*ref, 0);
2394 if (TREE_CODE (*ref) == MEM_REF)
2396 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2397 TREE_OPERAND (*ref, 0),
2398 TREE_OPERAND (*ref, 1));
2399 if (tem)
2400 *ref = tem;
2405 civ = alloc_iv (data, base, step);
2406 /* Fail if base object of this memory reference is unknown. */
2407 if (civ->base_object == NULL_TREE)
2408 goto fail;
2410 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2411 return;
2413 fail:
2414 for_each_index (op_p, idx_record_use, data);
2417 /* Finds and records invariants used in STMT. */
2419 static void
2420 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2422 ssa_op_iter iter;
2423 use_operand_p use_p;
2424 tree op;
2426 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428 op = USE_FROM_PTR (use_p);
2429 record_invariant (data, op, false);
2433 /* CALL calls an internal function. If operand *OP_P will become an
2434 address when the call is expanded, return the type of the memory
2435 being addressed, otherwise return null. */
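/* For illustration: for a masked or length-controlled load the memory type
   is the type of the call's lhs, while for the corresponding stores it is
   the type of the stored-value argument, located via
   internal_fn_stored_value_index.  */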
2437 static tree
2438 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2440 switch (gimple_call_internal_fn (call))
2442 case IFN_MASK_LOAD:
2443 case IFN_MASK_LOAD_LANES:
2444 case IFN_LEN_LOAD:
2445 case IFN_LEN_MASK_LOAD:
2446 if (op_p == gimple_call_arg_ptr (call, 0))
2447 return TREE_TYPE (gimple_call_lhs (call));
2448 return NULL_TREE;
2450 case IFN_MASK_STORE:
2451 case IFN_MASK_STORE_LANES:
2452 case IFN_LEN_STORE:
2453 case IFN_LEN_MASK_STORE:
2455 if (op_p == gimple_call_arg_ptr (call, 0))
2457 internal_fn ifn = gimple_call_internal_fn (call);
2458 int index = internal_fn_stored_value_index (ifn);
2459 return TREE_TYPE (gimple_call_arg (call, index));
2461 return NULL_TREE;
2464 default:
2465 return NULL_TREE;
2469 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2470 Return true if the operand will become an address when STMT
2471 is expanded and record the associated address use if so. */
2473 static bool
2474 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2475 struct iv *iv)
2477 /* Fail if base object of this memory reference is unknown. */
2478 if (iv->base_object == NULL_TREE)
2479 return false;
2481 tree mem_type = NULL_TREE;
2482 if (gcall *call = dyn_cast <gcall *> (stmt))
2483 if (gimple_call_internal_p (call))
2484 mem_type = get_mem_type_for_internal_fn (call, op_p);
2485 if (mem_type)
2487 iv = alloc_iv (data, iv->base, iv->step);
2488 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2489 return true;
2491 return false;
2494 /* Finds interesting uses of induction variables in the statement STMT. */
2496 static void
2497 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2499 struct iv *iv;
2500 tree op, *lhs, *rhs;
2501 ssa_op_iter iter;
2502 use_operand_p use_p;
2503 enum tree_code code;
2505 find_invariants_stmt (data, stmt);
2507 if (gimple_code (stmt) == GIMPLE_COND)
2509 find_interesting_uses_cond (data, stmt);
2510 return;
2513 if (is_gimple_assign (stmt))
2515 lhs = gimple_assign_lhs_ptr (stmt);
2516 rhs = gimple_assign_rhs1_ptr (stmt);
2518 if (TREE_CODE (*lhs) == SSA_NAME)
2520 /* If the statement defines an induction variable, the uses are not
2521 interesting by themselves. */
2523 iv = get_iv (data, *lhs);
2525 if (iv && !integer_zerop (iv->step))
2526 return;
2529 code = gimple_assign_rhs_code (stmt);
2530 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2531 && (REFERENCE_CLASS_P (*rhs)
2532 || is_gimple_val (*rhs)))
2534 if (REFERENCE_CLASS_P (*rhs))
2535 find_interesting_uses_address (data, stmt, rhs);
2536 else
2537 find_interesting_uses_op (data, *rhs);
2539 if (REFERENCE_CLASS_P (*lhs))
2540 find_interesting_uses_address (data, stmt, lhs);
2541 return;
2543 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2545 find_interesting_uses_cond (data, stmt);
2546 return;
2549 /* TODO -- we should also handle address uses of type
2551 memory = call (whatever);
2555 call (memory). */
2558 if (gimple_code (stmt) == GIMPLE_PHI
2559 && gimple_bb (stmt) == data->current_loop->header)
2561 iv = get_iv (data, PHI_RESULT (stmt));
2563 if (iv && !integer_zerop (iv->step))
2564 return;
2567 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2569 op = USE_FROM_PTR (use_p);
2571 if (TREE_CODE (op) != SSA_NAME)
2572 continue;
2574 iv = get_iv (data, op);
2575 if (!iv)
2576 continue;
2578 if (!find_address_like_use (data, stmt, use_p->use, iv))
2579 find_interesting_uses_op (data, op);
2583 /* Finds interesting uses of induction variables outside of loops
2584 on loop exit edge EXIT. */
2586 static void
2587 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2589 gphi *phi;
2590 gphi_iterator psi;
2591 tree def;
2593 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2595 phi = psi.phi ();
2596 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2597 if (!virtual_operand_p (def))
2598 find_interesting_uses_op (data, def);
2602 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2603 mode for memory reference represented by USE. */
2605 static GTY (()) vec<rtx, va_gc> *addr_list;
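/* For illustration: on a hypothetical target whose [base + offset] form only
   accepts 16-bit signed displacements, an OFFSET of 4096 would be accepted
   while an OFFSET of 40000 would be rejected.  */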
2607 static bool
2608 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2610 rtx reg, addr;
2611 unsigned list_index;
2612 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2613 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2615 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2616 if (list_index >= vec_safe_length (addr_list))
2617 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2619 addr = (*addr_list)[list_index];
2620 if (!addr)
2622 addr_mode = targetm.addr_space.address_mode (as);
2623 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2624 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2625 (*addr_list)[list_index] = addr;
2627 else
2628 addr_mode = GET_MODE (addr);
2630 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2631 return (memory_address_addr_space_p (mem_mode, addr, as));
2634 /* Comparison function to sort a group in ascending order of addr_offset. */
2636 static int
2637 group_compare_offset (const void *a, const void *b)
2639 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2640 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2642 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2645 /* Check if small groups should be split. Return true if no group
2646 contains more than two uses with distinct addr_offsets. Return
2647 false otherwise. We want to split such groups because:
2649 1) Small groups don't have much benefit and may interfere with
2650 general candidate selection.
2651 2) The problem size with only small groups is usually small and the
2652 general algorithm can handle it well.
2654 TODO -- The above claim may not hold when we want to merge memory
2655 accesses with consecutive addresses. */
2657 static bool
2658 split_small_address_groups_p (struct ivopts_data *data)
2660 unsigned int i, j, distinct = 1;
2661 struct iv_use *pre;
2662 struct iv_group *group;
2664 for (i = 0; i < data->vgroups.length (); i++)
2666 group = data->vgroups[i];
2667 if (group->vuses.length () == 1)
2668 continue;
2670 gcc_assert (address_p (group->type));
2671 if (group->vuses.length () == 2)
2673 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2674 group->vuses[1]->addr_offset) > 0)
2675 std::swap (group->vuses[0], group->vuses[1]);
2677 else
2678 group->vuses.qsort (group_compare_offset);
2680 if (distinct > 2)
2681 continue;
2683 distinct = 1;
2684 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2686 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2688 pre = group->vuses[j];
2689 distinct++;
2692 if (distinct > 2)
2693 break;
2697 return (distinct <= 2);
2700 /* For each group of address type uses, this function further groups
2701 these uses according to the maximum offset supported by target's
2702 [base + offset] addressing mode. */
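/* For illustration, assuming small-group splitting is not forced: a group
   with uses at offsets 0, 4, 8 and 4096 from the same base stays together
   except for the use at offset 4096, which is moved into a new group if the
   target cannot encode that displacement in its [base + offset] form.  */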
2704 static void
2705 split_address_groups (struct ivopts_data *data)
2707 unsigned int i, j;
2708 /* Whether to split every group unconditionally. */
2709 bool split_p = split_small_address_groups_p (data);
2711 for (i = 0; i < data->vgroups.length (); i++)
2713 struct iv_group *new_group = NULL;
2714 struct iv_group *group = data->vgroups[i];
2715 struct iv_use *use = group->vuses[0];
2717 use->id = 0;
2718 use->group_id = group->id;
2719 if (group->vuses.length () == 1)
2720 continue;
2722 gcc_assert (address_p (use->type));
2724 for (j = 1; j < group->vuses.length ();)
2726 struct iv_use *next = group->vuses[j];
2727 poly_int64 offset = next->addr_offset - use->addr_offset;
2729 /* Split the group if asked to, or if the offset relative to the first
2730 use can't fit in the offset part of the addressing mode. IV uses
2731 having the same offset are still kept in one group. */
2732 if (maybe_ne (offset, 0)
2733 && (split_p || !addr_offset_valid_p (use, offset)))
2735 if (!new_group)
2736 new_group = record_group (data, group->type);
2737 group->vuses.ordered_remove (j);
2738 new_group->vuses.safe_push (next);
2739 continue;
2742 next->id = j;
2743 next->group_id = group->id;
2744 j++;
2749 /* Finds uses of the induction variables that are interesting. */
2751 static void
2752 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2754 basic_block bb;
2755 gimple_stmt_iterator bsi;
2756 unsigned i;
2757 edge e;
2759 for (i = 0; i < data->current_loop->num_nodes; i++)
2761 edge_iterator ei;
2762 bb = body[i];
2764 FOR_EACH_EDGE (e, ei, bb->succs)
2765 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2766 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2767 find_interesting_uses_outside (data, e);
2769 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2770 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2771 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2772 if (!is_gimple_debug (gsi_stmt (bsi)))
2773 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2776 split_address_groups (data);
2778 if (dump_file && (dump_flags & TDF_DETAILS))
2780 fprintf (dump_file, "\n<IV Groups>:\n");
2781 dump_groups (dump_file, data);
2782 fprintf (dump_file, "\n");
2786 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2787 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2788 we are at the top-level of the processed address. */
2790 static tree
2791 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2792 poly_int64 *offset)
2794 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2795 enum tree_code code;
2796 tree type, orig_type = TREE_TYPE (expr);
2797 poly_int64 off0, off1;
2798 HOST_WIDE_INT st;
2799 tree orig_expr = expr;
2801 STRIP_NOPS (expr);
2803 type = TREE_TYPE (expr);
2804 code = TREE_CODE (expr);
2805 *offset = 0;
2807 switch (code)
2809 case POINTER_PLUS_EXPR:
2810 case PLUS_EXPR:
2811 case MINUS_EXPR:
2812 op0 = TREE_OPERAND (expr, 0);
2813 op1 = TREE_OPERAND (expr, 1);
2815 op0 = strip_offset_1 (op0, false, false, &off0);
2816 op1 = strip_offset_1 (op1, false, false, &off1);
2818 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2819 if (op0 == TREE_OPERAND (expr, 0)
2820 && op1 == TREE_OPERAND (expr, 1))
2821 return orig_expr;
2823 if (integer_zerop (op1))
2824 expr = op0;
2825 else if (integer_zerop (op0))
2827 if (code == MINUS_EXPR)
2828 expr = fold_build1 (NEGATE_EXPR, type, op1);
2829 else
2830 expr = op1;
2832 else
2833 expr = fold_build2 (code, type, op0, op1);
2835 return fold_convert (orig_type, expr);
2837 case MULT_EXPR:
2838 op1 = TREE_OPERAND (expr, 1);
2839 if (!cst_and_fits_in_hwi (op1))
2840 return orig_expr;
2842 op0 = TREE_OPERAND (expr, 0);
2843 op0 = strip_offset_1 (op0, false, false, &off0);
2844 if (op0 == TREE_OPERAND (expr, 0))
2845 return orig_expr;
2847 *offset = off0 * int_cst_value (op1);
2848 if (integer_zerop (op0))
2849 expr = op0;
2850 else
2851 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2853 return fold_convert (orig_type, expr);
2855 case ARRAY_REF:
2856 case ARRAY_RANGE_REF:
2857 if (!inside_addr)
2858 return orig_expr;
2860 step = array_ref_element_size (expr);
2861 if (!cst_and_fits_in_hwi (step))
2862 break;
2864 st = int_cst_value (step);
2865 op1 = TREE_OPERAND (expr, 1);
2866 op1 = strip_offset_1 (op1, false, false, &off1);
2867 *offset = off1 * st;
2869 if (top_compref
2870 && integer_zerop (op1))
2872 /* Strip the component reference completely. */
2873 op0 = TREE_OPERAND (expr, 0);
2874 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2875 *offset += off0;
2876 return op0;
2878 break;
2880 case COMPONENT_REF:
2882 tree field;
2884 if (!inside_addr)
2885 return orig_expr;
2887 tmp = component_ref_field_offset (expr);
2888 field = TREE_OPERAND (expr, 1);
2889 if (top_compref
2890 && cst_and_fits_in_hwi (tmp)
2891 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2893 HOST_WIDE_INT boffset, abs_off;
2895 /* Strip the component reference completely. */
2896 op0 = TREE_OPERAND (expr, 0);
2897 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2898 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2899 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2900 if (boffset < 0)
2901 abs_off = -abs_off;
2903 *offset = off0 + int_cst_value (tmp) + abs_off;
2904 return op0;
2907 break;
2909 case ADDR_EXPR:
2910 op0 = TREE_OPERAND (expr, 0);
2911 op0 = strip_offset_1 (op0, true, true, &off0);
2912 *offset += off0;
2914 if (op0 == TREE_OPERAND (expr, 0))
2915 return orig_expr;
2917 expr = build_fold_addr_expr (op0);
2918 return fold_convert (orig_type, expr);
2920 case MEM_REF:
2921 /* ??? Offset operand? */
2922 inside_addr = false;
2923 break;
2925 default:
2926 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2927 return build_int_cst (orig_type, 0);
2928 return orig_expr;
2931 /* Default handling of expressions for which we want to recurse into
2932 the first operand. */
2933 op0 = TREE_OPERAND (expr, 0);
2934 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2935 *offset += off0;
2937 if (op0 == TREE_OPERAND (expr, 0)
2938 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2939 return orig_expr;
2941 expr = copy_node (expr);
2942 TREE_OPERAND (expr, 0) = op0;
2943 if (op1)
2944 TREE_OPERAND (expr, 1) = op1;
2946 /* Inside an address, we might strip the top-level component references,
2947 thus changing the type of the expression. Handling of ADDR_EXPR
2948 will fix that. */
2949 expr = fold_convert (orig_type, expr);
2951 return expr;
2954 /* Strips constant offsets from EXPR and stores them to OFFSET. */
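/* For illustration, assuming a 4-byte int element type: strip_offset on
   &a[4] returns &a with *OFFSET = 16, and on p + 24 (a POINTER_PLUS_EXPR
   with constant second operand) it returns p with *OFFSET = 24.  */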
2956 static tree
2957 strip_offset (tree expr, poly_uint64_pod *offset)
2959 poly_int64 off;
2960 tree core = strip_offset_1 (expr, false, false, &off);
2961 *offset = off;
2962 return core;
2965 /* Returns variant of TYPE that can be used as base for different uses.
2966 We return unsigned type with the same precision, which avoids problems
2967 with overflows. */
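/* For illustration: int maps to unsigned int, a pointer type maps to an
   unsigned integer type of the same precision, and an already unsigned
   type is returned unchanged.  */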
2969 static tree
2970 generic_type_for (tree type)
2972 if (POINTER_TYPE_P (type))
2973 return unsigned_type_for (type);
2975 if (TYPE_UNSIGNED (type))
2976 return type;
2978 return unsigned_type_for (type);
2981 /* Private data for walk_tree. */
2983 struct walk_tree_data
2985 bitmap *inv_vars;
2986 struct ivopts_data *idata;
2989 /* Callback function for walk_tree, it records invariants and symbol
2990 reference in *EXPR_P. DATA is the structure storing result info. */
2992 static tree
2993 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2995 tree op = *expr_p;
2996 struct version_info *info;
2997 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2999 if (TREE_CODE (op) != SSA_NAME)
3000 return NULL_TREE;
3002 info = name_info (wdata->idata, op);
3003 /* Because we expand simple operations when finding IVs, a loop-invariant
3004 variable that isn't referred to by the original loop could be used now.
3005 Record such invariant variables here. */
3006 if (!info->iv)
3008 struct ivopts_data *idata = wdata->idata;
3009 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3011 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3013 tree steptype = TREE_TYPE (op);
3014 if (POINTER_TYPE_P (steptype))
3015 steptype = sizetype;
3016 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3017 record_invariant (idata, op, false);
3020 if (!info->inv_id || info->has_nonlin_use)
3021 return NULL_TREE;
3023 if (!*wdata->inv_vars)
3024 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3025 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3027 return NULL_TREE;
3030 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3031 store them. */
3033 static inline void
3034 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3036 struct walk_tree_data wdata;
3038 if (!inv_vars)
3039 return;
3041 wdata.idata = data;
3042 wdata.inv_vars = inv_vars;
3043 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3046 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3047 will be recorded if it doesn't exist yet. Given the two exprs below:
3048 inv_expr + cst1, inv_expr + cst2
3049 It's hard to decide whether the constant part should be stripped
3050 or not. We choose not to strip it, based on the facts below:
3051 1) We would need to count the ADD cost for the constant part if it's
3052 stripped, which isn't always trivial where this function is called.
3053 2) Stripping the constant away may conflict with the following loop
3054 invariant hoisting pass.
3055 3) Not stripping the constant away results in more invariant exprs,
3056 which usually leads to decisions preferring lower register pressure. */
3058 static iv_inv_expr_ent *
3059 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3061 STRIP_NOPS (inv_expr);
3063 if (poly_int_tree_p (inv_expr)
3064 || TREE_CODE (inv_expr) == SSA_NAME)
3065 return NULL;
3067 /* Don't strip constant part away as we used to. */
3069 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3070 struct iv_inv_expr_ent ent;
3071 ent.expr = inv_expr;
3072 ent.hash = iterative_hash_expr (inv_expr, 0);
3073 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3075 if (!*slot)
3077 *slot = XNEW (struct iv_inv_expr_ent);
3078 (*slot)->expr = inv_expr;
3079 (*slot)->hash = ent.hash;
3080 (*slot)->id = ++data->max_inv_expr_id;
3083 return *slot;
3087 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3088 unsuitable as ivopts candidates for potentially involving undefined
3089 behavior. */
3091 static tree
3092 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3094 basic_block bb = (basic_block) bb_;
3095 if (TREE_CODE (*tp) == SSA_NAME
3096 && ssa_name_maybe_undef_p (*tp)
3097 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3098 return *tp;
3099 if (!EXPR_P (*tp))
3100 *walk_subtrees = 0;
3101 return NULL;
3104 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3105 position to POS. If USE is not NULL, the candidate is set as related to
3106 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3107 replacement of the final value of the iv by a direct computation. */
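/* Note: if an equivalent candidate (same position, increment statement,
   base, step and precision) has already been recorded, it is reused below
   and only its important/doloop flags are updated.  */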
3109 static struct iv_cand *
3110 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3111 enum iv_position pos, struct iv_use *use,
3112 gimple *incremented_at, struct iv *orig_iv = NULL,
3113 bool doloop = false)
3115 unsigned i;
3116 struct iv_cand *cand = NULL;
3117 tree type, orig_type;
3119 gcc_assert (base && step);
3121 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3122 live, but the ivopts code may replace a real pointer with one
3123 pointing before or after the memory block that is then adjusted
3124 into the memory block during the loop. FIXME: It would likely be
3125 better to actually force the pointer live and still use ivopts;
3126 for example, it would be enough to write the pointer into memory
3127 and keep it there until after the loop. */
3128 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3129 return NULL;
3131 /* If BASE contains undefined SSA names make sure we only record
3132 the original IV. */
3133 bool involves_undefs = false;
3134 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3136 if (pos != IP_ORIGINAL)
3137 return NULL;
3138 important = false;
3139 involves_undefs = true;
3142 /* For non-original variables, make sure their values are computed in a type
3143 that does not invoke undefined behavior on overflows (since in general,
3144 we cannot prove that these induction variables are non-wrapping). */
3145 if (pos != IP_ORIGINAL)
3147 orig_type = TREE_TYPE (base);
3148 type = generic_type_for (orig_type);
3149 if (type != orig_type)
3151 base = fold_convert (type, base);
3152 step = fold_convert (type, step);
3156 for (i = 0; i < data->vcands.length (); i++)
3158 cand = data->vcands[i];
3160 if (cand->pos != pos)
3161 continue;
3163 if (cand->incremented_at != incremented_at
3164 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3165 && cand->ainc_use != use))
3166 continue;
3168 if (operand_equal_p (base, cand->iv->base, 0)
3169 && operand_equal_p (step, cand->iv->step, 0)
3170 && (TYPE_PRECISION (TREE_TYPE (base))
3171 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3172 break;
3175 if (i == data->vcands.length ())
3177 cand = XCNEW (struct iv_cand);
3178 cand->id = i;
3179 cand->iv = alloc_iv (data, base, step);
3180 cand->pos = pos;
3181 if (pos != IP_ORIGINAL)
3183 if (doloop)
3184 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3185 else
3186 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3187 cand->var_after = cand->var_before;
3189 cand->important = important;
3190 cand->involves_undefs = involves_undefs;
3191 cand->incremented_at = incremented_at;
3192 cand->doloop_p = doloop;
3193 data->vcands.safe_push (cand);
3195 if (!poly_int_tree_p (step))
3197 find_inv_vars (data, &step, &cand->inv_vars);
3199 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3200 /* Share bitmap between inv_vars and inv_exprs for cand. */
3201 if (inv_expr != NULL)
3203 cand->inv_exprs = cand->inv_vars;
3204 cand->inv_vars = NULL;
3205 if (cand->inv_exprs)
3206 bitmap_clear (cand->inv_exprs);
3207 else
3208 cand->inv_exprs = BITMAP_ALLOC (NULL);
3210 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3214 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3215 cand->ainc_use = use;
3216 else
3217 cand->ainc_use = NULL;
3219 cand->orig_iv = orig_iv;
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3221 dump_cand (dump_file, cand);
3224 cand->important |= important;
3225 cand->doloop_p |= doloop;
3227 /* Relate candidate to the group for which it is added. */
3228 if (use)
3229 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3231 return cand;
3234 /* Returns true if incrementing the induction variable at the end of the LOOP
3235 is allowed.
3237 The purpose is to avoid splitting the latch edge with a biv increment, thus
3238 creating a jump, possibly confusing other optimization passes and leaving
3239 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3240 available (so we do not have a better alternative), or if the latch edge
3241 is already nonempty. */
3243 static bool
3244 allow_ip_end_pos_p (class loop *loop)
3246 if (!ip_normal_pos (loop))
3247 return true;
3249 if (!empty_block_p (ip_end_pos (loop)))
3250 return true;
3252 return false;
3255 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3256 Important field is set to IMPORTANT. */
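/* For illustration: for an address use whose step equals the size of the
   accessed mode, a target with post-increment addressing gets a candidate
   at position IP_AFTER_USE with the unchanged BASE, while a target with
   pre-increment addressing gets a candidate at IP_BEFORE_USE whose base is
   BASE - STEP.  */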
3258 static void
3259 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3260 bool important, struct iv_use *use)
3262 basic_block use_bb = gimple_bb (use->stmt);
3263 machine_mode mem_mode;
3264 unsigned HOST_WIDE_INT cstepi;
3266 /* If we insert the increment in any position other than the standard
3267 ones, we must ensure that it is incremented once per iteration.
3268 It must not be in an inner nested loop, or one side of an if
3269 statement. */
3270 if (use_bb->loop_father != data->current_loop
3271 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3272 || stmt_can_throw_internal (cfun, use->stmt)
3273 || !cst_and_fits_in_hwi (step))
3274 return;
3276 cstepi = int_cst_value (step);
3278 mem_mode = TYPE_MODE (use->mem_type);
3279 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3280 || USE_STORE_PRE_INCREMENT (mem_mode))
3281 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3282 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3283 || USE_STORE_PRE_DECREMENT (mem_mode))
3284 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3286 enum tree_code code = MINUS_EXPR;
3287 tree new_base;
3288 tree new_step = step;
3290 if (POINTER_TYPE_P (TREE_TYPE (base)))
3292 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3293 code = POINTER_PLUS_EXPR;
3295 else
3296 new_step = fold_convert (TREE_TYPE (base), new_step);
3297 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3298 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3299 use->stmt);
3301 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3302 || USE_STORE_POST_INCREMENT (mem_mode))
3303 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3304 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3305 || USE_STORE_POST_DECREMENT (mem_mode))
3306 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3308 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3309 use->stmt);
3313 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3314 position to POS. If USE is not NULL, the candidate is set as related to
3315 it. The candidate computation is scheduled before exit condition and at
3316 the end of loop. */
3318 static void
3319 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3320 struct iv_use *use, struct iv *orig_iv = NULL,
3321 bool doloop = false)
3323 if (ip_normal_pos (data->current_loop))
3324 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3325 doloop);
3326 /* Exclude the doloop candidate here since it requires a decrement, then a
3327 comparison and jump, so the IP_END position doesn't match. */
3328 if (!doloop && ip_end_pos (data->current_loop)
3329 && allow_ip_end_pos_p (data->current_loop))
3330 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3333 /* Adds standard iv candidates. */
3335 static void
3336 add_standard_iv_candidates (struct ivopts_data *data)
3338 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3340 /* The same for a double-integer type if it is still fast enough. */
3341 if (TYPE_PRECISION
3342 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3343 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3344 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3345 build_int_cst (long_integer_type_node, 1), true, NULL);
3347 /* The same for a double-integer type if it is still fast enough. */
3348 if (TYPE_PRECISION
3349 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3350 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3351 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3352 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3356 /* Adds candidates based on the old induction variable IV. */
3358 static void
3359 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3361 gimple *phi;
3362 tree def;
3363 struct iv_cand *cand;
3365 /* Check if this biv is used in an address type use. */
3366 if (iv->no_overflow && iv->have_address_use
3367 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3368 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3370 tree base = fold_convert (sizetype, iv->base);
3371 tree step = fold_convert (sizetype, iv->step);
3373 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3374 add_candidate (data, base, step, true, NULL, iv);
3375 /* Add iv cand of the original type only if it has nonlinear use. */
3376 if (iv->nonlin_use)
3377 add_candidate (data, iv->base, iv->step, true, NULL);
3379 else
3380 add_candidate (data, iv->base, iv->step, true, NULL);
3382 /* The same, but with initial value zero. */
3383 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3384 add_candidate (data, size_int (0), iv->step, true, NULL);
3385 else
3386 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3387 iv->step, true, NULL);
3389 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3390 if (gimple_code (phi) == GIMPLE_PHI)
3392 /* Additionally record the possibility of leaving the original iv
3393 untouched. */
3394 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3395 /* Don't add candidate if it's from another PHI node because
3396 it's an affine iv appearing in the form of PEELED_CHREC. */
3397 phi = SSA_NAME_DEF_STMT (def);
3398 if (gimple_code (phi) != GIMPLE_PHI)
3400 cand = add_candidate_1 (data,
3401 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3402 SSA_NAME_DEF_STMT (def));
3403 if (cand)
3405 cand->var_before = iv->ssa_name;
3406 cand->var_after = def;
3409 else
3410 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3414 /* Adds candidates based on the old induction variables. */
3416 static void
3417 add_iv_candidate_for_bivs (struct ivopts_data *data)
3419 unsigned i;
3420 struct iv *iv;
3421 bitmap_iterator bi;
3423 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3425 iv = ver_info (data, i)->iv;
3426 if (iv && iv->biv_p && !integer_zerop (iv->step))
3427 add_iv_candidate_for_biv (data, iv);
3431 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3433 static void
3434 record_common_cand (struct ivopts_data *data, tree base,
3435 tree step, struct iv_use *use)
3437 class iv_common_cand ent;
3438 class iv_common_cand **slot;
3440 ent.base = base;
3441 ent.step = step;
3442 ent.hash = iterative_hash_expr (base, 0);
3443 ent.hash = iterative_hash_expr (step, ent.hash);
3445 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3446 if (*slot == NULL)
3448 *slot = new iv_common_cand ();
3449 (*slot)->base = base;
3450 (*slot)->step = step;
3451 (*slot)->uses.create (8);
3452 (*slot)->hash = ent.hash;
3453 data->iv_common_cands.safe_push ((*slot));
3456 gcc_assert (use != NULL);
3457 (*slot)->uses.safe_push (use);
3458 return;
3461 /* Comparison function used to sort common candidates. */
3463 static int
3464 common_cand_cmp (const void *p1, const void *p2)
3466 unsigned n1, n2;
3467 const class iv_common_cand *const *const ccand1
3468 = (const class iv_common_cand *const *)p1;
3469 const class iv_common_cand *const *const ccand2
3470 = (const class iv_common_cand *const *)p2;
3472 n1 = (*ccand1)->uses.length ();
3473 n2 = (*ccand2)->uses.length ();
3474 return n2 - n1;
3477 /* Adds IV candidates based on the common candidates recorded. */
3479 static void
3480 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3482 unsigned i, j;
3483 struct iv_cand *cand_1, *cand_2;
3485 data->iv_common_cands.qsort (common_cand_cmp);
3486 for (i = 0; i < data->iv_common_cands.length (); i++)
3488 class iv_common_cand *ptr = data->iv_common_cands[i];
3490 /* Only add IV candidate if it's derived from multiple uses. */
3491 if (ptr->uses.length () <= 1)
3492 break;
3494 cand_1 = NULL;
3495 cand_2 = NULL;
3496 if (ip_normal_pos (data->current_loop))
3497 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3498 false, IP_NORMAL, NULL, NULL);
3500 if (ip_end_pos (data->current_loop)
3501 && allow_ip_end_pos_p (data->current_loop))
3502 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3503 false, IP_END, NULL, NULL);
3505 /* Bind deriving uses and the new candidates. */
3506 for (j = 0; j < ptr->uses.length (); j++)
3508 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3509 if (cand_1)
3510 bitmap_set_bit (group->related_cands, cand_1->id);
3511 if (cand_2)
3512 bitmap_set_bit (group->related_cands, cand_2->id);
3516 /* Release data since it is useless from this point. */
3517 data->iv_common_cand_tab->empty ();
3518 data->iv_common_cands.truncate (0);
3521 /* Adds candidates based on the value of USE's iv. */
3523 static void
3524 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3526 poly_uint64 offset;
3527 tree base;
3528 struct iv *iv = use->iv;
3529 tree basetype = TREE_TYPE (iv->base);
3531 /* Don't add a candidate for an iv_use whose base type is neither integer nor
3532 pointer, or doesn't have mode precision; instead, add a candidate for the
3533 corresponding scev in an unsigned type with the same precision. See PR93674 for more info. */
3534 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3535 || !type_has_mode_precision_p (basetype))
3537 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3538 TYPE_UNSIGNED (basetype));
3539 add_candidate (data, fold_convert (basetype, iv->base),
3540 fold_convert (basetype, iv->step), false, NULL);
3541 return;
3544 add_candidate (data, iv->base, iv->step, false, use);
3546 /* Record common candidate for use in case it can be shared by others. */
3547 record_common_cand (data, iv->base, iv->step, use);
3549 /* Record common candidate with initial value zero. */
3550 basetype = TREE_TYPE (iv->base);
3551 if (POINTER_TYPE_P (basetype))
3552 basetype = sizetype;
3553 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3555 /* Compare the cost of an address with an unscaled index with the cost of
3556 an address with a scaled index and add candidate if useful. */
3557 poly_int64 step;
3558 if (use != NULL
3559 && poly_int_tree_p (iv->step, &step)
3560 && address_p (use->type))
3562 poly_int64 new_step;
3563 unsigned int fact = preferred_mem_scale_factor
3564 (use->iv->base,
3565 TYPE_MODE (use->mem_type),
3566 optimize_loop_for_speed_p (data->current_loop));
3568 if (fact != 1
3569 && multiple_p (step, fact, &new_step))
3570 add_candidate (data, size_int (0),
3571 wide_int_to_tree (sizetype, new_step),
3572 true, NULL);
3575 /* Record common candidate with constant offset stripped in base.
3576 Like the use itself, we also add candidate directly for it. */
3577 base = strip_offset (iv->base, &offset);
3578 if (maybe_ne (offset, 0U) || base != iv->base)
3580 record_common_cand (data, base, iv->step, use);
3581 add_candidate (data, base, iv->step, false, use);
3584 /* Record common candidate with base_object removed in base. */
3585 base = iv->base;
3586 STRIP_NOPS (base);
3587 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3589 tree step = iv->step;
3591 STRIP_NOPS (step);
3592 base = TREE_OPERAND (base, 1);
3593 step = fold_convert (sizetype, step);
3594 record_common_cand (data, base, step, use);
3595 /* Also record common candidate with offset stripped. */
3596 tree alt_base, alt_offset;
3597 split_constant_offset (base, &alt_base, &alt_offset);
3598 if (!integer_zerop (alt_offset))
3599 record_common_cand (data, alt_base, step, use);
3602 /* Finally, add auto-increment candidates. Make such variables
3603 important since other iv uses with the same base object may be based
3604 on them. */
3605 if (use != NULL && address_p (use->type))
3606 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3609 /* Adds candidates based on the uses. */
3611 static void
3612 add_iv_candidate_for_groups (struct ivopts_data *data)
3614 unsigned i;
3616 /* Only add a candidate for the first use in each group. */
3617 for (i = 0; i < data->vgroups.length (); i++)
3619 struct iv_group *group = data->vgroups[i];
3621 gcc_assert (group->vuses[0] != NULL);
3622 add_iv_candidate_for_use (data, group->vuses[0]);
3624 add_iv_candidate_derived_from_uses (data);
3627 /* Record important candidates and add them to related_cands bitmaps. */
3629 static void
3630 record_important_candidates (struct ivopts_data *data)
3632 unsigned i;
3633 struct iv_group *group;
3635 for (i = 0; i < data->vcands.length (); i++)
3637 struct iv_cand *cand = data->vcands[i];
3639 if (cand->important)
3640 bitmap_set_bit (data->important_candidates, i);
3643 data->consider_all_candidates = (data->vcands.length ()
3644 <= CONSIDER_ALL_CANDIDATES_BOUND);
3646 /* Add important candidates to groups' related_cands bitmaps. */
3647 for (i = 0; i < data->vgroups.length (); i++)
3649 group = data->vgroups[i];
3650 bitmap_ior_into (group->related_cands, data->important_candidates);
3654 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3655 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3656 we allocate a simple list to every use. */
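/* For illustration: a group related to 5 candidates gets n_map_members
   rounded up to 8; set_group_iv_cost and get_group_iv_cost then probe
   linearly starting from cand->id & 7 until the candidate or an empty
   slot is found.  */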
3658 static void
3659 alloc_use_cost_map (struct ivopts_data *data)
3661 unsigned i, size, s;
3663 for (i = 0; i < data->vgroups.length (); i++)
3665 struct iv_group *group = data->vgroups[i];
3667 if (data->consider_all_candidates)
3668 size = data->vcands.length ();
3669 else
3671 s = bitmap_count_bits (group->related_cands);
3673 /* Round up to a power of two, so that computing the modulo is fast. */
3674 size = s ? (1 << ceil_log2 (s)) : 1;
3677 group->n_map_members = size;
3678 group->cost_map = XCNEWVEC (class cost_pair, size);
3682 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3683 on invariants INV_VARS and that the value used in expressing it is
3684 VALUE, and in case of iv elimination the comparison operator is COMP. */
3686 static void
3687 set_group_iv_cost (struct ivopts_data *data,
3688 struct iv_group *group, struct iv_cand *cand,
3689 comp_cost cost, bitmap inv_vars, tree value,
3690 enum tree_code comp, bitmap inv_exprs)
3692 unsigned i, s;
3694 if (cost.infinite_cost_p ())
3696 BITMAP_FREE (inv_vars);
3697 BITMAP_FREE (inv_exprs);
3698 return;
3701 if (data->consider_all_candidates)
3703 group->cost_map[cand->id].cand = cand;
3704 group->cost_map[cand->id].cost = cost;
3705 group->cost_map[cand->id].inv_vars = inv_vars;
3706 group->cost_map[cand->id].inv_exprs = inv_exprs;
3707 group->cost_map[cand->id].value = value;
3708 group->cost_map[cand->id].comp = comp;
3709 return;
3712 /* n_map_members is a power of two, so this computes modulo. */
3713 s = cand->id & (group->n_map_members - 1);
3714 for (i = s; i < group->n_map_members; i++)
3715 if (!group->cost_map[i].cand)
3716 goto found;
3717 for (i = 0; i < s; i++)
3718 if (!group->cost_map[i].cand)
3719 goto found;
3721 gcc_unreachable ();
3723 found:
3724 group->cost_map[i].cand = cand;
3725 group->cost_map[i].cost = cost;
3726 group->cost_map[i].inv_vars = inv_vars;
3727 group->cost_map[i].inv_exprs = inv_exprs;
3728 group->cost_map[i].value = value;
3729 group->cost_map[i].comp = comp;
3732 /* Gets cost of (GROUP, CAND) pair. */
3734 static class cost_pair *
3735 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3736 struct iv_cand *cand)
3738 unsigned i, s;
3739 class cost_pair *ret;
3741 if (!cand)
3742 return NULL;
3744 if (data->consider_all_candidates)
3746 ret = group->cost_map + cand->id;
3747 if (!ret->cand)
3748 return NULL;
3750 return ret;
3753 /* n_map_members is a power of two, so this computes modulo. */
3754 s = cand->id & (group->n_map_members - 1);
3755 for (i = s; i < group->n_map_members; i++)
3756 if (group->cost_map[i].cand == cand)
3757 return group->cost_map + i;
3758 else if (group->cost_map[i].cand == NULL)
3759 return NULL;
3760 for (i = 0; i < s; i++)
3761 if (group->cost_map[i].cand == cand)
3762 return group->cost_map + i;
3763 else if (group->cost_map[i].cand == NULL)
3764 return NULL;
3766 return NULL;
3769 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3770 static rtx
3771 produce_memory_decl_rtl (tree obj, int *regno)
3773 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3774 machine_mode address_mode = targetm.addr_space.address_mode (as);
3775 rtx x;
3777 gcc_assert (obj);
3778 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3780 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3781 x = gen_rtx_SYMBOL_REF (address_mode, name);
3782 SET_SYMBOL_REF_DECL (x, obj);
3783 x = gen_rtx_MEM (DECL_MODE (obj), x);
3784 set_mem_addr_space (x, as);
3785 targetm.encode_section_info (obj, x, true);
3787 else
3789 x = gen_raw_REG (address_mode, (*regno)++);
3790 x = gen_rtx_MEM (DECL_MODE (obj), x);
3791 set_mem_addr_space (x, as);
3794 return x;
3797 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3798 walk_tree. DATA contains the actual fake register number. */
3800 static tree
3801 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3803 tree obj = NULL_TREE;
3804 rtx x = NULL_RTX;
3805 int *regno = (int *) data;
3807 switch (TREE_CODE (*expr_p))
3809 case ADDR_EXPR:
3810 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3811 handled_component_p (*expr_p);
3812 expr_p = &TREE_OPERAND (*expr_p, 0))
3813 continue;
3814 obj = *expr_p;
3815 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3816 x = produce_memory_decl_rtl (obj, regno);
3817 break;
3819 case SSA_NAME:
3820 *ws = 0;
3821 obj = SSA_NAME_VAR (*expr_p);
3822 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3823 if (!obj)
3824 return NULL_TREE;
3825 if (!DECL_RTL_SET_P (obj))
3826 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3827 break;
3829 case VAR_DECL:
3830 case PARM_DECL:
3831 case RESULT_DECL:
3832 *ws = 0;
3833 obj = *expr_p;
3835 if (DECL_RTL_SET_P (obj))
3836 break;
3838 if (DECL_MODE (obj) == BLKmode)
3839 x = produce_memory_decl_rtl (obj, regno);
3840 else
3841 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3843 break;
3845 default:
3846 break;
3849 if (x)
3851 decl_rtl_to_reset.safe_push (obj);
3852 SET_DECL_RTL (obj, x);
3855 return NULL_TREE;
3858 /* Predict whether the given loop will be transformed in the RTL
3859 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3860 This is only for target independent checks, see targetm.predict_doloop_p
3861 for the target dependent ones.
3863 Note that according to some initial investigation, some checks, like the
3864 costly niter check and invalid stmt scanning, don't bring much gain in
3865 general cases, so keep this as simple as possible for now.
3867 Some RTL-specific checks seem unable to be done in gimple; if any new
3868 or easy checks _are_ missing here, please add them. */
3870 static bool
3871 generic_predict_doloop_p (struct ivopts_data *data)
3873 class loop *loop = data->current_loop;
3875 /* Call target hook for target dependent checks. */
3876 if (!targetm.predict_doloop_p (loop))
3878 if (dump_file && (dump_flags & TDF_DETAILS))
3879 fprintf (dump_file, "Predict doloop failure due to"
3880 " target specific checks.\n");
3881 return false;
3884 /* Similar to doloop_optimize, check iteration description to know it's
3885 suitable or not. Keep it as simple as possible, feel free to extend it
3886 if you find any multiple exits cases matter. */
3887 edge exit = single_dom_exit (loop);
3888 class tree_niter_desc *niter_desc;
3889 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3891 if (dump_file && (dump_flags & TDF_DETAILS))
3892 fprintf (dump_file, "Predict doloop failure due to"
3893 " unexpected niters.\n");
3894 return false;
3897 /* Similar to doloop_optimize, check whether iteration count too small
3898 and not profitable. */
3899 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3900 if (est_niter == -1)
3901 est_niter = get_likely_max_loop_iterations_int (loop);
3902 if (est_niter >= 0 && est_niter < 3)
3904 if (dump_file && (dump_flags & TDF_DETAILS))
3905 fprintf (dump_file,
3906 "Predict doloop failure due to"
3907 " too few iterations (%u).\n",
3908 (unsigned int) est_niter);
3909 return false;
3912 return true;
3915 /* Determines cost of the computation of EXPR. */
3917 static unsigned
3918 computation_cost (tree expr, bool speed)
3920 rtx_insn *seq;
3921 rtx rslt;
3922 tree type = TREE_TYPE (expr);
3923 unsigned cost;
3924 /* Avoid using hard regs in ways which may be unsupported. */
3925 int regno = LAST_VIRTUAL_REGISTER + 1;
3926 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3927 enum node_frequency real_frequency = node->frequency;
3929 node->frequency = NODE_FREQUENCY_NORMAL;
3930 crtl->maybe_hot_insn_p = speed;
3931 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3932 start_sequence ();
3933 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3934 seq = get_insns ();
3935 end_sequence ();
3936 default_rtl_profile ();
3937 node->frequency = real_frequency;
3939 cost = seq_cost (seq, speed);
3940 if (MEM_P (rslt))
3941 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3942 TYPE_ADDR_SPACE (type), speed);
3943 else if (!REG_P (rslt))
3944 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3946 return cost;
3949 /* Returns variable containing the value of candidate CAND at statement AT. */
3951 static tree
3952 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3954 if (stmt_after_increment (loop, cand, stmt))
3955 return cand->var_after;
3956 else
3957 return cand->var_before;
3960 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3961 same precision that is at least as wide as the precision of TYPE, stores
3962 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3963 type of A and B. */
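/* For illustration: if *A is (unsigned int) al and *B is (unsigned int) bl,
   with al and bl of type long (of equal precision, at least as wide as
   unsigned int), *A becomes al, *B becomes bl and long is returned;
   otherwise *A and *B are left untouched and the type of *A is returned.  */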
3965 static tree
3966 determine_common_wider_type (tree *a, tree *b)
3968 tree wider_type = NULL;
3969 tree suba, subb;
3970 tree atype = TREE_TYPE (*a);
3972 if (CONVERT_EXPR_P (*a))
3974 suba = TREE_OPERAND (*a, 0);
3975 wider_type = TREE_TYPE (suba);
3976 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3977 return atype;
3979 else
3980 return atype;
3982 if (CONVERT_EXPR_P (*b))
3984 subb = TREE_OPERAND (*b, 0);
3985 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3986 return atype;
3988 else
3989 return atype;
3991 *a = suba;
3992 *b = subb;
3993 return wider_type;
3996 /* Determines the expression by which USE is expressed from induction variable
3997 CAND at statement AT in LOOP. The expression is stored in two parts in a
3998 decomposed form. The invariant part is stored in AFF_INV, while the variant
3999 part is in AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT if
4000 it's non-null. Returns false if USE cannot be expressed using CAND. */
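/* For illustration (a sketch): if USE has iv {base = b, step = 4} and CAND
   has iv {base = 0, step = 1}, the ratio is 4, AFF_INV ends up holding b and
   AFF_VAR holds 4 * var, so the value of USE is recovered as b + 4 * cand.  */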
4002 static bool
4003 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
4004 struct iv_cand *cand, class aff_tree *aff_inv,
4005 class aff_tree *aff_var, widest_int *prat = NULL)
4007 tree ubase = use->iv->base, ustep = use->iv->step;
4008 tree cbase = cand->iv->base, cstep = cand->iv->step;
4009 tree common_type, uutype, var, cstep_common;
4010 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4011 aff_tree aff_cbase;
4012 widest_int rat;
4014 /* We must have a precision to express the values of use. */
4015 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4016 return false;
4018 var = var_at_stmt (loop, cand, at);
4019 uutype = unsigned_type_for (utype);
4021 /* If the conversion is not a no-op, perform it. */
4022 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4024 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4025 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4027 tree inner_base, inner_step, inner_type;
4028 inner_base = TREE_OPERAND (cbase, 0);
4029 if (CONVERT_EXPR_P (cstep))
4030 inner_step = TREE_OPERAND (cstep, 0);
4031 else
4032 inner_step = cstep;
4034 inner_type = TREE_TYPE (inner_base);
4035 /* If candidate is added from a biv whose type is smaller than
4036 ctype, we know both candidate and the biv won't overflow.
4037 In this case, it's safe to skip the conversion in the candidate.
4038 As an example, (unsigned short)((unsigned long)A) equals
4039 (unsigned short)A, if A has a type no larger than short. */
4040 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4042 cbase = inner_base;
4043 cstep = inner_step;
4046 cbase = fold_convert (uutype, cbase);
4047 cstep = fold_convert (uutype, cstep);
4048 var = fold_convert (uutype, var);
4051 /* Ratio is 1 when computing the value of biv cand by itself.
4052 We can't rely on constant_multiple_of in this case because the
4053 use is created after the original biv is selected. The call
4054 could fail because of inconsistent fold behavior. See PR68021
4055 for more information. */
4056 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4058 gcc_assert (is_gimple_assign (use->stmt));
4059 gcc_assert (use->iv->ssa_name == cand->var_after);
4060 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4061 rat = 1;
4063 else if (!constant_multiple_of (ustep, cstep, &rat))
4064 return false;
4066 if (prat)
4067 *prat = rat;
4069 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4070 type, we achieve better folding by computing their difference in this
4071 wider type, and casting the result to UUTYPE. We do not need to worry about
4072 overflows, as all the arithmetic will in the end be performed in UUTYPE
4073 anyway. */
4074 common_type = determine_common_wider_type (&ubase, &cbase);
4076 /* use = ubase - ratio * cbase + ratio * var. */
4077 tree_to_aff_combination (ubase, common_type, aff_inv);
4078 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4079 tree_to_aff_combination (var, uutype, aff_var);
4081 /* We need to shift the value if we are after the increment. */
4082 if (stmt_after_increment (loop, cand, at))
4084 aff_tree cstep_aff;
4086 if (common_type != uutype)
4087 cstep_common = fold_convert (common_type, cstep);
4088 else
4089 cstep_common = cstep;
4091 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4092 aff_combination_add (&aff_cbase, &cstep_aff);
4095 aff_combination_scale (&aff_cbase, -rat);
4096 aff_combination_add (aff_inv, &aff_cbase);
4097 if (common_type != uutype)
4098 aff_combination_convert (aff_inv, uutype);
4100 aff_combination_scale (aff_var, rat);
4101 return true;
4104 /* Determines the expression by which USE is expressed from induction variable
4105 CAND at statement AT in LOOP. The expression is stored in a decomposed
4106 form into AFF. Returns false if USE cannot be expressed using CAND. */
4108 static bool
4109 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4110 struct iv_cand *cand, class aff_tree *aff)
4112 aff_tree aff_var;
4114 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4115 return false;
4117 aff_combination_add (aff, &aff_var);
4118 return true;
4121 /* Return the type of USE. */
4123 static tree
4124 get_use_type (struct iv_use *use)
4126 tree base_type = TREE_TYPE (use->iv->base);
4127 tree type;
4129 if (use->type == USE_REF_ADDRESS)
4131 /* The base_type may be a void pointer. Create a pointer type based on
4132 the mem_ref instead. */
4133 type = build_pointer_type (TREE_TYPE (*use->op_p));
4134 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4135 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4137 else
4138 type = base_type;
4140 return type;
4143 /* Determines the expression by which USE is expressed from induction variable
4144 CAND at statement AT in LOOP. The computation is unshared. */
4146 static tree
4147 get_computation_at (class loop *loop, gimple *at,
4148 struct iv_use *use, struct iv_cand *cand)
4150 aff_tree aff;
4151 tree type = get_use_type (use);
4153 if (!get_computation_aff (loop, at, use, cand, &aff))
4154 return NULL_TREE;
4155 unshare_aff_combination (&aff);
4156 return fold_convert (type, aff_combination_to_tree (&aff));
4159 /* Like get_computation_at, but try harder, even if the computation
4160 is more expensive. Intended for debug stmts. */
4162 static tree
4163 get_debug_computation_at (class loop *loop, gimple *at,
4164 struct iv_use *use, struct iv_cand *cand)
4166 if (tree ret = get_computation_at (loop, at, use, cand))
4167 return ret;
4169 tree ubase = use->iv->base, ustep = use->iv->step;
4170 tree cbase = cand->iv->base, cstep = cand->iv->step;
4171 tree var;
4172 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4173 widest_int rat;
4175 /* We must have enough precision to express the values of the use. */
4176 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4177 return NULL_TREE;
4179 /* Try to handle the case that get_computation_at doesn't,
4180 try to express
4181 use = ubase + (var - cbase) / ratio. */
4182 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4183 &rat))
4184 return NULL_TREE;
4186 bool neg_p = false;
4187 if (wi::neg_p (rat))
4189 if (TYPE_UNSIGNED (ctype))
4190 return NULL_TREE;
4191 neg_p = true;
4192 rat = wi::neg (rat);
4195 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4196 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4197 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4198 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4199 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4200 if (!use->iv->no_overflow
4201 && !cand->iv->no_overflow
4202 && !integer_pow2p (cstep))
4203 return NULL_TREE;
4205 int bits = wi::exact_log2 (rat);
4206 if (bits == -1)
4207 bits = wi::floor_log2 (rat) + 1;
4208 if (!cand->iv->no_overflow
4209 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4210 return NULL_TREE;
4212 var = var_at_stmt (loop, cand, at);
4214 if (POINTER_TYPE_P (ctype))
4216 ctype = unsigned_type_for (ctype);
4217 cbase = fold_convert (ctype, cbase);
4218 cstep = fold_convert (ctype, cstep);
4219 var = fold_convert (ctype, var);
4222 if (stmt_after_increment (loop, cand, at))
4223 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4224 unshare_expr (cstep));
4226 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4227 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4228 wide_int_to_tree (TREE_TYPE (var), rat));
4229 if (POINTER_TYPE_P (utype))
4231 var = fold_convert (sizetype, var);
4232 if (neg_p)
4233 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4234 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4236 else
4238 var = fold_convert (utype, var);
4239 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4240 ubase, var);
4242 return var;
4245 /* Adjust the cost COST for being in loop setup rather than loop body.
4246 If we're optimizing for space, the loop setup overhead is constant;
4247 if we're optimizing for speed, amortize it over the per-iteration cost.
4248 If ROUND_UP_P is true, the result is rounded up rather than truncated
4249 toward zero when optimizing for speed. */
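/* For illustration: with an average of 10 iterations, a setup cost of 25 is
   amortized to 25 / 10 = 2 when optimizing for speed, or to (25 + 9) / 10 = 3
   when ROUND_UP_P is additionally true; when optimizing for size the cost is
   returned unchanged.  */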
4250 static int64_t
4251 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4252 bool round_up_p = false)
4254 if (cost == INFTY)
4255 return cost;
4256 else if (optimize_loop_for_speed_p (data->current_loop))
4258 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4259 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4261 else
4262 return cost;
4265 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4266 EXPR operand holding the shift. COST0 and COST1 are the costs for
4267 calculating the operands of EXPR. Returns true if successful, and returns
4268 the cost in COST. */
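/* For illustration: for EXPR = A + B * 8, MULT is B * 8, the shift amount is
   log2 (8) = 3, and the base cost is the cheaper of add_cost + shift_cost
   and the target's shift-and-add cost for that shift amount.  */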
4270 static bool
4271 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4272 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4274 comp_cost res;
4275 tree op1 = TREE_OPERAND (expr, 1);
4276 tree cst = TREE_OPERAND (mult, 1);
4277 tree multop = TREE_OPERAND (mult, 0);
4278 int m = exact_log2 (int_cst_value (cst));
4279 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4280 int as_cost, sa_cost;
4281 bool mult_in_op1;
4283 if (!(m >= 0 && m < maxm))
4284 return false;
4286 STRIP_NOPS (op1);
4287 mult_in_op1 = operand_equal_p (op1, mult, 0);
4289 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4291 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4292 use that in preference to a shift insn followed by an add insn. */
4293 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4294 ? shiftadd_cost (speed, mode, m)
4295 : (mult_in_op1
4296 ? shiftsub1_cost (speed, mode, m)
4297 : shiftsub0_cost (speed, mode, m)));
4299 res = comp_cost (MIN (as_cost, sa_cost), 0);
4300 res += (mult_in_op1 ? cost0 : cost1);
4302 STRIP_NOPS (multop);
4303 if (!is_gimple_val (multop))
4304 res += force_expr_to_var_cost (multop, speed);
4306 *cost = res;
4307 return true;
4310 /* Estimates cost of forcing expression EXPR into a variable. */
4312 static comp_cost
4313 force_expr_to_var_cost (tree expr, bool speed)
4315 static bool costs_initialized = false;
4316 static unsigned integer_cost [2];
4317 static unsigned symbol_cost [2];
4318 static unsigned address_cost [2];
4319 tree op0, op1;
4320 comp_cost cost0, cost1, cost;
4321 machine_mode mode;
4322 scalar_int_mode int_mode;
4324 if (!costs_initialized)
4326 tree type = build_pointer_type (integer_type_node);
4327 tree var, addr;
4328 rtx x;
4329 int i;
4331 var = create_tmp_var_raw (integer_type_node, "test_var");
4332 TREE_STATIC (var) = 1;
4333 x = produce_memory_decl_rtl (var, NULL);
4334 SET_DECL_RTL (var, x);
4336 addr = build1 (ADDR_EXPR, type, var);
4339 for (i = 0; i < 2; i++)
4341 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4342 2000), i);
4344 symbol_cost[i] = computation_cost (addr, i) + 1;
4346 address_cost[i]
4347 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4348 if (dump_file && (dump_flags & TDF_DETAILS))
4350 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4351 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4352 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4353 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4354 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4355 fprintf (dump_file, "\n");
4359 costs_initialized = true;
4362 STRIP_NOPS (expr);
4364 if (SSA_VAR_P (expr))
4365 return no_cost;
4367 if (is_gimple_min_invariant (expr))
4369 if (poly_int_tree_p (expr))
4370 return comp_cost (integer_cost [speed], 0);
4372 if (TREE_CODE (expr) == ADDR_EXPR)
4374 tree obj = TREE_OPERAND (expr, 0);
4376 if (VAR_P (obj)
4377 || TREE_CODE (obj) == PARM_DECL
4378 || TREE_CODE (obj) == RESULT_DECL)
4379 return comp_cost (symbol_cost [speed], 0);
4382 return comp_cost (address_cost [speed], 0);
4385 switch (TREE_CODE (expr))
4387 case POINTER_PLUS_EXPR:
4388 case PLUS_EXPR:
4389 case MINUS_EXPR:
4390 case MULT_EXPR:
4391 case TRUNC_DIV_EXPR:
4392 case BIT_AND_EXPR:
4393 case BIT_IOR_EXPR:
4394 case LSHIFT_EXPR:
4395 case RSHIFT_EXPR:
4396 op0 = TREE_OPERAND (expr, 0);
4397 op1 = TREE_OPERAND (expr, 1);
4398 STRIP_NOPS (op0);
4399 STRIP_NOPS (op1);
4400 break;
4402 CASE_CONVERT:
4403 case NEGATE_EXPR:
4404 case BIT_NOT_EXPR:
4405 op0 = TREE_OPERAND (expr, 0);
4406 STRIP_NOPS (op0);
4407 op1 = NULL_TREE;
4408 break;
4409 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4410 introduce a COND_EXPR for the IV base, so we need to support better
4411 cost estimation for this COND_EXPR and for tcc_comparison codes. */
4412 case COND_EXPR:
4413 op0 = TREE_OPERAND (expr, 1);
4414 STRIP_NOPS (op0);
4415 op1 = TREE_OPERAND (expr, 2);
4416 STRIP_NOPS (op1);
4417 break;
4418 case LT_EXPR:
4419 case LE_EXPR:
4420 case GT_EXPR:
4421 case GE_EXPR:
4422 case EQ_EXPR:
4423 case NE_EXPR:
4424 case UNORDERED_EXPR:
4425 case ORDERED_EXPR:
4426 case UNLT_EXPR:
4427 case UNLE_EXPR:
4428 case UNGT_EXPR:
4429 case UNGE_EXPR:
4430 case UNEQ_EXPR:
4431 case LTGT_EXPR:
4432 case MAX_EXPR:
4433 case MIN_EXPR:
4434 op0 = TREE_OPERAND (expr, 0);
4435 STRIP_NOPS (op0);
4436 op1 = TREE_OPERAND (expr, 1);
4437 STRIP_NOPS (op1);
4438 break;
4440 default:
4441 /* Just an arbitrary value, FIXME. */
4442 return comp_cost (target_spill_cost[speed], 0);
4445 if (op0 == NULL_TREE
4446 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4447 cost0 = no_cost;
4448 else
4449 cost0 = force_expr_to_var_cost (op0, speed);
4451 if (op1 == NULL_TREE
4452 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4453 cost1 = no_cost;
4454 else
4455 cost1 = force_expr_to_var_cost (op1, speed);
4457 mode = TYPE_MODE (TREE_TYPE (expr));
4458 switch (TREE_CODE (expr))
4460 case POINTER_PLUS_EXPR:
4461 case PLUS_EXPR:
4462 case MINUS_EXPR:
4463 case NEGATE_EXPR:
4464 cost = comp_cost (add_cost (speed, mode), 0);
4465 if (TREE_CODE (expr) != NEGATE_EXPR)
4467 tree mult = NULL_TREE;
4468 comp_cost sa_cost;
4469 if (TREE_CODE (op1) == MULT_EXPR)
4470 mult = op1;
4471 else if (TREE_CODE (op0) == MULT_EXPR)
4472 mult = op0;
4474 if (mult != NULL_TREE
4475 && is_a <scalar_int_mode> (mode, &int_mode)
4476 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4477 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4478 speed, &sa_cost))
4479 return sa_cost;
4481 break;
4483 CASE_CONVERT:
4485 tree inner_mode, outer_mode;
4486 outer_mode = TREE_TYPE (expr);
4487 inner_mode = TREE_TYPE (op0);
4488 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4489 TYPE_MODE (inner_mode), speed), 0);
4491 break;
4493 case MULT_EXPR:
4494 if (cst_and_fits_in_hwi (op0))
4495 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4496 mode, speed), 0);
4497 else if (cst_and_fits_in_hwi (op1))
4498 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4499 mode, speed), 0);
4500 else
4501 return comp_cost (target_spill_cost [speed], 0);
4502 break;
4504 case TRUNC_DIV_EXPR:
4505 /* Division by power of two is usually cheap, so we allow it. Forbid
4506 anything else. */
4507 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4508 cost = comp_cost (add_cost (speed, mode), 0);
4509 else
4510 cost = comp_cost (target_spill_cost[speed], 0);
4511 break;
4513 case BIT_AND_EXPR:
4514 case BIT_IOR_EXPR:
4515 case BIT_NOT_EXPR:
4516 case LSHIFT_EXPR:
4517 case RSHIFT_EXPR:
4518 cost = comp_cost (add_cost (speed, mode), 0);
4519 break;
4520 case COND_EXPR:
4521 op0 = TREE_OPERAND (expr, 0);
4522 STRIP_NOPS (op0);
4523 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4524 || CONSTANT_CLASS_P (op0))
4525 cost = no_cost;
4526 else
4527 cost = force_expr_to_var_cost (op0, speed);
4528 break;
4529 case LT_EXPR:
4530 case LE_EXPR:
4531 case GT_EXPR:
4532 case GE_EXPR:
4533 case EQ_EXPR:
4534 case NE_EXPR:
4535 case UNORDERED_EXPR:
4536 case ORDERED_EXPR:
4537 case UNLT_EXPR:
4538 case UNLE_EXPR:
4539 case UNGT_EXPR:
4540 case UNGE_EXPR:
4541 case UNEQ_EXPR:
4542 case LTGT_EXPR:
4543 case MAX_EXPR:
4544 case MIN_EXPR:
4545 /* Simply use the add cost for now; FIXME: find a more accurate way to
4546 evaluate the cost for these codes. */
4547 cost = comp_cost (add_cost (speed, mode), 0);
4548 break;
4550 default:
4551 gcc_unreachable ();
4554 cost += cost0;
4555 cost += cost1;
4556 return cost;
4559 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4560 invariants the computation depends on. */
4562 static comp_cost
4563 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4565 if (!expr)
4566 return no_cost;
4568 find_inv_vars (data, &expr, inv_vars);
4569 return force_expr_to_var_cost (expr, data->speed);
4572 /* Returns the cost of an auto-modifying address expression of the shape
4573 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4574 is the offset of the address expression. The address expression has
4575 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED
4576 indicates whether we are optimizing for speed or size. */
4578 enum ainc_type
4580 AINC_PRE_INC, /* Pre increment. */
4581 AINC_PRE_DEC, /* Pre decrement. */
4582 AINC_POST_INC, /* Post increment. */
4583 AINC_POST_DEC, /* Post decrement. */
4584 AINC_NONE /* Also the number of auto increment types. */
4587 struct ainc_cost_data
4589 int64_t costs[AINC_NONE];
4592 static comp_cost
4593 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4594 machine_mode addr_mode, machine_mode mem_mode,
4595 addr_space_t as, bool speed)
4597 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4598 && !USE_STORE_PRE_DECREMENT (mem_mode)
4599 && !USE_LOAD_POST_DECREMENT (mem_mode)
4600 && !USE_STORE_POST_DECREMENT (mem_mode)
4601 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4602 && !USE_STORE_PRE_INCREMENT (mem_mode)
4603 && !USE_LOAD_POST_INCREMENT (mem_mode)
4604 && !USE_STORE_POST_INCREMENT (mem_mode))
4605 return infinite_cost;
4607 static vec<ainc_cost_data *> ainc_cost_data_list;
4608 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4609 if (idx >= ainc_cost_data_list.length ())
4611 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4613 gcc_assert (nsize > idx);
4614 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4617 ainc_cost_data *data = ainc_cost_data_list[idx];
4618 if (data == NULL)
4620 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4622 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4623 data->costs[AINC_PRE_DEC] = INFTY;
4624 data->costs[AINC_POST_DEC] = INFTY;
4625 data->costs[AINC_PRE_INC] = INFTY;
4626 data->costs[AINC_POST_INC] = INFTY;
4627 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4628 || USE_STORE_PRE_DECREMENT (mem_mode))
4630 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4632 if (memory_address_addr_space_p (mem_mode, addr, as))
4633 data->costs[AINC_PRE_DEC]
4634 = address_cost (addr, mem_mode, as, speed);
4636 if (USE_LOAD_POST_DECREMENT (mem_mode)
4637 || USE_STORE_POST_DECREMENT (mem_mode))
4639 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4641 if (memory_address_addr_space_p (mem_mode, addr, as))
4642 data->costs[AINC_POST_DEC]
4643 = address_cost (addr, mem_mode, as, speed);
4645 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4646 || USE_STORE_PRE_INCREMENT (mem_mode))
4648 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4650 if (memory_address_addr_space_p (mem_mode, addr, as))
4651 data->costs[AINC_PRE_INC]
4652 = address_cost (addr, mem_mode, as, speed);
4654 if (USE_LOAD_POST_INCREMENT (mem_mode)
4655 || USE_STORE_POST_INCREMENT (mem_mode))
4657 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4659 if (memory_address_addr_space_p (mem_mode, addr, as))
4660 data->costs[AINC_POST_INC]
4661 = address_cost (addr, mem_mode, as, speed);
4663 ainc_cost_data_list[idx] = data;
4666 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4667 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4668 return comp_cost (data->costs[AINC_POST_INC], 0);
4669 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4670 return comp_cost (data->costs[AINC_POST_DEC], 0);
4671 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4672 return comp_cost (data->costs[AINC_PRE_INC], 0);
4673 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4674 return comp_cost (data->costs[AINC_PRE_DEC], 0);
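/* For illustration: a 4-byte access with AINC_STEP 4 and AINC_OFFSET 0
   matches the post-increment case above, while AINC_OFFSET 4 with the same
   step matches pre-increment; anything else falls through as unsupported.  */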
4676 return infinite_cost;
4679 /* Return cost of computing USE's address expression by using CAND.
4680 AFF_INV and AFF_VAR represent invariant and variant parts of the
4681 address expression, respectively. If AFF_INV is simple, store the
4682 loop invariant variables it depends on in INV_VARS; if AFF_INV is
4683 complicated, handle it as a new invariant expression and record it
4684 in INV_EXPR. RATIO is the constant ratio of USE's step to CAND's
4685 step. If CAN_AUTOINC is non-NULL, store in it a boolean value
4686 indicating whether this is an auto-increment address. */
4688 static comp_cost
4689 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4690 struct iv_cand *cand, aff_tree *aff_inv,
4691 aff_tree *aff_var, HOST_WIDE_INT ratio,
4692 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4693 bool *can_autoinc, bool speed)
4695 rtx addr;
4696 bool simple_inv = true;
4697 tree comp_inv = NULL_TREE, type = aff_var->type;
4698 comp_cost var_cost = no_cost, cost = no_cost;
4699 struct mem_address parts = {NULL_TREE, integer_one_node,
4700 NULL_TREE, NULL_TREE, NULL_TREE};
4701 machine_mode addr_mode = TYPE_MODE (type);
4702 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4703 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4704 /* Only true if ratio != 1. */
4705 bool ok_with_ratio_p = false;
4706 bool ok_without_ratio_p = false;
4708 if (!aff_combination_const_p (aff_inv))
4710 parts.index = integer_one_node;
4711 /* Addressing mode "base + index". */
4712 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4713 if (ratio != 1)
4715 parts.step = wide_int_to_tree (type, ratio);
4716 /* Addressing mode "base + index << scale". */
4717 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4718 if (!ok_with_ratio_p)
4719 parts.step = NULL_TREE;
4721 if (ok_with_ratio_p || ok_without_ratio_p)
4723 if (maybe_ne (aff_inv->offset, 0))
4725 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4726 /* Addressing mode "base + index [<< scale] + offset". */
4727 if (!valid_mem_ref_p (mem_mode, as, &parts))
4728 parts.offset = NULL_TREE;
4729 else
4730 aff_inv->offset = 0;
4733 move_fixed_address_to_symbol (&parts, aff_inv);
4734 /* Base is fixed address and is moved to symbol part. */
4735 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4736 parts.base = NULL_TREE;
4738 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4739 if (parts.symbol != NULL_TREE
4740 && !valid_mem_ref_p (mem_mode, as, &parts))
4742 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4743 parts.symbol = NULL_TREE;
4744 /* Reset SIMPLE_INV since symbol address needs to be computed
4745 outside of address expression in this case. */
4746 simple_inv = false;
4747 /* The symbol part is moved back to the base part; the base can't be NULL. */
4748 parts.base = integer_one_node;
4751 else
4752 parts.index = NULL_TREE;
4754 else
4756 poly_int64 ainc_step;
4757 if (can_autoinc
4758 && ratio == 1
4759 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4761 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4763 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4764 ainc_offset += ainc_step;
4765 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4766 addr_mode, mem_mode, as, speed);
4767 if (!cost.infinite_cost_p ())
4769 *can_autoinc = true;
4770 return cost;
4772 cost = no_cost;
4774 if (!aff_combination_zero_p (aff_inv))
4776 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4777 /* Addressing mode "base + offset". */
4778 if (!valid_mem_ref_p (mem_mode, as, &parts))
4779 parts.offset = NULL_TREE;
4780 else
4781 aff_inv->offset = 0;
4785 if (simple_inv)
4786 simple_inv = (aff_inv == NULL
4787 || aff_combination_const_p (aff_inv)
4788 || aff_combination_singleton_var_p (aff_inv));
4789 if (!aff_combination_zero_p (aff_inv))
4790 comp_inv = aff_combination_to_tree (aff_inv);
4791 if (comp_inv != NULL_TREE)
4792 cost = force_var_cost (data, comp_inv, inv_vars);
4793 if (ratio != 1 && parts.step == NULL_TREE)
4794 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4795 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4796 var_cost += add_cost (speed, addr_mode);
4798 if (comp_inv && inv_expr && !simple_inv)
4800 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4801 /* Clear depends on. */
4802 if (*inv_expr != NULL && inv_vars && *inv_vars)
4803 bitmap_clear (*inv_vars);
4805 /* The cost of a small invariant expression adjusted against the loop
4806 niters is usually zero, which makes it difficult to distinguish from
4807 a candidate based on loop invariant variables. Secondly, the
4808 generated invariant expression may not be hoisted out of the loop by
4809 a following pass. We penalize the cost by rounding up in order to
4810 neutralize such effects. */
4811 cost.cost = adjust_setup_cost (data, cost.cost, true);
4812 cost.scratch = cost.cost;
4815 cost += var_cost;
4816 addr = addr_for_mem_ref (&parts, as, false);
4817 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4818 cost += address_cost (addr, mem_mode, as, speed);
4820 if (parts.symbol != NULL_TREE)
4821 cost.complexity += 1;
4822 /* Don't increase the complexity of adding a scaled index if it's
4823 the only kind of index that the target allows. */
4824 if (parts.step != NULL_TREE && ok_without_ratio_p)
4825 cost.complexity += 1;
4826 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4827 cost.complexity += 1;
4828 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4829 cost.complexity += 1;
4831 return cost;
4834 /* Scale (multiply) the computed COST (except the scratch part, which should
4835 be hoisted out of the loop) by header->frequency / AT->frequency, which
4836 makes the expected cost more accurate. */
4838 static comp_cost
4839 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4841 if (data->speed
4842 && data->current_loop->header->count.to_frequency (cfun) > 0)
4844 basic_block bb = gimple_bb (at);
4845 gcc_assert (cost.scratch <= cost.cost);
4846 int scale_factor = (int)(intptr_t) bb->aux;
4847 if (scale_factor == 1)
4848 return cost;
4850 int64_t scaled_cost
4851 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4853 if (dump_file && (dump_flags & TDF_DETAILS))
4854 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4855 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4856 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4858 cost.cost = scaled_cost;
4861 return cost;
4864 /* Determines the cost of the computation by which USE is expressed
4865 from induction variable CAND. If ADDRESS_P is true, we just need
4866 to create an address from it, otherwise we want to get it into a
4867 register. A set of invariants we depend on is stored in INV_VARS.
4868 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4869 addressing is likely. If INV_EXPR is nonnull, record invariant
4870 expr entry in it. */
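/* For illustration (non-address case, informal): the resulting cost is
   roughly the amortized cost of forcing the invariant part into a register,
   plus a multiplication when the step ratio is not 1, plus one addition to
   combine the invariant and variant parts, scaled by basic block frequency.  */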
4872 static comp_cost
4873 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4874 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4875 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4877 gimple *at = use->stmt;
4878 tree ubase = use->iv->base, cbase = cand->iv->base;
4879 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4880 tree comp_inv = NULL_TREE;
4881 HOST_WIDE_INT ratio, aratio;
4882 comp_cost cost;
4883 widest_int rat;
4884 aff_tree aff_inv, aff_var;
4885 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4887 if (inv_vars)
4888 *inv_vars = NULL;
4889 if (can_autoinc)
4890 *can_autoinc = false;
4891 if (inv_expr)
4892 *inv_expr = NULL;
4894 /* Check if we have enough precision to express the values of use. */
4895 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4896 return infinite_cost;
4898 if (address_p
4899 || (use->iv->base_object
4900 && cand->iv->base_object
4901 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4902 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4904 /* Do not try to express address of an object with computation based
4905 on address of a different object. This may cause problems in rtl
4906 level alias analysis (that does not expect this to be happening,
4907 as this is illegal in C), and would be unlikely to be useful
4908 anyway. */
4909 if (use->iv->base_object
4910 && cand->iv->base_object
4911 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4912 return infinite_cost;
4915 if (!get_computation_aff_1 (data->current_loop, at, use,
4916 cand, &aff_inv, &aff_var, &rat)
4917 || !wi::fits_shwi_p (rat))
4918 return infinite_cost;
4920 ratio = rat.to_shwi ();
4921 if (address_p)
4923 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4924 inv_vars, inv_expr, can_autoinc, speed);
4925 cost = get_scaled_computation_cost_at (data, at, cost);
4926 /* For doloop IV cand, add on the extra cost. */
4927 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4928 return cost;
4931 bool simple_inv = (aff_combination_const_p (&aff_inv)
4932 || aff_combination_singleton_var_p (&aff_inv));
4933 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4934 aff_combination_convert (&aff_inv, signed_type);
4935 if (!aff_combination_zero_p (&aff_inv))
4936 comp_inv = aff_combination_to_tree (&aff_inv);
4938 cost = force_var_cost (data, comp_inv, inv_vars);
4939 if (comp_inv && inv_expr && !simple_inv)
4941 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4942 /* Clear depends on. */
4943 if (*inv_expr != NULL && inv_vars && *inv_vars)
4944 bitmap_clear (*inv_vars);
4946 cost.cost = adjust_setup_cost (data, cost.cost);
4947 /* Record setup cost in scratch field. */
4948 cost.scratch = cost.cost;
4950 /* Cost of constant integer can be covered when adding invariant part to
4951 variant part. */
4952 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4953 cost = no_cost;
4955 /* Need type narrowing to represent use with cand. */
4956 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4958 machine_mode outer_mode = TYPE_MODE (utype);
4959 machine_mode inner_mode = TYPE_MODE (ctype);
4960 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4963 /* Turn a + i * (-c) into a - i * c. */
4964 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4965 aratio = -ratio;
4966 else
4967 aratio = ratio;
4969 if (ratio != 1)
4970 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4972 /* TODO: We may also need to check if we can compute a + i * 4 in one
4973 instruction. */
4974 /* Need to add up the invariant and variant parts. */
4975 if (comp_inv && !integer_zerop (comp_inv))
4976 cost += add_cost (speed, TYPE_MODE (utype));
4978 cost = get_scaled_computation_cost_at (data, at, cost);
4980 /* For doloop IV cand, add on the extra cost. */
4981 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4982 cost += targetm.doloop_cost_for_generic;
4984 return cost;
4987 /* Determines cost of computing the use in GROUP with CAND in a generic
4988 expression. */
4990 static bool
4991 determine_group_iv_cost_generic (struct ivopts_data *data,
4992 struct iv_group *group, struct iv_cand *cand)
4994 comp_cost cost;
4995 iv_inv_expr_ent *inv_expr = NULL;
4996 bitmap inv_vars = NULL, inv_exprs = NULL;
4997 struct iv_use *use = group->vuses[0];
4999 /* The simple case first -- if we need to express value of the preserved
5000 original biv, the cost is 0. This also prevents us from counting the
5001 cost of increment twice -- once at this use and once in the cost of
5002 the candidate. */
5003 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5004 cost = no_cost;
5005 /* If the IV candidate involves undefined SSA values and is not the
5006 same IV as on the USE, avoid using that candidate here. */
5007 else if (cand->involves_undefs
5008 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
5009 return false;
5010 else
5011 cost = get_computation_cost (data, use, cand, false,
5012 &inv_vars, NULL, &inv_expr);
5014 if (inv_expr)
5016 inv_exprs = BITMAP_ALLOC (NULL);
5017 bitmap_set_bit (inv_exprs, inv_expr->id);
5019 set_group_iv_cost (data, group, cand, cost, inv_vars,
5020 NULL_TREE, ERROR_MARK, inv_exprs);
5021 return !cost.infinite_cost_p ();
5024 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5026 static bool
5027 determine_group_iv_cost_address (struct ivopts_data *data,
5028 struct iv_group *group, struct iv_cand *cand)
5030 unsigned i;
5031 bitmap inv_vars = NULL, inv_exprs = NULL;
5032 bool can_autoinc;
5033 iv_inv_expr_ent *inv_expr = NULL;
5034 struct iv_use *use = group->vuses[0];
5035 comp_cost sum_cost = no_cost, cost;
5037 cost = get_computation_cost (data, use, cand, true,
5038 &inv_vars, &can_autoinc, &inv_expr);
5040 if (inv_expr)
5042 inv_exprs = BITMAP_ALLOC (NULL);
5043 bitmap_set_bit (inv_exprs, inv_expr->id);
5045 sum_cost = cost;
5046 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5048 if (can_autoinc)
5049 sum_cost -= cand->cost_step;
5050 /* If we generated the candidate solely for exploiting autoincrement
5051 opportunities, and it turns out it can't be used, set the cost to
5052 infinity to make sure we ignore it. */
5053 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5054 sum_cost = infinite_cost;
5057 /* Uses in a group can share setup code, so only add setup cost once. */
5058 cost -= cost.scratch;
5059 /* Compute and add costs for the rest of the uses in this group. */
5060 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5062 struct iv_use *next = group->vuses[i];
5064 /* TODO: We could skip computing cost for sub iv_use when it has the
5065 same cost as the first iv_use, but the cost really depends on the
5066 offset and where the iv_use is. */
5067 cost = get_computation_cost (data, next, cand, true,
5068 NULL, &can_autoinc, &inv_expr);
5069 if (inv_expr)
5071 if (!inv_exprs)
5072 inv_exprs = BITMAP_ALLOC (NULL);
5074 bitmap_set_bit (inv_exprs, inv_expr->id);
5076 sum_cost += cost;
5078 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5079 NULL_TREE, ERROR_MARK, inv_exprs);
5081 return !sum_cost.infinite_cost_p ();
5084 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5085 and stores it to VAL. */
5087 static void
5088 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5089 class tree_niter_desc *desc, aff_tree *val)
5091 aff_tree step, delta, nit;
5092 struct iv *iv = cand->iv;
5093 tree type = TREE_TYPE (iv->base);
5094 tree niter = desc->niter;
5095 bool after_adjust = stmt_after_increment (loop, cand, at);
5096 tree steptype;
5098 if (POINTER_TYPE_P (type))
5099 steptype = sizetype;
5100 else
5101 steptype = unsigned_type_for (type);
5103 /* If AFTER_ADJUST is required, the code below generates the equivalent
5104 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5105 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5106 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5107 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5108 class for common idioms that we know are safe. */
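/* For illustration: if the control IV runs from 1 with step 1 and the exit
   test is i < n or i != n for an SSA_NAME n, NITER is n - 1; we then use n
   directly and drop the extra + STEP below, since BASE + (n - 1) * STEP
   + STEP equals BASE + n * STEP.  */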
5109 if (after_adjust
5110 && desc->control.no_overflow
5111 && integer_onep (desc->control.step)
5112 && (desc->cmp == LT_EXPR
5113 || desc->cmp == NE_EXPR)
5114 && TREE_CODE (desc->bound) == SSA_NAME)
5116 if (integer_onep (desc->control.base))
5118 niter = desc->bound;
5119 after_adjust = false;
5121 else if (TREE_CODE (niter) == MINUS_EXPR
5122 && integer_onep (TREE_OPERAND (niter, 1)))
5124 niter = TREE_OPERAND (niter, 0);
5125 after_adjust = false;
5129 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5130 aff_combination_convert (&step, steptype);
5131 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5132 aff_combination_convert (&nit, steptype);
5133 aff_combination_mult (&nit, &step, &delta);
5134 if (after_adjust)
5135 aff_combination_add (&delta, &step);
5137 tree_to_aff_combination (iv->base, type, val);
5138 if (!POINTER_TYPE_P (type))
5139 aff_combination_convert (val, steptype);
5140 aff_combination_add (val, &delta);
5143 /* Returns period of induction variable iv. */
5145 static tree
5146 iv_period (struct iv *iv)
5148 tree step = iv->step, period, type;
5149 tree pow2div;
5151 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5153 type = unsigned_type_for (TREE_TYPE (step));
5154 /* Period of the iv is lcm (step, type_range)/step -1,
5155 i.e., N*type_range/step - 1. Since type range is power
5156 of two, N == step >> num_of_ending_zeros_binary (step),
5157 so the final result is
5159 (type_range >> num_of_ending_zeros_binary (step)) - 1
5162 pow2div = num_ending_zeros (step);
5164 period = build_low_bits_mask (type,
5165 (TYPE_PRECISION (type)
5166 - tree_to_uhwi (pow2div)));
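/* For illustration: in a 32-bit unsigned type, a step of 12 has two trailing
   zero bits, giving a period of (1 << 30) - 1; an odd step gives the full
   period 0xffffffff.  */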
5168 return period;
5171 /* Returns the comparison operator used when eliminating the iv USE. */
5173 static enum tree_code
5174 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5176 class loop *loop = data->current_loop;
5177 basic_block ex_bb;
5178 edge exit;
5180 ex_bb = gimple_bb (use->stmt);
5181 exit = EDGE_SUCC (ex_bb, 0);
5182 if (flow_bb_inside_loop_p (loop, exit->dest))
5183 exit = EDGE_SUCC (ex_bb, 1);
5185 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5188 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5189 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5190 calculation is performed in non-wrapping type.
5192 TODO: More generally, we could test for the situation that
5193 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5194 This would require knowing the sign of OFFSET. */
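/* For illustration: if BASE was computed as SOMETHING + 16 in a non-wrapping
   type and OFFSET is 16, then BASE - OFFSET is just SOMETHING again, so the
   subtraction cannot overflow.  */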
5196 static bool
5197 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5199 enum tree_code code;
5200 tree e1, e2;
5201 aff_tree aff_e1, aff_e2, aff_offset;
5203 if (!nowrap_type_p (TREE_TYPE (base)))
5204 return false;
5206 base = expand_simple_operations (base);
5208 if (TREE_CODE (base) == SSA_NAME)
5210 gimple *stmt = SSA_NAME_DEF_STMT (base);
5212 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5213 return false;
5215 code = gimple_assign_rhs_code (stmt);
5216 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5217 return false;
5219 e1 = gimple_assign_rhs1 (stmt);
5220 e2 = gimple_assign_rhs2 (stmt);
5222 else
5224 code = TREE_CODE (base);
5225 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5226 return false;
5227 e1 = TREE_OPERAND (base, 0);
5228 e2 = TREE_OPERAND (base, 1);
5231 /* Use affine expansion as deeper inspection to prove the equality. */
5232 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5233 &aff_e2, &data->name_expansion_cache);
5234 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5235 &aff_offset, &data->name_expansion_cache);
5236 aff_combination_scale (&aff_offset, -1);
5237 switch (code)
5239 case PLUS_EXPR:
5240 aff_combination_add (&aff_e2, &aff_offset);
5241 if (aff_combination_zero_p (&aff_e2))
5242 return true;
5244 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5245 &aff_e1, &data->name_expansion_cache);
5246 aff_combination_add (&aff_e1, &aff_offset);
5247 return aff_combination_zero_p (&aff_e1);
5249 case POINTER_PLUS_EXPR:
5250 aff_combination_add (&aff_e2, &aff_offset);
5251 return aff_combination_zero_p (&aff_e2);
5253 default:
5254 return false;
5258 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5259 comparison with CAND. NITER describes the number of iterations of
5260 the loops. If successful, the comparison in COMP_P is altered accordingly.
5262 We aim to handle the following situation:
5264 sometype *base, *p;
5265 int a, b, i;
5267 i = a;
5268 p = p_0 = base + a;
5272 bla (*p);
5273 p++;
5274 i++;
5276 while (i < b);
5278 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5279 We aim to optimize this to
5281 p = p_0 = base + a;
5284 bla (*p);
5285 p++;
5287 while (p < p_0 - a + b);
5289 This preserves the correctness, since the pointer arithmetics does not
5290 overflow. More precisely:
5292 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5293 overflow in computing it or the values of p.
5294 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5295 overflow. To prove this, we use the fact that p_0 = base + a. */
5297 static bool
5298 iv_elimination_compare_lt (struct ivopts_data *data,
5299 struct iv_cand *cand, enum tree_code *comp_p,
5300 class tree_niter_desc *niter)
5302 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5303 class aff_tree nit, tmpa, tmpb;
5304 enum tree_code comp;
5305 HOST_WIDE_INT step;
5307 /* We need to know that the candidate induction variable does not overflow.
5308 While more complex analysis may be used to prove this, for now just
5309 check that the variable appears in the original program and that it
5310 is computed in a type that guarantees no overflows. */
5311 cand_type = TREE_TYPE (cand->iv->base);
5312 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5313 return false;
5315 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5316 the calculation of the BOUND could overflow, making the comparison
5317 invalid. */
5318 if (!data->loop_single_exit_p)
5319 return false;
5321 /* We need to be able to decide whether candidate is increasing or decreasing
5322 in order to choose the right comparison operator. */
5323 if (!cst_and_fits_in_hwi (cand->iv->step))
5324 return false;
5325 step = int_cst_value (cand->iv->step);
5327 /* Check that the number of iterations matches the expected pattern:
5328 a + 1 > b ? 0 : b - a - 1. */
5329 mbz = niter->may_be_zero;
5330 if (TREE_CODE (mbz) == GT_EXPR)
5332 /* Handle a + 1 > b. */
5333 tree op0 = TREE_OPERAND (mbz, 0);
5334 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5336 a = TREE_OPERAND (op0, 0);
5337 b = TREE_OPERAND (mbz, 1);
5339 else
5340 return false;
5342 else if (TREE_CODE (mbz) == LT_EXPR)
5344 tree op1 = TREE_OPERAND (mbz, 1);
5346 /* Handle b < a + 1. */
5347 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5349 a = TREE_OPERAND (op1, 0);
5350 b = TREE_OPERAND (mbz, 0);
5352 else
5353 return false;
5355 else
5356 return false;
5358 /* Expected number of iterations is B - A - 1. Check that it matches
5359 the actual number, i.e., that B - A - NITER = 1. */
5360 tree_to_aff_combination (niter->niter, nit_type, &nit);
5361 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5362 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5363 aff_combination_scale (&nit, -1);
5364 aff_combination_scale (&tmpa, -1);
5365 aff_combination_add (&tmpb, &tmpa);
5366 aff_combination_add (&tmpb, &nit);
5367 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5368 return false;
5370 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5371 overflow. */
5372 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5373 cand->iv->step,
5374 fold_convert (TREE_TYPE (cand->iv->step), a));
5375 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5376 return false;
5378 /* Determine the new comparison operator. */
5379 comp = step < 0 ? GT_EXPR : LT_EXPR;
5380 if (*comp_p == NE_EXPR)
5381 *comp_p = comp;
5382 else if (*comp_p == EQ_EXPR)
5383 *comp_p = invert_tree_comparison (comp, false);
5384 else
5385 gcc_unreachable ();
5387 return true;
5390 /* Check whether it is possible to express the condition in USE by comparison
5391 of candidate CAND. If so, store the value compared with to BOUND, and the
5392 comparison operator to COMP. */
5394 static bool
5395 may_eliminate_iv (struct ivopts_data *data,
5396 struct iv_use *use, struct iv_cand *cand, tree *bound,
5397 enum tree_code *comp)
5399 basic_block ex_bb;
5400 edge exit;
5401 tree period;
5402 class loop *loop = data->current_loop;
5403 aff_tree bnd;
5404 class tree_niter_desc *desc = NULL;
5406 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5407 return false;
5409 /* For now works only for exits that dominate the loop latch.
5410 TODO: extend to other conditions inside loop body. */
5411 ex_bb = gimple_bb (use->stmt);
5412 if (use->stmt != last_nondebug_stmt (ex_bb)
5413 || gimple_code (use->stmt) != GIMPLE_COND
5414 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5415 return false;
5417 exit = EDGE_SUCC (ex_bb, 0);
5418 if (flow_bb_inside_loop_p (loop, exit->dest))
5419 exit = EDGE_SUCC (ex_bb, 1);
5420 if (flow_bb_inside_loop_p (loop, exit->dest))
5421 return false;
5423 desc = niter_for_exit (data, exit);
5424 if (!desc)
5425 return false;
5427 /* Determine whether we can use the variable to test the exit condition.
5428 This is the case iff the period of the induction variable is greater
5429 than the number of iterations for which the exit condition is true. */
5430 period = iv_period (cand->iv);
5432 /* If the number of iterations is constant, compare against it directly. */
5433 if (TREE_CODE (desc->niter) == INTEGER_CST)
5435 /* See cand_value_at. */
5436 if (stmt_after_increment (loop, cand, use->stmt))
5438 if (!tree_int_cst_lt (desc->niter, period))
5439 return false;
5441 else
5443 if (tree_int_cst_lt (period, desc->niter))
5444 return false;
5448 /* If not, and if this is the only possible exit of the loop, see whether
5449 we can get a conservative estimate on the number of iterations of the
5450 entire loop and compare against that instead. */
5451 else
5453 widest_int period_value, max_niter;
5455 max_niter = desc->max;
5456 if (stmt_after_increment (loop, cand, use->stmt))
5457 max_niter += 1;
5458 period_value = wi::to_widest (period);
5459 if (wi::gtu_p (max_niter, period_value))
5461 /* See if we can take advantage of inferred loop bound
5462 information. */
5463 if (data->loop_single_exit_p)
5465 if (!max_loop_iterations (loop, &max_niter))
5466 return false;
5467 /* The loop bound is already adjusted by adding 1. */
5468 if (wi::gtu_p (max_niter, period_value))
5469 return false;
5471 else
5472 return false;
5476 /* For doloop IV cand, the bound would be zero. It's safe whether
5477 may_be_zero is set or not. */
5478 if (cand->doloop_p)
5480 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5481 *comp = iv_elimination_compare (data, use);
5482 return true;
5485 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5487 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5488 aff_combination_to_tree (&bnd));
5489 *comp = iv_elimination_compare (data, use);
5491 /* It is unlikely that computing the number of iterations using division
5492 would be more profitable than keeping the original induction variable. */
5493 if (expression_expensive_p (*bound))
5494 return false;
5496 /* Sometimes, it is possible to handle the situation that the number of
5497 iterations may be zero unless additional assumptions hold, by using <
5498 instead of != in the exit condition.
5500 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5501 base the exit condition on it. However, that is often too
5502 expensive. */
5503 if (!integer_zerop (desc->may_be_zero))
5504 return iv_elimination_compare_lt (data, cand, comp, desc);
5506 return true;
5509 /* Calculates the cost of BOUND if it is a PARM_DECL. A PARM_DECL must
5510 be copied if it is used in the loop body and DATA->body_includes_call. */
5512 static int
5513 parm_decl_cost (struct ivopts_data *data, tree bound)
5515 tree sbound = bound;
5516 STRIP_NOPS (sbound);
5518 if (TREE_CODE (sbound) == SSA_NAME
5519 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5520 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5521 && data->body_includes_call)
5522 return COSTS_N_INSNS (1);
5524 return 0;
5527 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5529 static bool
5530 determine_group_iv_cost_cond (struct ivopts_data *data,
5531 struct iv_group *group, struct iv_cand *cand)
5533 tree bound = NULL_TREE;
5534 struct iv *cmp_iv;
5535 bitmap inv_exprs = NULL;
5536 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5537 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5538 enum comp_iv_rewrite rewrite_type;
5539 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5540 tree *control_var, *bound_cst;
5541 enum tree_code comp = ERROR_MARK;
5542 struct iv_use *use = group->vuses[0];
5544 /* Extract condition operands. */
5545 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5546 &bound_cst, NULL, &cmp_iv);
5547 gcc_assert (rewrite_type != COMP_IV_NA);
5549 /* Try iv elimination. */
5550 if (rewrite_type == COMP_IV_ELIM
5551 && may_eliminate_iv (data, use, cand, &bound, &comp))
5553 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5554 if (elim_cost.cost == 0)
5555 elim_cost.cost = parm_decl_cost (data, bound);
5556 else if (TREE_CODE (bound) == INTEGER_CST)
5557 elim_cost.cost = 0;
5558 /* If we replace a loop condition 'i < n' with 'p < base + n',
5559 inv_vars_elim will have 'base' and 'n' set, which implies that both
5560 'base' and 'n' will be live during the loop. More likely,
5561 'base + n' will be loop invariant, resulting in only one live value
5562 during the loop. So in that case we clear inv_vars_elim and set
5563 inv_expr_elim instead. */
5564 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5566 inv_expr_elim = get_loop_invariant_expr (data, bound);
5567 bitmap_clear (inv_vars_elim);
5569 /* The bound is a loop invariant, so it will be only computed
5570 once. */
5571 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5574 /* When the condition is a comparison of the candidate IV against
5575 zero, prefer this IV.
5577 TODO: The constant that we're subtracting from the cost should
5578 be target-dependent. This information should be added to the
5579 target costs for each backend. */
5580 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5581 && integer_zerop (*bound_cst)
5582 && (operand_equal_p (*control_var, cand->var_after, 0)
5583 || operand_equal_p (*control_var, cand->var_before, 0)))
5584 elim_cost -= 1;
5586 express_cost = get_computation_cost (data, use, cand, false,
5587 &inv_vars_express, NULL,
5588 &inv_expr_express);
5589 if (cmp_iv != NULL)
5590 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5592 /* Count the cost of the original bound as well. */
5593 bound_cost = force_var_cost (data, *bound_cst, NULL);
5594 if (bound_cost.cost == 0)
5595 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5596 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5597 bound_cost.cost = 0;
5598 express_cost += bound_cost;
5600 /* Choose the better approach, preferring the eliminated IV. */
5601 if (elim_cost <= express_cost)
5603 cost = elim_cost;
5604 inv_vars = inv_vars_elim;
5605 inv_vars_elim = NULL;
5606 inv_expr = inv_expr_elim;
5607 /* For doloop candidate/use pair, adjust to zero cost. */
5608 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5609 cost = no_cost;
5611 else
5613 cost = express_cost;
5614 inv_vars = inv_vars_express;
5615 inv_vars_express = NULL;
5616 bound = NULL_TREE;
5617 comp = ERROR_MARK;
5618 inv_expr = inv_expr_express;
5621 if (inv_expr)
5623 inv_exprs = BITMAP_ALLOC (NULL);
5624 bitmap_set_bit (inv_exprs, inv_expr->id);
5626 set_group_iv_cost (data, group, cand, cost,
5627 inv_vars, bound, comp, inv_exprs);
5629 if (inv_vars_elim)
5630 BITMAP_FREE (inv_vars_elim);
5631 if (inv_vars_express)
5632 BITMAP_FREE (inv_vars_express);
5634 return !cost.infinite_cost_p ();
5637 /* Determines cost of computing uses in GROUP with CAND. Returns false
5638 if USE cannot be represented with CAND. */
5640 static bool
5641 determine_group_iv_cost (struct ivopts_data *data,
5642 struct iv_group *group, struct iv_cand *cand)
5644 switch (group->type)
5646 case USE_NONLINEAR_EXPR:
5647 return determine_group_iv_cost_generic (data, group, cand);
5649 case USE_REF_ADDRESS:
5650 case USE_PTR_ADDRESS:
5651 return determine_group_iv_cost_address (data, group, cand);
5653 case USE_COMPARE:
5654 return determine_group_iv_cost_cond (data, group, cand);
5656 default:
5657 gcc_unreachable ();
5661 /* Return true if get_computation_cost indicates that autoincrement is
5662 a possibility for the pair of USE and CAND, false otherwise. */
5664 static bool
5665 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5666 struct iv_cand *cand)
5668 if (!address_p (use->type))
5669 return false;
5671 bool can_autoinc = false;
5672 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5673 return can_autoinc;
5676 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5677 use that allows autoincrement, and set their AINC_USE if possible. */
5679 static void
5680 set_autoinc_for_original_candidates (struct ivopts_data *data)
5682 unsigned i, j;
5684 for (i = 0; i < data->vcands.length (); i++)
5686 struct iv_cand *cand = data->vcands[i];
5687 struct iv_use *closest_before = NULL;
5688 struct iv_use *closest_after = NULL;
5689 if (cand->pos != IP_ORIGINAL)
5690 continue;
5692 for (j = 0; j < data->vgroups.length (); j++)
5694 struct iv_group *group = data->vgroups[j];
5695 struct iv_use *use = group->vuses[0];
5696 unsigned uid = gimple_uid (use->stmt);
5698 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5699 continue;
5701 if (uid < gimple_uid (cand->incremented_at)
5702 && (closest_before == NULL
5703 || uid > gimple_uid (closest_before->stmt)))
5704 closest_before = use;
5706 if (uid > gimple_uid (cand->incremented_at)
5707 && (closest_after == NULL
5708 || uid < gimple_uid (closest_after->stmt)))
5709 closest_after = use;
5712 if (closest_before != NULL
5713 && autoinc_possible_for_pair (data, closest_before, cand))
5714 cand->ainc_use = closest_before;
5715 else if (closest_after != NULL
5716 && autoinc_possible_for_pair (data, closest_after, cand))
5717 cand->ainc_use = closest_after;
5721 /* Relate compare use with all candidates. */
5723 static void
5724 relate_compare_use_with_all_cands (struct ivopts_data *data)
5726 unsigned i, count = data->vcands.length ();
5727 for (i = 0; i < data->vgroups.length (); i++)
5729 struct iv_group *group = data->vgroups[i];
5731 if (group->type == USE_COMPARE)
5732 bitmap_set_range (group->related_cands, 0, count);
5736 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5737 doloop iv base from niter: base = niter + 1. */
5739 static tree
5740 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5741 const widest_int &iterations_max)
5743 tree ntype = TREE_TYPE (niter);
5744 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5745 if (!pref_type)
5746 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5747 build_int_cst (ntype, 1));
5749 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5751 int prec = TYPE_PRECISION (ntype);
5752 int pref_prec = TYPE_PRECISION (pref_type);
5754 tree base;
5756 /* Check if PREFERRED_MODE is able to represent niter. */
5757 if (pref_prec > prec
5758 || wi::ltu_p (iterations_max,
5759 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5760 UNSIGNED)))
5762 /* No wrap; it is safe to convert to the preferred type after computing niter + 1. */
5763 if (wi::ltu_p (iterations_max,
5764 widest_int::from (wi::max_value (prec, UNSIGNED),
5765 UNSIGNED)))
5767 /* This could help to optimize the "-1 +1" pair when niter looks
5768 like "n-1": n is in the original mode. "base = (n - 1) + 1"
5769 in PREFERRED_MODE could then become base = (PREFERRED_TYPE)n. */
5770 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5771 build_int_cst (ntype, 1));
5772 base = fold_convert (pref_type, base);
5775 /* To avoid wrapping, convert niter to the preferred type before adding 1. */
5776 else
5778 niter = fold_convert (pref_type, niter);
5779 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5780 build_int_cst (pref_type, 1));
5783 else
5784 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5785 build_int_cst (ntype, 1));
5786 return base;
5789 /* Add one doloop dedicated IV candidate:
5790 - Base is (may_be_zero ? 1 : (niter + 1)).
5791 - Step is -1. */
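/* For illustration: the candidate is meant to count the remaining
   iterations, decrementing by one each time around the loop, which is the
   shape that "decrement and branch while nonzero" style doloop
   instructions expect.  */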
5793 static void
5794 add_iv_candidate_for_doloop (struct ivopts_data *data)
5796 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5797 gcc_assert (niter_desc && niter_desc->assumptions);
5799 tree niter = niter_desc->niter;
5800 tree ntype = TREE_TYPE (niter);
5801 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5803 tree may_be_zero = niter_desc->may_be_zero;
5804 if (may_be_zero && integer_zerop (may_be_zero))
5805 may_be_zero = NULL_TREE;
5806 if (may_be_zero)
5808 if (COMPARISON_CLASS_P (may_be_zero))
5810 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5811 build_int_cst (ntype, 0),
5812 rewrite_to_non_trapping_overflow (niter));
5814 /* Don't try to obtain the iteration count expression when may_be_zero is
5815 integer_nonzerop (the iteration count is actually one) or has any other form. */
5816 else
5817 return;
5820 machine_mode mode = TYPE_MODE (ntype);
5821 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5823 tree base;
5824 if (mode != pref_mode)
5826 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5827 ntype = TREE_TYPE (base);
5829 else
5830 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5831 build_int_cst (ntype, 1));
5834 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5837 /* Finds the candidates for the induction variables. */
5839 static void
5840 find_iv_candidates (struct ivopts_data *data)
5842 /* Add commonly used ivs. */
5843 add_standard_iv_candidates (data);
5845 /* Add doloop dedicated ivs. */
5846 if (data->doloop_use_p)
5847 add_iv_candidate_for_doloop (data);
5849 /* Add old induction variables. */
5850 add_iv_candidate_for_bivs (data);
5852 /* Add induction variables derived from uses. */
5853 add_iv_candidate_for_groups (data);
5855 set_autoinc_for_original_candidates (data);
5857 /* Record the important candidates. */
5858 record_important_candidates (data);
5860 /* Relate compare iv_use with all candidates. */
5861 if (!data->consider_all_candidates)
5862 relate_compare_use_with_all_cands (data);
5864 if (dump_file && (dump_flags & TDF_DETAILS))
5866 unsigned i;
5868 fprintf (dump_file, "\n<Important Candidates>:\t");
5869 for (i = 0; i < data->vcands.length (); i++)
5870 if (data->vcands[i]->important)
5871 fprintf (dump_file, " %d,", data->vcands[i]->id);
5872 fprintf (dump_file, "\n");
5874 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5875 for (i = 0; i < data->vgroups.length (); i++)
5877 struct iv_group *group = data->vgroups[i];
5879 if (group->related_cands)
5881 fprintf (dump_file, " Group %d:\t", group->id);
5882 dump_bitmap (dump_file, group->related_cands);
5885 fprintf (dump_file, "\n");
5889 /* Determines the costs of computing the iv uses with each iv candidate. */
5891 static void
5892 determine_group_iv_costs (struct ivopts_data *data)
5894 unsigned i, j;
5895 struct iv_cand *cand;
5896 struct iv_group *group;
5897 bitmap to_clear = BITMAP_ALLOC (NULL);
5899 alloc_use_cost_map (data);
5901 for (i = 0; i < data->vgroups.length (); i++)
5903 group = data->vgroups[i];
5905 if (data->consider_all_candidates)
5907 for (j = 0; j < data->vcands.length (); j++)
5909 cand = data->vcands[j];
5910 determine_group_iv_cost (data, group, cand);
5913 else
5915 bitmap_iterator bi;
5917 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5919 cand = data->vcands[j];
5920 if (!determine_group_iv_cost (data, group, cand))
5921 bitmap_set_bit (to_clear, j);
5924 /* Remove the candidates for which the cost is infinite from
5925 the list of related candidates. */
5926 bitmap_and_compl_into (group->related_cands, to_clear);
5927 bitmap_clear (to_clear);
5931 BITMAP_FREE (to_clear);
5933 if (dump_file && (dump_flags & TDF_DETAILS))
5935 bitmap_iterator bi;
5937 /* Dump invariant variables. */
5938 fprintf (dump_file, "\n<Invariant Vars>:\n");
5939 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5941 struct version_info *info = ver_info (data, i);
5942 if (info->inv_id)
5944 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5945 print_generic_expr (dump_file, info->name, TDF_SLIM);
5946 fprintf (dump_file, "%s\n",
5947 info->has_nonlin_use ? "" : "\t(eliminable)");
5951 /* Dump invariant expressions. */
5952 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5953 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5955 for (hash_table<iv_inv_expr_hasher>::iterator it
5956 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5957 ++it)
5958 list.safe_push (*it);
5960 list.qsort (sort_iv_inv_expr_ent);
5962 for (i = 0; i < list.length (); ++i)
5964 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5965 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5966 fprintf (dump_file, "\n");
5969 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5971 for (i = 0; i < data->vgroups.length (); i++)
5973 group = data->vgroups[i];
5975 fprintf (dump_file, "Group %d:\n", i);
5976 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5977 for (j = 0; j < group->n_map_members; j++)
5979 if (!group->cost_map[j].cand
5980 || group->cost_map[j].cost.infinite_cost_p ())
5981 continue;
5983 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5984 group->cost_map[j].cand->id,
5985 group->cost_map[j].cost.cost,
5986 group->cost_map[j].cost.complexity);
5987 if (!group->cost_map[j].inv_exprs
5988 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5989 fprintf (dump_file, "NIL;\t");
5990 else
5991 bitmap_print (dump_file,
5992 group->cost_map[j].inv_exprs, "", ";\t");
5993 if (!group->cost_map[j].inv_vars
5994 || bitmap_empty_p (group->cost_map[j].inv_vars))
5995 fprintf (dump_file, "NIL;\n");
5996 else
5997 bitmap_print (dump_file,
5998 group->cost_map[j].inv_vars, "", "\n");
6001 fprintf (dump_file, "\n");
6003 fprintf (dump_file, "\n");
6007 /* Determines cost of the candidate CAND. */
6009 static void
6010 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6012 comp_cost cost_base;
6013 int64_t cost, cost_step;
6014 tree base;
6016 gcc_assert (cand->iv != NULL);
6018 /* There are two costs associated with the candidate -- its increment
6019 and its initialization. The second is almost negligible for any loop
6020 that rolls enough, so we take it into account only very little. */
6022 base = cand->iv->base;
6023 cost_base = force_var_cost (data, base, NULL);
6024 /* It is exceptional for the iv register to happen to be initialized with
6025 the proper value at no cost. In general, there will at least be a regcopy
6026 or a const set. */
6027 if (cost_base.cost == 0)
6028 cost_base.cost = COSTS_N_INSNS (1);
6029 /* Doloop decrement should be considered as zero cost. */
6030 if (cand->doloop_p)
6031 cost_step = 0;
6032 else
6033 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6034 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6036 /* Prefer the original ivs unless we may gain something by replacing them.
6037 The reason is to make debugging simpler; so this is not relevant for
6038 artificial ivs created by other optimization passes. */
6039 if ((cand->pos != IP_ORIGINAL
6040 || !SSA_NAME_VAR (cand->var_before)
6041 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6042 /* Prefer doloop as well. */
6043 && !cand->doloop_p)
6044 cost++;
6046 /* Prefer not to insert statements into latch unless there are some
6047 already (so that we do not create unnecessary jumps). */
6048 if (cand->pos == IP_END
6049 && empty_block_p (ip_end_pos (data->current_loop)))
6050 cost++;
6052 cand->cost = cost;
6053 cand->cost_step = cost_step;
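/* Rough illustration of the cost computed above (all numbers assumed): with
   add_cost = 4 and a base needing one setup instruction (cost_base.cost = 4),
   adjust_setup_cost amortizes the setup over the expected trip count, so for
   a loop rolling about 10 times the cost is roughly 4; the +1 penalties then
   make an artificial, non-doloop candidate slightly more expensive than an
   otherwise equivalent original or doloop candidate.  */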
6056 /* Determines costs of computation of the candidates. */
6058 static void
6059 determine_iv_costs (struct ivopts_data *data)
6061 unsigned i;
6063 if (dump_file && (dump_flags & TDF_DETAILS))
6065 fprintf (dump_file, "<Candidate Costs>:\n");
6066 fprintf (dump_file, " cand\tcost\n");
6069 for (i = 0; i < data->vcands.length (); i++)
6071 struct iv_cand *cand = data->vcands[i];
6073 determine_iv_cost (data, cand);
6075 if (dump_file && (dump_flags & TDF_DETAILS))
6076 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6079 if (dump_file && (dump_flags & TDF_DETAILS))
6080 fprintf (dump_file, "\n");
6083 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6084 induction variables. Note N_INVS includes both invariant variables and
6085 invariant expressions. */
6087 static unsigned
6088 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6089 unsigned n_cands)
6091 unsigned cost;
6092 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6093 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6094 bool speed = data->speed;
6096 /* If there is a call in the loop body, the call-clobbered registers
6097 are not available for loop invariants. */
6098 if (data->body_includes_call)
6099 available_regs = available_regs - target_clobbered_regs;
6101 /* If we have enough registers. */
6102 if (regs_needed + target_res_regs < available_regs)
6103 cost = n_new;
6104 /* If close to running out of registers, try to preserve them. */
6105 else if (regs_needed <= available_regs)
6106 cost = target_reg_cost [speed] * regs_needed;
6107 /* If we run out of available registers but the number of candidates
6108 does not exceed them, we penalize the extra registers using target_spill_cost. */
6109 else if (n_cands <= available_regs)
6110 cost = target_reg_cost [speed] * available_regs
6111 + target_spill_cost [speed] * (regs_needed - available_regs);
6112 /* If even the number of candidates exceeds the available registers, we
6113 penalize the extra candidate registers using target_spill_cost * 2,
6114 because it is more expensive to spill an induction variable than an invariant. */
6115 else
6116 cost = target_reg_cost [speed] * available_regs
6117 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6118 + target_spill_cost [speed] * (regs_needed - n_cands);
6120 /* Finally, add the number of candidates, so that we prefer eliminating
6121 induction variables if possible. */
6122 return cost + n_cands;
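/* Worked example with purely hypothetical numbers: n_old = 2 registers
   already used, available_regs = 10, target_res_regs = 3, target_reg_cost = 2
   and target_spill_cost = 8.

     n_invs = 1, n_cands = 2:  regs_needed = 5 and 5 + 3 < 10, so the
	result is n_new + n_cands = 3 + 2 = 5;
     n_invs = 5, n_cands = 4:  regs_needed = 11 > 10 but n_cands <= 10, so
	the result is 2*10 + 8*(11 - 10) + 4 = 32.  */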
6125 /* For each size of the induction variable set determine the penalty. */
6127 static void
6128 determine_set_costs (struct ivopts_data *data)
6130 unsigned j, n;
6131 gphi *phi;
6132 gphi_iterator psi;
6133 tree op;
6134 class loop *loop = data->current_loop;
6135 bitmap_iterator bi;
6137 if (dump_file && (dump_flags & TDF_DETAILS))
6139 fprintf (dump_file, "<Global Costs>:\n");
6140 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6141 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6142 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6143 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6146 n = 0;
6147 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6149 phi = psi.phi ();
6150 op = PHI_RESULT (phi);
6152 if (virtual_operand_p (op))
6153 continue;
6155 if (get_iv (data, op))
6156 continue;
6158 if (!POINTER_TYPE_P (TREE_TYPE (op))
6159 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6160 continue;
6162 n++;
6165 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6167 struct version_info *info = ver_info (data, j);
6169 if (info->inv_id && info->has_nonlin_use)
6170 n++;
6173 data->regs_used = n;
6174 if (dump_file && (dump_flags & TDF_DETAILS))
6175 fprintf (dump_file, " regs_used %d\n", n);
6177 if (dump_file && (dump_flags & TDF_DETAILS))
6179 fprintf (dump_file, " cost for size:\n");
6180 fprintf (dump_file, " ivs\tcost\n");
6181 for (j = 0; j <= 2 * target_avail_regs; j++)
6182 fprintf (dump_file, " %d\t%d\n", j,
6183 ivopts_estimate_reg_pressure (data, 0, j));
6184 fprintf (dump_file, "\n");
6188 /* Returns true if A is a cheaper cost pair than B. */
6190 static bool
6191 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6193 if (!a)
6194 return false;
6196 if (!b)
6197 return true;
6199 if (a->cost < b->cost)
6200 return true;
6202 if (b->cost < a->cost)
6203 return false;
6205 /* In case the costs are the same, prefer the cheaper candidate. */
6206 if (a->cand->cost < b->cand->cost)
6207 return true;
6209 return false;
6212 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6213 for more expensive, equal and cheaper respectively. */
6215 static int
6216 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6218 if (cheaper_cost_pair (a, b))
6219 return -1;
6220 if (cheaper_cost_pair (b, a))
6221 return 1;
6223 return 0;
6226 /* Returns the candidate by which USE is expressed in IVS. */
6228 static class cost_pair *
6229 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6231 return ivs->cand_for_group[group->id];
6234 /* Computes the cost field of IVS structure. */
6236 static void
6237 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6239 comp_cost cost = ivs->cand_use_cost;
6241 cost += ivs->cand_cost;
6242 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6243 ivs->cost = cost;
6246 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6247 and IVS. */
6249 static void
6250 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6252 bitmap_iterator bi;
6253 unsigned iid;
6255 if (!invs)
6256 return;
6258 gcc_assert (n_inv_uses != NULL);
6259 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6261 n_inv_uses[iid]--;
6262 if (n_inv_uses[iid] == 0)
6263 ivs->n_invs--;
6267 /* Set USE not to be expressed by any candidate in IVS. */
6269 static void
6270 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6271 struct iv_group *group)
6273 unsigned gid = group->id, cid;
6274 class cost_pair *cp;
6276 cp = ivs->cand_for_group[gid];
6277 if (!cp)
6278 return;
6279 cid = cp->cand->id;
6281 ivs->bad_groups++;
6282 ivs->cand_for_group[gid] = NULL;
6283 ivs->n_cand_uses[cid]--;
6285 if (ivs->n_cand_uses[cid] == 0)
6287 bitmap_clear_bit (ivs->cands, cid);
6288 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6289 ivs->n_cands--;
6290 ivs->cand_cost -= cp->cand->cost;
6291 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6292 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6295 ivs->cand_use_cost -= cp->cost;
6296 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6297 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6298 iv_ca_recount_cost (data, ivs);
6301 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6302 IVS. */
6304 static void
6305 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6307 bitmap_iterator bi;
6308 unsigned iid;
6310 if (!invs)
6311 return;
6313 gcc_assert (n_inv_uses != NULL);
6314 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6316 n_inv_uses[iid]++;
6317 if (n_inv_uses[iid] == 1)
6318 ivs->n_invs++;
6322 /* Set cost pair for GROUP in set IVS to CP. */
6324 static void
6325 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6326 struct iv_group *group, class cost_pair *cp)
6328 unsigned gid = group->id, cid;
6330 if (ivs->cand_for_group[gid] == cp)
6331 return;
6333 if (ivs->cand_for_group[gid])
6334 iv_ca_set_no_cp (data, ivs, group);
6336 if (cp)
6338 cid = cp->cand->id;
6340 ivs->bad_groups--;
6341 ivs->cand_for_group[gid] = cp;
6342 ivs->n_cand_uses[cid]++;
6343 if (ivs->n_cand_uses[cid] == 1)
6345 bitmap_set_bit (ivs->cands, cid);
6346 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6347 ivs->n_cands++;
6348 ivs->cand_cost += cp->cand->cost;
6349 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6350 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6353 ivs->cand_use_cost += cp->cost;
6354 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6355 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6356 iv_ca_recount_cost (data, ivs);
6360 /* Extend set IVS by expressing USE by some of the candidates in it
6361 if possible. Consider all important candidates if candidates in
6362 set IVS don't give any result. */
6364 static void
6365 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6366 struct iv_group *group)
6368 class cost_pair *best_cp = NULL, *cp;
6369 bitmap_iterator bi;
6370 unsigned i;
6371 struct iv_cand *cand;
6373 gcc_assert (ivs->upto >= group->id);
6374 ivs->upto++;
6375 ivs->bad_groups++;
6377 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6379 cand = data->vcands[i];
6380 cp = get_group_iv_cost (data, group, cand);
6381 if (cheaper_cost_pair (cp, best_cp))
6382 best_cp = cp;
6385 if (best_cp == NULL)
6387 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6389 cand = data->vcands[i];
6390 cp = get_group_iv_cost (data, group, cand);
6391 if (cheaper_cost_pair (cp, best_cp))
6392 best_cp = cp;
6396 iv_ca_set_cp (data, ivs, group, best_cp);
6399 /* Get cost for assignment IVS. */
6401 static comp_cost
6402 iv_ca_cost (class iv_ca *ivs)
6404 /* This was a conditional expression but it triggered a bug in
6405 Sun C 5.5. */
6406 if (ivs->bad_groups)
6407 return infinite_cost;
6408 else
6409 return ivs->cost;
6412 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6413 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6414 respectively. */
6416 static int
6417 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6418 struct iv_group *group, class cost_pair *old_cp,
6419 class cost_pair *new_cp)
6421 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6422 unsigned old_n_invs = ivs->n_invs;
6423 iv_ca_set_cp (data, ivs, group, new_cp);
6424 unsigned new_n_invs = ivs->n_invs;
6425 iv_ca_set_cp (data, ivs, group, old_cp);
6427 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6430 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6431 it before NEXT. */
6433 static struct iv_ca_delta *
6434 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6435 class cost_pair *new_cp, struct iv_ca_delta *next)
6437 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6439 change->group = group;
6440 change->old_cp = old_cp;
6441 change->new_cp = new_cp;
6442 change->next = next;
6444 return change;
6447 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6448 are rewritten. */
6450 static struct iv_ca_delta *
6451 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6453 struct iv_ca_delta *last;
6455 if (!l2)
6456 return l1;
6458 if (!l1)
6459 return l2;
6461 for (last = l1; last->next; last = last->next)
6462 continue;
6463 last->next = l2;
6465 return l1;
6468 /* Reverse the list of changes DELTA, forming the inverse to it. */
6470 static struct iv_ca_delta *
6471 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6473 struct iv_ca_delta *act, *next, *prev = NULL;
6475 for (act = delta; act; act = next)
6477 next = act->next;
6478 act->next = prev;
6479 prev = act;
6481 std::swap (act->old_cp, act->new_cp);
6484 return prev;
6487 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6488 reverted instead. */
6490 static void
6491 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6492 struct iv_ca_delta *delta, bool forward)
6494 class cost_pair *from, *to;
6495 struct iv_ca_delta *act;
6497 if (!forward)
6498 delta = iv_ca_delta_reverse (delta);
6500 for (act = delta; act; act = act->next)
6502 from = act->old_cp;
6503 to = act->new_cp;
6504 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6505 iv_ca_set_cp (data, ivs, act->group, to);
6508 if (!forward)
6509 iv_ca_delta_reverse (delta);
6512 /* Returns true if CAND is used in IVS. */
6514 static bool
6515 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6517 return ivs->n_cand_uses[cand->id] > 0;
6520 /* Returns number of induction variable candidates in the set IVS. */
6522 static unsigned
6523 iv_ca_n_cands (class iv_ca *ivs)
6525 return ivs->n_cands;
6528 /* Free the list of changes DELTA. */
6530 static void
6531 iv_ca_delta_free (struct iv_ca_delta **delta)
6533 struct iv_ca_delta *act, *next;
6535 for (act = *delta; act; act = next)
6537 next = act->next;
6538 free (act);
6541 *delta = NULL;
6544 /* Allocates new iv candidates assignment. */
6546 static class iv_ca *
6547 iv_ca_new (struct ivopts_data *data)
6549 class iv_ca *nw = XNEW (class iv_ca);
6551 nw->upto = 0;
6552 nw->bad_groups = 0;
6553 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6554 data->vgroups.length ());
6555 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6556 nw->cands = BITMAP_ALLOC (NULL);
6557 nw->n_cands = 0;
6558 nw->n_invs = 0;
6559 nw->cand_use_cost = no_cost;
6560 nw->cand_cost = 0;
6561 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6562 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6563 nw->cost = no_cost;
6565 return nw;
6568 /* Free memory occupied by the set IVS. */
6570 static void
6571 iv_ca_free (class iv_ca **ivs)
6573 free ((*ivs)->cand_for_group);
6574 free ((*ivs)->n_cand_uses);
6575 BITMAP_FREE ((*ivs)->cands);
6576 free ((*ivs)->n_inv_var_uses);
6577 free ((*ivs)->n_inv_expr_uses);
6578 free (*ivs);
6579 *ivs = NULL;
6582 /* Dumps IVS to FILE. */
6584 static void
6585 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6587 unsigned i;
6588 comp_cost cost = iv_ca_cost (ivs);
6590 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6591 cost.complexity);
6592 fprintf (file, " reg_cost: %d\n",
6593 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6594 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6595 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6596 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6597 bitmap_print (file, ivs->cands, " candidates: ","\n");
6599 for (i = 0; i < ivs->upto; i++)
6601 struct iv_group *group = data->vgroups[i];
6602 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6603 if (cp)
6604 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6605 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6606 cp->cost.cost, cp->cost.complexity);
6607 else
6608 fprintf (file, " group:%d --> ??\n", group->id);
6611 const char *pref = "";
6612 fprintf (file, " invariant variables: ");
6613 for (i = 1; i <= data->max_inv_var_id; i++)
6614 if (ivs->n_inv_var_uses[i])
6616 fprintf (file, "%s%d", pref, i);
6617 pref = ", ";
6620 pref = "";
6621 fprintf (file, "\n invariant expressions: ");
6622 for (i = 1; i <= data->max_inv_expr_id; i++)
6623 if (ivs->n_inv_expr_uses[i])
6625 fprintf (file, "%s%d", pref, i);
6626 pref = ", ";
6629 fprintf (file, "\n\n");
6632 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6633 new set, and store differences in DELTA. Number of induction variables
6634 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true,
6635 the function will try to find a solution with a minimal number of iv candidates. */
6637 static comp_cost
6638 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6639 struct iv_cand *cand, struct iv_ca_delta **delta,
6640 unsigned *n_ivs, bool min_ncand)
6642 unsigned i;
6643 comp_cost cost;
6644 struct iv_group *group;
6645 class cost_pair *old_cp, *new_cp;
6647 *delta = NULL;
6648 for (i = 0; i < ivs->upto; i++)
6650 group = data->vgroups[i];
6651 old_cp = iv_ca_cand_for_group (ivs, group);
6653 if (old_cp
6654 && old_cp->cand == cand)
6655 continue;
6657 new_cp = get_group_iv_cost (data, group, cand);
6658 if (!new_cp)
6659 continue;
6661 if (!min_ncand)
6663 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6664 /* Skip if new_cp depends on more invariants. */
6665 if (cmp_invs > 0)
6666 continue;
6668 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6669 /* Skip if new_cp is not cheaper. */
6670 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6671 continue;
6674 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6677 iv_ca_delta_commit (data, ivs, *delta, true);
6678 cost = iv_ca_cost (ivs);
6679 if (n_ivs)
6680 *n_ivs = iv_ca_n_cands (ivs);
6681 iv_ca_delta_commit (data, ivs, *delta, false);
6683 return cost;
6686 /* Try narrowing set IVS by removing CAND. Return the cost of
6687 the new set and store the differences in DELTA. START is
6688 the candidate with which we start narrowing. */
6690 static comp_cost
6691 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6692 struct iv_cand *cand, struct iv_cand *start,
6693 struct iv_ca_delta **delta)
6695 unsigned i, ci;
6696 struct iv_group *group;
6697 class cost_pair *old_cp, *new_cp, *cp;
6698 bitmap_iterator bi;
6699 struct iv_cand *cnd;
6700 comp_cost cost, best_cost, acost;
6702 *delta = NULL;
6703 for (i = 0; i < data->vgroups.length (); i++)
6705 group = data->vgroups[i];
6707 old_cp = iv_ca_cand_for_group (ivs, group);
6708 if (old_cp->cand != cand)
6709 continue;
6711 best_cost = iv_ca_cost (ivs);
6712 /* Start narrowing with START. */
6713 new_cp = get_group_iv_cost (data, group, start);
6715 if (data->consider_all_candidates)
6717 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6719 if (ci == cand->id || (start && ci == start->id))
6720 continue;
6722 cnd = data->vcands[ci];
6724 cp = get_group_iv_cost (data, group, cnd);
6725 if (!cp)
6726 continue;
6728 iv_ca_set_cp (data, ivs, group, cp);
6729 acost = iv_ca_cost (ivs);
6731 if (acost < best_cost)
6733 best_cost = acost;
6734 new_cp = cp;
6738 else
6740 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6742 if (ci == cand->id || (start && ci == start->id))
6743 continue;
6745 cnd = data->vcands[ci];
6747 cp = get_group_iv_cost (data, group, cnd);
6748 if (!cp)
6749 continue;
6751 iv_ca_set_cp (data, ivs, group, cp);
6752 acost = iv_ca_cost (ivs);
6754 if (acost < best_cost)
6756 best_cost = acost;
6757 new_cp = cp;
6761 /* Restore the old cp for this use. */
6762 iv_ca_set_cp (data, ivs, group, old_cp);
6764 if (!new_cp)
6766 iv_ca_delta_free (delta);
6767 return infinite_cost;
6770 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6773 iv_ca_delta_commit (data, ivs, *delta, true);
6774 cost = iv_ca_cost (ivs);
6775 iv_ca_delta_commit (data, ivs, *delta, false);
6777 return cost;
6780 /* Try optimizing the set of candidates IVS by removing candidates other
6781 than EXCEPT_CAND from it. Return the cost of the new set, and store
6782 differences in DELTA. */
6784 static comp_cost
6785 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6786 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6788 bitmap_iterator bi;
6789 struct iv_ca_delta *act_delta, *best_delta;
6790 unsigned i;
6791 comp_cost best_cost, acost;
6792 struct iv_cand *cand;
6794 best_delta = NULL;
6795 best_cost = iv_ca_cost (ivs);
6797 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6799 cand = data->vcands[i];
6801 if (cand == except_cand)
6802 continue;
6804 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6806 if (acost < best_cost)
6808 best_cost = acost;
6809 iv_ca_delta_free (&best_delta);
6810 best_delta = act_delta;
6812 else
6813 iv_ca_delta_free (&act_delta);
6816 if (!best_delta)
6818 *delta = NULL;
6819 return best_cost;
6822 /* Recurse to possibly remove other unnecessary ivs. */
6823 iv_ca_delta_commit (data, ivs, best_delta, true);
6824 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6825 iv_ca_delta_commit (data, ivs, best_delta, false);
6826 *delta = iv_ca_delta_join (best_delta, *delta);
6827 return best_cost;
6830 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6831 cheaper local cost for GROUP than BEST_CP. Return pointer to
6832 the corresponding cost_pair, otherwise just return BEST_CP. */
6834 static class cost_pair*
6835 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6836 unsigned int cand_idx, struct iv_cand *old_cand,
6837 class cost_pair *best_cp)
6839 struct iv_cand *cand;
6840 class cost_pair *cp;
6842 gcc_assert (old_cand != NULL && best_cp != NULL);
6843 if (cand_idx == old_cand->id)
6844 return best_cp;
6846 cand = data->vcands[cand_idx];
6847 cp = get_group_iv_cost (data, group, cand);
6848 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6849 return cp;
6851 return best_cp;
6854 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6855 which are used by more than one iv use. For each of those candidates,
6856 this function tries to represent the iv uses under that candidate using
6857 other candidates with lower local cost, then tries to prune the new set.
6858 If the new set has a lower cost, it returns the new cost after recording the
6859 candidate replacements in list DELTA. */
6861 static comp_cost
6862 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6863 struct iv_ca_delta **delta)
6865 bitmap_iterator bi, bj;
6866 unsigned int i, j, k;
6867 struct iv_cand *cand;
6868 comp_cost orig_cost, acost;
6869 struct iv_ca_delta *act_delta, *tmp_delta;
6870 class cost_pair *old_cp, *best_cp = NULL;
6872 *delta = NULL;
6873 orig_cost = iv_ca_cost (ivs);
6875 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6877 if (ivs->n_cand_uses[i] == 1
6878 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6879 continue;
6881 cand = data->vcands[i];
6883 act_delta = NULL;
6884 /* Represent uses under current candidate using other ones with
6885 lower local cost. */
6886 for (j = 0; j < ivs->upto; j++)
6888 struct iv_group *group = data->vgroups[j];
6889 old_cp = iv_ca_cand_for_group (ivs, group);
6891 if (old_cp->cand != cand)
6892 continue;
6894 best_cp = old_cp;
6895 if (data->consider_all_candidates)
6896 for (k = 0; k < data->vcands.length (); k++)
6897 best_cp = cheaper_cost_with_cand (data, group, k,
6898 old_cp->cand, best_cp);
6899 else
6900 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6901 best_cp = cheaper_cost_with_cand (data, group, k,
6902 old_cp->cand, best_cp);
6904 if (best_cp == old_cp)
6905 continue;
6907 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6909 /* No need for further pruning. */
6910 if (!act_delta)
6911 continue;
6913 /* Prune the new candidate set. */
6914 iv_ca_delta_commit (data, ivs, act_delta, true);
6915 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6916 iv_ca_delta_commit (data, ivs, act_delta, false);
6917 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6919 if (acost < orig_cost)
6921 *delta = act_delta;
6922 return acost;
6924 else
6925 iv_ca_delta_free (&act_delta);
6928 return orig_cost;
6931 /* Tries to extend the set IVS in the best possible way in order to
6932 express GROUP. If ORIGINALP is true, prefer candidates from
6933 the original set of IVs, otherwise favor important candidates not
6934 based on any memory object. */
6936 static bool
6937 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6938 struct iv_group *group, bool originalp)
6940 comp_cost best_cost, act_cost;
6941 unsigned i;
6942 bitmap_iterator bi;
6943 struct iv_cand *cand;
6944 struct iv_ca_delta *best_delta = NULL, *act_delta;
6945 class cost_pair *cp;
6947 iv_ca_add_group (data, ivs, group);
6948 best_cost = iv_ca_cost (ivs);
6949 cp = iv_ca_cand_for_group (ivs, group);
6950 if (cp)
6952 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6953 iv_ca_set_no_cp (data, ivs, group);
6956 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6957 first try important candidates not based on any memory object. Only if
6958 this fails, try the specific ones. Rationale -- in loops with many
6959 variables the best choice often is to use just one generic biv. If we
6960 added here many ivs specific to the uses, the optimization algorithm later
6961 would be likely to get stuck in a local minimum, thus causing us to create
6962 too many ivs. The approach from few ivs to more seems more likely to be
6963 successful -- starting from few ivs, replacing an expensive use by a
6964 specific iv should always be a win. */
6965 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6967 cand = data->vcands[i];
6969 if (originalp && cand->pos != IP_ORIGINAL)
6970 continue;
6972 if (!originalp && cand->iv->base_object != NULL_TREE)
6973 continue;
6975 if (iv_ca_cand_used_p (ivs, cand))
6976 continue;
6978 cp = get_group_iv_cost (data, group, cand);
6979 if (!cp)
6980 continue;
6982 iv_ca_set_cp (data, ivs, group, cp);
6983 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6984 true);
6985 iv_ca_set_no_cp (data, ivs, group);
6986 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6988 if (act_cost < best_cost)
6990 best_cost = act_cost;
6992 iv_ca_delta_free (&best_delta);
6993 best_delta = act_delta;
6995 else
6996 iv_ca_delta_free (&act_delta);
6999 if (best_cost.infinite_cost_p ())
7001 for (i = 0; i < group->n_map_members; i++)
7003 cp = group->cost_map + i;
7004 cand = cp->cand;
7005 if (!cand)
7006 continue;
7008 /* Already tried this. */
7009 if (cand->important)
7011 if (originalp && cand->pos == IP_ORIGINAL)
7012 continue;
7013 if (!originalp && cand->iv->base_object == NULL_TREE)
7014 continue;
7017 if (iv_ca_cand_used_p (ivs, cand))
7018 continue;
7020 act_delta = NULL;
7021 iv_ca_set_cp (data, ivs, group, cp);
7022 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7023 iv_ca_set_no_cp (data, ivs, group);
7024 act_delta = iv_ca_delta_add (group,
7025 iv_ca_cand_for_group (ivs, group),
7026 cp, act_delta);
7028 if (act_cost < best_cost)
7030 best_cost = act_cost;
7032 if (best_delta)
7033 iv_ca_delta_free (&best_delta);
7034 best_delta = act_delta;
7036 else
7037 iv_ca_delta_free (&act_delta);
7041 iv_ca_delta_commit (data, ivs, best_delta, true);
7042 iv_ca_delta_free (&best_delta);
7044 return !best_cost.infinite_cost_p ();
7047 /* Finds an initial assignment of candidates to uses. */
7049 static class iv_ca *
7050 get_initial_solution (struct ivopts_data *data, bool originalp)
7052 unsigned i;
7053 class iv_ca *ivs = iv_ca_new (data);
7055 for (i = 0; i < data->vgroups.length (); i++)
7056 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7058 iv_ca_free (&ivs);
7059 return NULL;
7062 return ivs;
7065 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7066 points to a bool variable; if it is true, this function also tries to break a
7067 local optimal fixed-point by replacing candidates in IVS. */
7069 static bool
7070 try_improve_iv_set (struct ivopts_data *data,
7071 class iv_ca *ivs, bool *try_replace_p)
7073 unsigned i, n_ivs;
7074 comp_cost acost, best_cost = iv_ca_cost (ivs);
7075 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7076 struct iv_cand *cand;
7078 /* Try extending the set of induction variables by one. */
7079 for (i = 0; i < data->vcands.length (); i++)
7081 cand = data->vcands[i];
7083 if (iv_ca_cand_used_p (ivs, cand))
7084 continue;
7086 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7087 if (!act_delta)
7088 continue;
7090 /* If we successfully added the candidate and the set is small enough,
7091 try optimizing it by removing other candidates. */
7092 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7094 iv_ca_delta_commit (data, ivs, act_delta, true);
7095 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7096 iv_ca_delta_commit (data, ivs, act_delta, false);
7097 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7100 if (acost < best_cost)
7102 best_cost = acost;
7103 iv_ca_delta_free (&best_delta);
7104 best_delta = act_delta;
7106 else
7107 iv_ca_delta_free (&act_delta);
7110 if (!best_delta)
7112 /* Try removing the candidates from the set instead. */
7113 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7115 if (!best_delta && *try_replace_p)
7117 *try_replace_p = false;
7118 /* So far the candidate selection algorithm tends to choose fewer IVs,
7119 so that it can handle cases in which loops have many variables
7120 but the best choice is often to use only one general biv. One
7121 weakness is that it can't handle the opposite cases, in which different
7122 candidates should be chosen with respect to each use. To solve
7123 this problem, we replace candidates in the manner described in the
7124 comments of iv_ca_replace, thus giving the general algorithm a chance
7125 to break the local optimal fixed-point in these cases. */
7126 best_cost = iv_ca_replace (data, ivs, &best_delta);
7129 if (!best_delta)
7130 return false;
7133 iv_ca_delta_commit (data, ivs, best_delta, true);
7134 iv_ca_delta_free (&best_delta);
7135 return best_cost == iv_ca_cost (ivs);
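/* The overall search is a simple hill climb; schematically, with purely
   hypothetical candidates and costs:

	{c1, c2} cost 20  --extend by c5-->  {c1, c2, c5} cost 18
			  --prune c2----->   {c1, c5}     cost 15

   find_optimal_iv_set_1 below keeps calling try_improve_iv_set until no
   delta with a strictly better cost is found.  */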
7138 /* Attempts to find the optimal set of induction variables. We use a simple
7139 greedy heuristic -- we try to replace at most one candidate in the selected
7140 solution and remove the unused ivs while this improves the cost. */
7142 static class iv_ca *
7143 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7145 class iv_ca *set;
7146 bool try_replace_p = true;
7148 /* Get the initial solution. */
7149 set = get_initial_solution (data, originalp);
7150 if (!set)
7152 if (dump_file && (dump_flags & TDF_DETAILS))
7153 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7154 return NULL;
7157 if (dump_file && (dump_flags & TDF_DETAILS))
7159 fprintf (dump_file, "Initial set of candidates:\n");
7160 iv_ca_dump (data, dump_file, set);
7163 while (try_improve_iv_set (data, set, &try_replace_p))
7165 if (dump_file && (dump_flags & TDF_DETAILS))
7167 fprintf (dump_file, "Improved to:\n");
7168 iv_ca_dump (data, dump_file, set);
7172 /* If the set has infinite_cost, it can't be optimal. */
7173 if (iv_ca_cost (set).infinite_cost_p ())
7175 if (dump_file && (dump_flags & TDF_DETAILS))
7176 fprintf (dump_file,
7177 "Overflow to infinite cost in try_improve_iv_set.\n");
7178 iv_ca_free (&set);
7180 return set;
7183 static class iv_ca *
7184 find_optimal_iv_set (struct ivopts_data *data)
7186 unsigned i;
7187 comp_cost cost, origcost;
7188 class iv_ca *set, *origset;
7190 /* Determine the cost based on a strategy that starts with original IVs,
7191 and try again using a strategy that prefers candidates not based
7192 on any IVs. */
7193 origset = find_optimal_iv_set_1 (data, true);
7194 set = find_optimal_iv_set_1 (data, false);
7196 if (!origset && !set)
7197 return NULL;
7199 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7200 cost = set ? iv_ca_cost (set) : infinite_cost;
7202 if (dump_file && (dump_flags & TDF_DETAILS))
7204 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7205 origcost.cost, origcost.complexity);
7206 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7207 cost.cost, cost.complexity);
7210 /* Choose the one with the best cost. */
7211 if (origcost <= cost)
7213 if (set)
7214 iv_ca_free (&set);
7215 set = origset;
7217 else if (origset)
7218 iv_ca_free (&origset);
7220 for (i = 0; i < data->vgroups.length (); i++)
7222 struct iv_group *group = data->vgroups[i];
7223 group->selected = iv_ca_cand_for_group (set, group)->cand;
7226 return set;
7229 /* Creates a new induction variable corresponding to CAND. */
7231 static void
7232 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7234 gimple_stmt_iterator incr_pos;
7235 tree base;
7236 struct iv_use *use;
7237 struct iv_group *group;
7238 bool after = false;
7240 gcc_assert (cand->iv != NULL);
7242 switch (cand->pos)
7244 case IP_NORMAL:
7245 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7246 break;
7248 case IP_END:
7249 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7250 after = true;
7251 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7253 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7254 incr_pos = gsi_after_labels (split_edge (e));
7255 after = false;
7257 break;
7259 case IP_AFTER_USE:
7260 after = true;
7261 /* fall through */
7262 case IP_BEFORE_USE:
7263 incr_pos = gsi_for_stmt (cand->incremented_at);
7264 break;
7266 case IP_ORIGINAL:
7267 /* Mark that the iv is preserved. */
7268 name_info (data, cand->var_before)->preserve_biv = true;
7269 name_info (data, cand->var_after)->preserve_biv = true;
7271 /* Rewrite the increment so that it uses var_before directly. */
7272 use = find_interesting_uses_op (data, cand->var_after);
7273 group = data->vgroups[use->group_id];
7274 group->selected = cand;
7275 return;
7278 gimple_add_tmp_var (cand->var_before);
7280 base = unshare_expr (cand->iv->base);
7282 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7283 cand->var_before, data->current_loop,
7284 &incr_pos, after, &cand->var_before, &cand->var_after);
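/* For a candidate with base B and step S placed at IP_NORMAL, the call above
   produces GIMPLE of roughly this shape (SSA names are illustrative):

	# ivtmp_1 = PHI <B (preheader), ivtmp_2 (latch)>
	...
	ivtmp_2 = ivtmp_1 + S;

   with cand->var_before = ivtmp_1 and cand->var_after = ivtmp_2.  */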
7287 /* Creates new induction variables described in SET. */
7289 static void
7290 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7292 unsigned i;
7293 struct iv_cand *cand;
7294 bitmap_iterator bi;
7296 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7298 cand = data->vcands[i];
7299 create_new_iv (data, cand);
7302 if (dump_file && (dump_flags & TDF_DETAILS))
7304 fprintf (dump_file, "Selected IV set for loop %d",
7305 data->current_loop->num);
7306 if (data->loop_loc != UNKNOWN_LOCATION)
7307 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7308 LOCATION_LINE (data->loop_loc));
7309 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7310 avg_loop_niter (data->current_loop));
7311 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7312 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7314 cand = data->vcands[i];
7315 dump_cand (dump_file, cand);
7317 fprintf (dump_file, "\n");
7321 /* Rewrites USE (definition of iv used in a nonlinear expression)
7322 using candidate CAND. */
7324 static void
7325 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7326 struct iv_use *use, struct iv_cand *cand)
7328 gassign *ass;
7329 gimple_stmt_iterator bsi;
7330 tree comp, type = get_use_type (use), tgt;
7332 /* An important special case -- if we are asked to express value of
7333 the original iv by itself, just exit; there is no need to
7334 introduce a new computation (that might also need casting the
7335 variable to unsigned and back). */
7336 if (cand->pos == IP_ORIGINAL
7337 && cand->incremented_at == use->stmt)
7339 tree op = NULL_TREE;
7340 enum tree_code stmt_code;
7342 gcc_assert (is_gimple_assign (use->stmt));
7343 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7345 /* Check whether we may leave the computation unchanged.
7346 This is the case only if it does not rely on other
7347 computations in the loop -- otherwise, the computation
7348 we rely upon may be removed in remove_unused_ivs,
7349 thus leading to ICE. */
7350 stmt_code = gimple_assign_rhs_code (use->stmt);
7351 if (stmt_code == PLUS_EXPR
7352 || stmt_code == MINUS_EXPR
7353 || stmt_code == POINTER_PLUS_EXPR)
7355 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7356 op = gimple_assign_rhs2 (use->stmt);
7357 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7358 op = gimple_assign_rhs1 (use->stmt);
7361 if (op != NULL_TREE)
7363 if (expr_invariant_in_loop_p (data->current_loop, op))
7364 return;
7365 if (TREE_CODE (op) == SSA_NAME)
7367 struct iv *iv = get_iv (data, op);
7368 if (iv != NULL && integer_zerop (iv->step))
7369 return;
7374 switch (gimple_code (use->stmt))
7376 case GIMPLE_PHI:
7377 tgt = PHI_RESULT (use->stmt);
7379 /* If we should keep the biv, do not replace it. */
7380 if (name_info (data, tgt)->preserve_biv)
7381 return;
7383 bsi = gsi_after_labels (gimple_bb (use->stmt));
7384 break;
7386 case GIMPLE_ASSIGN:
7387 tgt = gimple_assign_lhs (use->stmt);
7388 bsi = gsi_for_stmt (use->stmt);
7389 break;
7391 default:
7392 gcc_unreachable ();
7395 aff_tree aff_inv, aff_var;
7396 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7397 use, cand, &aff_inv, &aff_var))
7398 gcc_unreachable ();
7400 unshare_aff_combination (&aff_inv);
7401 unshare_aff_combination (&aff_var);
7402 /* Prefer a CSE opportunity over a loop invariant by adding the offset last,
7403 so that iv_uses which differ only in their offsets can be CSEed. */
7404 poly_widest_int offset = aff_inv.offset;
7405 aff_inv.offset = 0;
7407 gimple_seq stmt_list = NULL, seq = NULL;
7408 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7409 tree comp_op2 = aff_combination_to_tree (&aff_var);
7410 gcc_assert (comp_op1 && comp_op2);
7412 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7413 gimple_seq_add_seq (&stmt_list, seq);
7414 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7415 gimple_seq_add_seq (&stmt_list, seq);
7417 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7418 std::swap (comp_op1, comp_op2);
7420 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7422 comp = fold_build_pointer_plus (comp_op1,
7423 fold_convert (sizetype, comp_op2));
7424 comp = fold_build_pointer_plus (comp,
7425 wide_int_to_tree (sizetype, offset));
7427 else
7429 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7430 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7431 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7432 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7435 comp = fold_convert (type, comp);
7436 comp = force_gimple_operand (comp, &seq, false, NULL);
7437 gimple_seq_add_seq (&stmt_list, seq);
7438 if (gimple_code (use->stmt) != GIMPLE_PHI
7439 /* We can't allow re-allocating the stmt as it might be pointed
7440 to still. */
7441 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7442 >= gimple_num_ops (gsi_stmt (bsi))))
7444 comp = force_gimple_operand (comp, &seq, true, NULL);
7445 gimple_seq_add_seq (&stmt_list, seq);
7446 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7448 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7449 /* As this isn't a plain copy we have to reset alignment
7450 information. */
7451 if (SSA_NAME_PTR_INFO (comp))
7452 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7456 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7457 if (gimple_code (use->stmt) == GIMPLE_PHI)
7459 ass = gimple_build_assign (tgt, comp);
7460 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7462 bsi = gsi_for_stmt (use->stmt);
7463 remove_phi_node (&bsi, false);
7465 else
7467 gimple_assign_set_rhs_from_tree (&bsi, comp);
7468 use->stmt = gsi_stmt (bsi);
7472 /* Performs a peephole optimization to reorder the iv update statement with
7473 a mem ref to enable instruction combining in later phases. The mem ref uses
7474 the iv value before the update, so the reordering transformation requires
7475 adjustment of the offset. CAND is the selected IV_CAND.
7477 Example:
7479 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7480 iv2 = iv1 + 1;
7482 if (t < val) (1)
7483 goto L;
7484 goto Head;
7487 directly propagating t over to (1) will introduce an overlapping live range
7488 and thus increase register pressure. This peephole transforms it into:
7491 iv2 = iv1 + 1;
7492 t = MEM_REF (base, iv2, 8, 8);
7493 if (t < val)
7494 goto L;
7495 goto Head;
7498 static void
7499 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7501 tree var_after;
7502 gimple *iv_update, *stmt;
7503 basic_block bb;
7504 gimple_stmt_iterator gsi, gsi_iv;
7506 if (cand->pos != IP_NORMAL)
7507 return;
7509 var_after = cand->var_after;
7510 iv_update = SSA_NAME_DEF_STMT (var_after);
7512 bb = gimple_bb (iv_update);
7513 gsi = gsi_last_nondebug_bb (bb);
7514 stmt = gsi_stmt (gsi);
7516 /* Only handle conditional statement for now. */
7517 if (gimple_code (stmt) != GIMPLE_COND)
7518 return;
7520 gsi_prev_nondebug (&gsi);
7521 stmt = gsi_stmt (gsi);
7522 if (stmt != iv_update)
7523 return;
7525 gsi_prev_nondebug (&gsi);
7526 if (gsi_end_p (gsi))
7527 return;
7529 stmt = gsi_stmt (gsi);
7530 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7531 return;
7533 if (stmt != use->stmt)
7534 return;
7536 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7537 return;
7539 if (dump_file && (dump_flags & TDF_DETAILS))
7541 fprintf (dump_file, "Reordering \n");
7542 print_gimple_stmt (dump_file, iv_update, 0);
7543 print_gimple_stmt (dump_file, use->stmt, 0);
7544 fprintf (dump_file, "\n");
7547 gsi = gsi_for_stmt (use->stmt);
7548 gsi_iv = gsi_for_stmt (iv_update);
7549 gsi_move_before (&gsi_iv, &gsi);
7551 cand->pos = IP_BEFORE_USE;
7552 cand->incremented_at = use->stmt;
7555 /* Return the alias pointer type that should be used for a MEM_REF
7556 associated with USE, which has type USE_PTR_ADDRESS. */
7558 static tree
7559 get_alias_ptr_type_for_ptr_address (iv_use *use)
7561 gcall *call = as_a <gcall *> (use->stmt);
7562 switch (gimple_call_internal_fn (call))
7564 case IFN_MASK_LOAD:
7565 case IFN_MASK_STORE:
7566 case IFN_MASK_LOAD_LANES:
7567 case IFN_MASK_STORE_LANES:
7568 case IFN_LEN_LOAD:
7569 case IFN_LEN_STORE:
7570 case IFN_LEN_MASK_LOAD:
7571 case IFN_LEN_MASK_STORE:
7572 /* The second argument contains the correct alias type. */
7573 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7574 return TREE_TYPE (gimple_call_arg (call, 1));
7576 default:
7577 gcc_unreachable ();
7582 /* Rewrites USE (address that is an iv) using candidate CAND. */
7584 static void
7585 rewrite_use_address (struct ivopts_data *data,
7586 struct iv_use *use, struct iv_cand *cand)
7588 aff_tree aff;
7589 bool ok;
7591 adjust_iv_update_pos (cand, use);
7592 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7593 gcc_assert (ok);
7594 unshare_aff_combination (&aff);
7596 /* To avoid undefined overflow problems, all IV candidates use unsigned
7597 integer types. The drawback is that this makes it impossible for
7598 create_mem_ref to distinguish an IV that is based on a memory object
7599 from one that represents simply an offset.
7601 To work around this problem, we pass a hint to create_mem_ref that
7602 indicates which variable (if any) in aff is an IV based on a memory
7603 object. Note that we only consider the candidate. If this is not
7604 based on an object, the base of the reference is in some subexpression
7605 of the use -- but these will use pointer types, so they are recognized
7606 by the create_mem_ref heuristics anyway. */
7607 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7608 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7609 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7610 tree type = use->mem_type;
7611 tree alias_ptr_type;
7612 if (use->type == USE_PTR_ADDRESS)
7613 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7614 else
7616 gcc_assert (type == TREE_TYPE (*use->op_p));
7617 unsigned int align = get_object_alignment (*use->op_p);
7618 if (align != TYPE_ALIGN (type))
7619 type = build_aligned_type (type, align);
7620 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7622 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7623 iv, base_hint, data->speed);
7625 if (use->type == USE_PTR_ADDRESS)
7627 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7628 ref = fold_convert (get_use_type (use), ref);
7629 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7630 true, GSI_SAME_STMT);
7632 else
7633 copy_ref_info (ref, *use->op_p);
7635 *use->op_p = ref;
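/* As an illustration (schematic, not actual dump output), an address use
   such as *(&a[0] + i_3 * 4) whose group selected an address-based candidate
   may be rewritten into a TARGET_MEM_REF along the lines of
   MEM[symbol: a, index: ivtmp_7, step: 4], so that later passes can pick an
   addressing mode directly.  */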
7638 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7639 candidate CAND. */
7641 static void
7642 rewrite_use_compare (struct ivopts_data *data,
7643 struct iv_use *use, struct iv_cand *cand)
7645 tree comp, op, bound;
7646 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7647 enum tree_code compare;
7648 struct iv_group *group = data->vgroups[use->group_id];
7649 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7651 bound = cp->value;
7652 if (bound)
7654 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7655 tree var_type = TREE_TYPE (var);
7656 gimple_seq stmts;
7658 if (dump_file && (dump_flags & TDF_DETAILS))
7660 fprintf (dump_file, "Replacing exit test: ");
7661 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7663 compare = cp->comp;
7664 bound = unshare_expr (fold_convert (var_type, bound));
7665 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7666 if (stmts)
7667 gsi_insert_seq_on_edge_immediate (
7668 loop_preheader_edge (data->current_loop),
7669 stmts);
7671 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7672 gimple_cond_set_lhs (cond_stmt, var);
7673 gimple_cond_set_code (cond_stmt, compare);
7674 gimple_cond_set_rhs (cond_stmt, op);
7675 return;
7678 /* The induction variable elimination failed; just express the original
7679 giv. */
7680 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7681 gcc_assert (comp != NULL_TREE);
7682 gcc_assert (use->op_p != NULL);
7683 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7684 SSA_NAME_VAR (*use->op_p),
7685 true, GSI_SAME_STMT);
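/* For example (schematic, with assumed SSA names), an exit test

	if (i_5 < n_4)

   whose group selected a down-counting candidate may become

	if (ivtmp_9 != 0)

   where the replacement bound, when not constant, is computed once on the
   preheader edge by the code above.  */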
7688 /* Rewrite the groups using the selected induction variables. */
7690 static void
7691 rewrite_groups (struct ivopts_data *data)
7693 unsigned i, j;
7695 for (i = 0; i < data->vgroups.length (); i++)
7697 struct iv_group *group = data->vgroups[i];
7698 struct iv_cand *cand = group->selected;
7700 gcc_assert (cand);
7702 if (group->type == USE_NONLINEAR_EXPR)
7704 for (j = 0; j < group->vuses.length (); j++)
7706 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7707 update_stmt (group->vuses[j]->stmt);
7710 else if (address_p (group->type))
7712 for (j = 0; j < group->vuses.length (); j++)
7714 rewrite_use_address (data, group->vuses[j], cand);
7715 update_stmt (group->vuses[j]->stmt);
7718 else
7720 gcc_assert (group->type == USE_COMPARE);
7722 for (j = 0; j < group->vuses.length (); j++)
7724 rewrite_use_compare (data, group->vuses[j], cand);
7725 update_stmt (group->vuses[j]->stmt);
7731 /* Removes the ivs that are not used after rewriting. */
7733 static void
7734 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7736 unsigned j;
7737 bitmap_iterator bi;
7739 /* Figure out an order in which to release SSA DEFs so that we don't
7740 release something that we'd have to propagate into a debug stmt
7741 afterwards. */
7742 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7744 struct version_info *info;
7746 info = ver_info (data, j);
7747 if (info->iv
7748 && !integer_zerop (info->iv->step)
7749 && !info->inv_id
7750 && !info->iv->nonlin_use
7751 && !info->preserve_biv)
7753 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7755 tree def = info->iv->ssa_name;
7757 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7759 imm_use_iterator imm_iter;
7760 use_operand_p use_p;
7761 gimple *stmt;
7762 int count = 0;
7764 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7766 if (!gimple_debug_bind_p (stmt))
7767 continue;
7769 /* We just want to determine whether to do nothing
7770 (count == 0), to substitute the computed
7771 expression into a single use of the SSA DEF by
7772 itself (count == 1), or to use a debug temp
7773 because the SSA DEF is used multiple times or as
7774 part of a larger expression (count > 1). */
7775 count++;
7776 if (gimple_debug_bind_get_value (stmt) != def)
7777 count++;
7779 if (count > 1)
7780 break;
7783 if (!count)
7784 continue;
7786 struct iv_use dummy_use;
7787 struct iv_cand *best_cand = NULL, *cand;
7788 unsigned i, best_pref = 0, cand_pref;
7789 tree comp = NULL_TREE;
7791 memset (&dummy_use, 0, sizeof (dummy_use));
7792 dummy_use.iv = info->iv;
7793 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7795 cand = data->vgroups[i]->selected;
7796 if (cand == best_cand)
7797 continue;
7798 cand_pref = operand_equal_p (cand->iv->step,
7799 info->iv->step, 0)
7800 ? 4 : 0;
7801 cand_pref
7802 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7803 == TYPE_MODE (TREE_TYPE (info->iv->base))
7804 ? 2 : 0;
7805 cand_pref
7806 += TREE_CODE (cand->iv->base) == INTEGER_CST
7807 ? 1 : 0;
7808 if (best_cand == NULL || best_pref < cand_pref)
7810 tree this_comp
7811 = get_debug_computation_at (data->current_loop,
7812 SSA_NAME_DEF_STMT (def),
7813 &dummy_use, cand);
7814 if (this_comp)
7816 best_cand = cand;
7817 best_pref = cand_pref;
7818 comp = this_comp;
7823 if (!best_cand)
7824 continue;
7826 comp = unshare_expr (comp);
7827 if (count > 1)
7829 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7830 /* FIXME: Is setting the mode really necessary? */
7831 if (SSA_NAME_VAR (def))
7832 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7833 else
7834 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7835 gdebug *def_temp
7836 = gimple_build_debug_bind (vexpr, comp, NULL);
7837 gimple_stmt_iterator gsi;
7839 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7840 gsi = gsi_after_labels (gimple_bb
7841 (SSA_NAME_DEF_STMT (def)));
7842 else
7843 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7845 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7846 comp = vexpr;
7849 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7851 if (!gimple_debug_bind_p (stmt))
7852 continue;
7854 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7855 SET_USE (use_p, comp);
7857 update_stmt (stmt);
7864 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7865 for hash_map::traverse. */
7867 bool
7868 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7870 free (value);
7871 return true;
7874 /* Frees data allocated by the optimization of a single loop. */
7876 static void
7877 free_loop_data (struct ivopts_data *data)
7879 unsigned i, j;
7880 bitmap_iterator bi;
7881 tree obj;
7883 if (data->niters)
7885 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7886 delete data->niters;
7887 data->niters = NULL;
7890 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7892 struct version_info *info;
7894 info = ver_info (data, i);
7895 info->iv = NULL;
7896 info->has_nonlin_use = false;
7897 info->preserve_biv = false;
7898 info->inv_id = 0;
7900 bitmap_clear (data->relevant);
7901 bitmap_clear (data->important_candidates);
7903 for (i = 0; i < data->vgroups.length (); i++)
7905 struct iv_group *group = data->vgroups[i];
7907 for (j = 0; j < group->vuses.length (); j++)
7908 free (group->vuses[j]);
7909 group->vuses.release ();
7911 BITMAP_FREE (group->related_cands);
7912 for (j = 0; j < group->n_map_members; j++)
7914 if (group->cost_map[j].inv_vars)
7915 BITMAP_FREE (group->cost_map[j].inv_vars);
7916 if (group->cost_map[j].inv_exprs)
7917 BITMAP_FREE (group->cost_map[j].inv_exprs);
7920 free (group->cost_map);
7921 free (group);
7923 data->vgroups.truncate (0);
7925 for (i = 0; i < data->vcands.length (); i++)
7927 struct iv_cand *cand = data->vcands[i];
7929 if (cand->inv_vars)
7930 BITMAP_FREE (cand->inv_vars);
7931 if (cand->inv_exprs)
7932 BITMAP_FREE (cand->inv_exprs);
7933 free (cand);
7935 data->vcands.truncate (0);
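/* If new SSA names were created since version_info was allocated, grow the
   array (zero-initialized, with room to spare) so that every name has an
   entry when the next loop is processed.  */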
7937 if (data->version_info_size < num_ssa_names)
7939 data->version_info_size = 2 * num_ssa_names;
7940 free (data->version_info);
7941 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7944 data->max_inv_var_id = 0;
7945 data->max_inv_expr_id = 0;
7947 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7948 SET_DECL_RTL (obj, NULL_RTX);
7950 decl_rtl_to_reset.truncate (0);
7952 data->inv_expr_tab->empty ();
7954 data->iv_common_cand_tab->empty ();
7955 data->iv_common_cands.truncate (0);
7958 /* Finalizes the data structures used by the iv optimization pass, freeing
7959    everything that was allocated in DATA.  */
7961 static void
7962 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7964 free_loop_data (data);
7965 free (data->version_info);
7966 BITMAP_FREE (data->relevant);
7967 BITMAP_FREE (data->important_candidates);
7969 decl_rtl_to_reset.release ();
7970 data->vgroups.release ();
7971 data->vcands.release ();
7972 delete data->inv_expr_tab;
7973 data->inv_expr_tab = NULL;
7974 free_affine_expand_cache (&data->name_expansion_cache);
7975 if (data->base_object_map)
7976 delete data->base_object_map;
7977 delete data->iv_common_cand_tab;
7978 data->iv_common_cand_tab = NULL;
7979 data->iv_common_cands.release ();
7980 obstack_free (&data->iv_obstack, NULL);
7983 /* Returns true if the loop body BODY (NUM_NODES blocks) contains a call that is neither an internal call nor an inexpensive builtin.  */
7985 static bool
7986 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7988 gimple_stmt_iterator gsi;
7989 unsigned i;
7991 for (i = 0; i < num_nodes; i++)
7992 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7994 gimple *stmt = gsi_stmt (gsi);
7995 if (is_gimple_call (stmt)
7996 && !gimple_call_internal_p (stmt)
7997 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7998 return true;
8000 return false;
8003 /* Determine a cost scaling factor for each basic block in the loop, based on its profile frequency relative to the loop header.  */
8004 #define COST_SCALING_FACTOR_BOUND (20)
8006 static void
8007 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
8009 int lfreq = data->current_loop->header->count.to_frequency (cfun);
8010 if (!data->speed || lfreq <= 0)
8011 return;
8013 int max_freq = lfreq;
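/* Each block's aux field will hold its cost multiplier; it defaults to 1
   and is cleared again once the IV set for the loop has been chosen.  */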
8014 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8016 body[i]->aux = (void *)(intptr_t) 1;
8017 if (max_freq < body[i]->count.to_frequency (cfun))
8018 max_freq = body[i]->count.to_frequency (cfun);
8020 if (max_freq > lfreq)
8022 int divisor, factor;
8023 /* Check if scaling factor itself needs to be scaled by the bound. This
8024 is to avoid overflow when scaling cost according to profile info. */
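/* For example, with a header frequency of 100 and a hottest block of
   frequency 5000, the ratio 50 exceeds the bound, so factor = 20 and
   divisor = 5000; a block of frequency 2500 then gets
   aux = 20 * 2500 / 5000 = 10.  With a hottest block of only 300 the
   bound is not hit and that block simply gets aux = 300 / 100 = 3.  */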
8025 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8027 divisor = max_freq;
8028 factor = COST_SCALING_FACTOR_BOUND;
8030 else
8032 divisor = lfreq;
8033 factor = 1;
8035 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8037 int bfreq = body[i]->count.to_frequency (cfun);
8038 if (bfreq <= lfreq)
8039 continue;
8041 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8046 /* Find the doloop comparison IV use and, if one is found, set the doloop_p flag of its group.  */
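/* A qualifying use is the exit test of a counted loop, i.e. a GIMPLE_COND
   such as
       if (i_1 < n_2) goto <latch>; else goto <exit>;
   where one successor is the (empty) loop latch.  The SSA names above are
   purely illustrative.  */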
8048 static bool
8049 find_doloop_use (struct ivopts_data *data)
8051 struct loop *loop = data->current_loop;
8053 for (unsigned i = 0; i < data->vgroups.length (); i++)
8055 struct iv_group *group = data->vgroups[i];
8056 if (group->type == USE_COMPARE)
8058 gcc_assert (group->vuses.length () == 1);
8059 struct iv_use *use = group->vuses[0];
8060 gimple *stmt = use->stmt;
8061 if (gimple_code (stmt) == GIMPLE_COND)
8063 basic_block bb = gimple_bb (stmt);
8064 edge true_edge, false_edge;
8065 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8066 /* This comparison controls the branch to the loop latch.  For now, require
8067    that the latch block is empty.  */
8068 if ((loop->latch == true_edge->dest
8069 || loop->latch == false_edge->dest)
8070 && empty_block_p (loop->latch))
8072 group->doloop_p = true;
8073 if (dump_file && (dump_flags & TDF_DETAILS))
8075 fprintf (dump_file, "Doloop cmp iv use: ");
8076 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8078 return true;
8084 return false;
8087 /* For targets that support doloop, predict whether the RTL doloop
8088    transformation will later be performed on this loop; if so, detect the
8089    doloop use and set the doloop_use_p flag.  */
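/* The prediction is attempted only when -fbranch-on-count-reg is enabled
   and the user has not requested maximal unrolling of the loop; it then
   requires generic_predict_doloop_p to accept the loop for the target and
   find_doloop_use to locate a suitable comparison use.  */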
8091 void
8092 analyze_and_mark_doloop_use (struct ivopts_data *data)
8094 data->doloop_use_p = false;
8096 if (!flag_branch_on_count_reg)
8097 return;
8099 if (data->current_loop->unroll == USHRT_MAX)
8100 return;
8102 if (!generic_predict_doloop_p (data))
8103 return;
8105 if (find_doloop_use (data))
8107 data->doloop_use_p = true;
8108 if (dump_file && (dump_flags & TDF_DETAILS))
8110 struct loop *loop = data->current_loop;
8111 fprintf (dump_file,
8112 "Predict loop %d can perform"
8113 " doloop optimization later.\n",
8114 loop->num);
8115 flow_loop_dump (loop, dump_file, NULL, 1);
8120 /* Optimizes the LOOP. Returns true if anything changed. */
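/* As a source-level illustration (the rewriting itself happens on GIMPLE),
   a loop such as
       for (i = 0; i < n; i++)
         a[i] = 0;
   is typically strength-reduced to advance a single pointer IV and compare
   it against a precomputed bound, eliminating the separate counter:
       p = &a[0]; end = &a[n];
       do { *p = 0; p++; } while (p != end);  */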
8122 static bool
8123 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8124 bitmap toremove)
8126 bool changed = false;
8127 class iv_ca *iv_ca;
8128 edge exit = single_dom_exit (loop);
8129 basic_block *body;
8131 gcc_assert (!data->niters);
8132 data->current_loop = loop;
8133 data->loop_loc = find_loop_location (loop).get_location_t ();
8134 data->speed = optimize_loop_for_speed_p (loop);
8136 if (dump_file && (dump_flags & TDF_DETAILS))
8138 fprintf (dump_file, "Processing loop %d", loop->num);
8139 if (data->loop_loc != UNKNOWN_LOCATION)
8140 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8141 LOCATION_LINE (data->loop_loc));
8142 fprintf (dump_file, "\n");
8144 if (exit)
8146 fprintf (dump_file, " single exit %d -> %d, exit condition ",
8147 exit->src->index, exit->dest->index);
8148 print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
8149 0, TDF_SLIM);
8150 fprintf (dump_file, "\n");
8153 fprintf (dump_file, "\n");
8156 body = get_loop_body (loop);
8157 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8158 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8160 data->loop_single_exit_p
8161 = exit != NULL && loop_only_exit_p (loop, body, exit);
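/* True iff EXIT is the only edge leaving the loop; some of the later cost
   and elimination decisions are valid only in that case.  */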
8163 /* For each SSA name, determine whether it behaves as an induction variable
8164    in the current loop.  */
8165 if (!find_induction_variables (data, body))
8166 goto finish;
8168 /* Finds interesting uses (item 1). */
8169 find_interesting_uses (data, body);
8170 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8171 goto finish;
8173 /* Determine cost scaling factor for basic blocks in loop. */
8174 determine_scaling_factor (data, body);
8176 /* Analyze doloop possibility and mark the doloop use if predicted. */
8177 analyze_and_mark_doloop_use (data);
8179 /* Finds candidates for the induction variables (item 2). */
8180 find_iv_candidates (data);
8182 /* Calculates the costs (item 3, part 1). */
8183 determine_iv_costs (data);
8184 determine_group_iv_costs (data);
8185 determine_set_costs (data);
8187 /* Find the optimal set of induction variables (item 3, part 2). */
8188 iv_ca = find_optimal_iv_set (data);
8189 /* Clear the basic block aux fields set by determine_scaling_factor.  */
8190 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8191 body[i]->aux = NULL;
8192 if (!iv_ca)
8193 goto finish;
8194 changed = true;
8196 /* Create the new induction variables (item 4, part 1). */
8197 create_new_ivs (data, iv_ca);
8198 iv_ca_free (&iv_ca);
8200 /* Rewrite the uses (item 4, part 2). */
8201 rewrite_groups (data);
8203 /* Remove the ivs that are unused after rewriting. */
8204 remove_unused_ivs (data, toremove);
8206 finish:
8207 free (body);
8208 free_loop_data (data);
8210 return changed;
8213 /* Main entry point. Optimizes induction variables in loops. */
8215 void
8216 tree_ssa_iv_optimize (void)
8218 struct ivopts_data data;
8219 auto_bitmap toremove;
8221 tree_ssa_iv_optimize_init (&data);
8222 mark_ssa_maybe_undefs ();
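/* Precompute which SSA names may be undefined on some path; IV analysis
   consults this to avoid introducing uses of such values.  */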
8224 /* Optimize the loops starting with the innermost ones. */
8225 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8227 if (!dbg_cnt (ivopts_loop))
8228 continue;
8230 if (dump_file && (dump_flags & TDF_DETAILS))
8231 flow_loop_dump (loop, dump_file, NULL, 1);
8233 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8236 /* Remove eliminated IV defs. */
8237 release_defs_bitset (toremove);
8239 /* We have changed the structure of induction variables; it might happen
8240 that definitions in the scev database refer to some of them that were
8241 eliminated. */
8242 scev_reset_htab ();
8243 /* Likewise niter and control-IV information. */
8244 free_numbers_of_iterations_estimates (cfun);
8246 tree_ssa_iv_optimize_finalize (&data);
8249 #include "gt-tree-ssa-loop-ivopts.h"