gcc/tree-ssa-loop-ivopts.cc
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination.  It proceeds in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop.  Doing it globally is theoretically
65 possible; it might give better performance and enable us to decide costs
66 more precisely, but getting all the interactions right would be
67 complicated.
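   As an illustrative sketch (not part of the pass itself), a loop such as

	for (i = 0; i < 100; i++)
	  a[i] = 0;

   may end up keeping only a pointer induction variable, conceptually

	for (p = &a[0]; p != &a[100]; p++)
	  *p = 0;

   which is the classical strength-reduction / iv-elimination result the
   steps above aim for.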
69 For targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed by the RTL doloop
71 optimization, and try to make the selected IV candidate set optimal for
72 them.  The process of doloop support includes:
74 1) Analyze whether the current loop will be transformed into a doloop; if
75 so, mark its compare type IV use as a doloop use (iv_group field doloop_p)
76 and set flag doloop_use_p of ivopts_data to notify subsequent processing of
77 the doloop.  See analyze_and_mark_doloop_use and its callees for details.
78 The target hook predict_doloop_p can be used for target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 set flag doloop_p of the iv_cand; its step cost is set to zero and, like a
82 biv, it has no extra cost.  For cost determination between doloop IV cand and IV use, the
83 target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 provided to add on extra costs for generic type and address type IV use.
85 Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will proceed as usual and pick the doloop dedicated IV
90 if it is profitable.  */
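/* Illustrative note (not part of the pass): for a counted loop whose niter is
   known and whose may_be_zero condition is false, the dedicated doloop
   candidate described above behaves like a down-counting IV starting at
   niter + 1 with step -1, so the exit test can later be matched by the
   target's low-overhead loop (doloop) pattern in RTL.  */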
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
143 /* The infinite cost. */
144 #define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
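/* Note on avg_loop_niter above (illustrative): if profile data estimates a
   loop at ~1000 iterations while --param avg-loop-niter is 10 (a common
   default), the returned value, and hence the iteration scaling used by the
   cost model below, is capped at 10.  */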
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to which the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The nonlinear use of this iv, if there is one. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use; /* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
192 /* Types of uses. */
193 enum use_type
195 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
196 USE_REF_ADDRESS, /* Use is an address for an explicit memory
197 reference. */
198 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
199 cases where the expansion of the function
200 will turn the argument into a normal address. */
201 USE_COMPARE /* Use is a compare. */
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C from this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts cost COST2 from COST1. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller than or equal to COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
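/* Example of the ordering defined above (descriptive comment): the cost
   {cost = 4, complexity = 1} compares as cheaper than {cost = 4, complexity = 2},
   since the runtime costs tie and the lower complexity wins, while
   {cost = 3, complexity = 7} is cheaper than {cost = 4, complexity = 0}
   because the runtime cost is compared first.  */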
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64_pod addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
430 /* The costs w.r.t. the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
440 /* The position where the iv is computed. */
441 enum iv_position
443 IP_NORMAL, /* At the end, just before the exit condition. */
444 IP_END, /* At the end of the latch block. */
445 IP_BEFORE_USE, /* Immediately before a specific use. */
446 IP_AFTER_USE, /* Immediately after a specific use. */
447 IP_ORIGINAL /* The original biv. */
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique identifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variables and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
703 /* Difference of two iv candidate assignments. */
705 struct iv_ca_delta
707 /* Changed group. */
708 struct iv_group *group;
710 /* An old assignment (for rollback purposes). */
711 class cost_pair *old_cp;
713 /* A new assignment. */
714 class cost_pair *new_cp;
716 /* Next change in the list. */
717 struct iv_ca_delta *next;
720 /* Bound on the number of candidates below which all candidates are considered. */
722 #define CONSIDER_ALL_CANDIDATES_BOUND \
723 ((unsigned) param_iv_consider_all_candidates_bound)
725 /* If there are more iv occurrences, we just give up (it is quite unlikely that
726 optimizing such a loop would help, and it would take ages). */
728 #define MAX_CONSIDERED_GROUPS \
729 ((unsigned) param_iv_max_considered_uses)
731 /* If there are at most this number of ivs in the set, always try removing
732 unnecessary ivs from the set. */
734 #define ALWAYS_PRUNE_CAND_SET_BOUND \
735 ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for which the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness w.r.t. loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_nondebug_stmt (bb);
943 /* Returns true if STMT is after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT is after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 if (!number_of_iterations_exit (data->current_loop,
1040 exit, desc, true)
1041 || contains_abnormal_ssa_name_p (desc->niter))
1043 XDELETE (desc);
1044 desc = NULL;
1046 data->niters->put (exit, desc);
1048 else
1049 desc = *slot;
1051 return desc;
1054 /* Returns the structure describing number of iterations determined from
1055 single dominating exit of DATA->current_loop, or NULL if something
1056 goes wrong. */
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1061 edge exit = single_dom_exit (data->current_loop);
1063 if (!exit)
1064 return NULL;
1066 return niter_for_exit (data, exit);
1069 /* Initializes data structures used by the iv optimization pass, stored
1070 in DATA. */
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1075 data->version_info_size = 2 * num_ssa_names;
1076 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077 data->relevant = BITMAP_ALLOC (NULL);
1078 data->important_candidates = BITMAP_ALLOC (NULL);
1079 data->max_inv_var_id = 0;
1080 data->max_inv_expr_id = 0;
1081 data->niters = NULL;
1082 data->vgroups.create (20);
1083 data->vcands.create (20);
1084 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085 data->name_expansion_cache = NULL;
1086 data->base_object_map = NULL;
1087 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088 data->iv_common_cands.create (20);
1089 decl_rtl_to_reset.create (20);
1090 gcc_obstack_init (&data->iv_obstack);
1093 /* walk_tree callback for determine_base_object. */
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1098 tree_code code = TREE_CODE (*tp);
1099 tree obj = NULL_TREE;
1100 if (code == ADDR_EXPR)
1102 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103 if (!base)
1104 obj = *tp;
1105 else if (TREE_CODE (base) != MEM_REF)
1106 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1108 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109 obj = fold_convert (ptr_type_node, *tp);
1111 if (!obj)
1113 if (!EXPR_P (*tp))
1114 *walk_subtrees = 0;
1116 return NULL_TREE;
1118 /* Record special node for multiple base objects and stop. */
1119 if (*static_cast<tree *> (wdata))
1121 *static_cast<tree *> (wdata) = integer_zero_node;
1122 return integer_zero_node;
1124 /* Record the base object and continue looking. */
1125 *static_cast<tree *> (wdata) = obj;
1126 return NULL_TREE;
1129 /* Returns a memory object to which EXPR points, with caching.  Return NULL
1130 if we are able to determine that it does not point to any such object;
1131 specifically, return integer_zero_node if EXPR contains multiple base objects. */
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1136 tree *slot, obj = NULL_TREE;
1137 if (data->base_object_map)
1139 if ((slot = data->base_object_map->get(expr)) != NULL)
1140 return *slot;
1142 else
1143 data->base_object_map = new hash_map<tree, tree>;
1145 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146 data->base_object_map->put (expr, obj);
1147 return obj;
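/* Examples of the mapping above (descriptive comment): for EXPR '&a[i]' the
   recorded base object is the address of 'a' converted to ptr_type_node; for
   an SSA pointer 'p_3' it is 'p_3' itself; if EXPR mixes two different base
   objects, integer_zero_node is recorded instead.  */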
1150 /* Return true if an address expression with a non-DECL_P operand appears
1151 in EXPR. */
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1156 bool res = false;
1158 STRIP_NOPS (expr);
1159 switch (TREE_CODE (expr))
1161 case POINTER_PLUS_EXPR:
1162 case PLUS_EXPR:
1163 case MINUS_EXPR:
1164 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166 break;
1168 case ADDR_EXPR:
1169 return (!DECL_P (TREE_OPERAND (expr, 0)));
1171 default:
1172 return false;
1175 return res;
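/* For instance (descriptive comment), '&a' has a DECL_P operand and is not
   considered complex, while '&a[4]' or '&s.f + off' contain an address
   expression whose operand is not a DECL and therefore are; such bases are
   lowered through the affine machinery in alloc_iv below so that equivalent
   forms share one representation.  */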
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183 bool no_overflow = false)
1185 tree expr = base;
1186 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187 sizeof (struct iv));
1188 gcc_assert (step != NULL_TREE);
1190 /* Lower address expression in base except ones with DECL_P as operand.
1191 By doing this:
1192 1) More accurate cost can be computed for address expressions;
1193 2) Duplicate candidates won't be created for bases in different
1194 forms, like &a[0] and &a. */
1195 STRIP_NOPS (expr);
1196 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197 || contain_complex_addr_expr (expr))
1199 aff_tree comb;
1200 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204 iv->base = base;
1205 iv->base_object = determine_base_object (data, base);
1206 iv->step = step;
1207 iv->biv_p = false;
1208 iv->nonlin_use = NULL;
1209 iv->ssa_name = NULL_TREE;
1210 if (!no_overflow
1211 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212 base, step))
1213 no_overflow = true;
1214 iv->no_overflow = no_overflow;
1215 iv->have_address_use = false;
1217 return iv;
1220 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1221 doesn't overflow. */
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225 bool no_overflow)
1227 struct version_info *info = name_info (data, iv);
1229 gcc_assert (!info->iv);
1231 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232 info->iv = alloc_iv (data, base, step, no_overflow);
1233 info->iv->ssa_name = iv;
1236 /* Finds induction variable declaration for VAR. */
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1241 basic_block bb;
1242 tree type = TREE_TYPE (var);
1244 if (!POINTER_TYPE_P (type)
1245 && !INTEGRAL_TYPE_P (type))
1246 return NULL;
1248 if (!name_info (data, var)->iv)
1250 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1252 if (!bb
1253 || !flow_bb_inside_loop_p (data->current_loop, bb))
1255 if (POINTER_TYPE_P (type))
1256 type = sizetype;
1257 set_iv (data, var, var, build_int_cst (type, 0), true);
1261 return name_info (data, var)->iv;
1264 /* Return the first non-invariant ssa var found in EXPR. */
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1269 int i, n;
1270 tree tmp;
1271 enum tree_code code;
1273 if (!expr || is_gimple_min_invariant (expr))
1274 return NULL;
1276 code = TREE_CODE (expr);
1277 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1279 n = TREE_OPERAND_LENGTH (expr);
1280 for (i = 0; i < n; i++)
1282 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1284 if (tmp)
1285 return tmp;
1288 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291 /* Finds basic ivs. */
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1296 gphi *phi;
1297 affine_iv iv;
1298 tree step, type, base, stop;
1299 bool found = false;
1300 class loop *loop = data->current_loop;
1301 gphi_iterator psi;
1303 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1305 phi = psi.phi ();
1307 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308 continue;
1310 if (virtual_operand_p (PHI_RESULT (phi)))
1311 continue;
1313 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314 continue;
1316 if (integer_zerop (iv.step))
1317 continue;
1319 step = iv.step;
1320 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321 /* Stop expanding the iv base at the first ssa var referred to by the iv
1322 step.  Ideally we should stop at any ssa var, but since that is expensive
1323 and such cases are unusual, we only do it for the first one.
1325 See PR64705 for the rationale. */
1326 stop = extract_single_var_from_expr (step);
1327 base = expand_simple_operations (base, stop);
1328 if (contains_abnormal_ssa_name_p (base)
1329 || contains_abnormal_ssa_name_p (step))
1330 continue;
1332 type = TREE_TYPE (PHI_RESULT (phi));
1333 base = fold_convert (type, base);
1334 if (step)
1336 if (POINTER_TYPE_P (type))
1337 step = convert_to_ptrofftype (step);
1338 else
1339 step = fold_convert (type, step);
1342 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343 found = true;
1346 return found;
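/* As an example of what find_bivs records (descriptive comment): for a loop
   header PHI i_1 = PHI <0(preheader), i_7(latch)> with i_7 = i_1 + 1 in the
   body, the biv for i_1 gets base 0 and step 1 in the type of the PHI
   result.  */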
1349 /* Marks basic ivs. */
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1354 gphi *phi;
1355 gimple *def;
1356 tree var;
1357 struct iv *iv, *incr_iv;
1358 class loop *loop = data->current_loop;
1359 basic_block incr_bb;
1360 gphi_iterator psi;
1362 data->bivs_not_used_in_addr = 0;
1363 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1365 phi = psi.phi ();
1367 iv = get_iv (data, PHI_RESULT (phi));
1368 if (!iv)
1369 continue;
1371 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372 def = SSA_NAME_DEF_STMT (var);
1373 /* Don't mark an iv peeled from another one as a biv. */
1374 if (def
1375 && gimple_code (def) == GIMPLE_PHI
1376 && gimple_bb (def) == loop->header)
1377 continue;
1379 incr_iv = get_iv (data, var);
1380 if (!incr_iv)
1381 continue;
1383 /* If the increment is in the subloop, ignore it. */
1384 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385 if (incr_bb->loop_father != data->current_loop
1386 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387 continue;
1389 iv->biv_p = true;
1390 incr_iv->biv_p = true;
1391 if (iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1393 if (incr_iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399 parameters to IV. */
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1404 tree lhs, stop;
1405 class loop *loop = data->current_loop;
1407 iv->base = NULL_TREE;
1408 iv->step = NULL_TREE;
1410 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411 return false;
1413 lhs = gimple_assign_lhs (stmt);
1414 if (TREE_CODE (lhs) != SSA_NAME)
1415 return false;
1417 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418 return false;
1420 /* Stop expanding the iv base at the first ssa var referred to by the iv
1421 step.  Ideally we should stop at any ssa var, but since that is expensive
1422 and such cases are unusual, we only do it for the first one.
1424 See PR64705 for the rationale. */
1425 stop = extract_single_var_from_expr (iv->step);
1426 iv->base = expand_simple_operations (iv->base, stop);
1427 if (contains_abnormal_ssa_name_p (iv->base)
1428 || contains_abnormal_ssa_name_p (iv->step))
1429 return false;
1431 /* If STMT could throw, then do not consider STMT as defining a GIV.
1432 While this will suppress optimizations, we cannot safely delete this
1433 GIV and associated statements, even if it appears it is not used. */
1434 if (stmt_could_throw_p (cfun, stmt))
1435 return false;
1437 return true;
1440 /* Finds general ivs in statement STMT. */
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1445 affine_iv iv;
1447 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448 return;
1450 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1453 /* Finds general ivs in basic block BB. */
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1458 gimple_stmt_iterator bsi;
1460 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461 find_givs_in_stmt (data, gsi_stmt (bsi));
1464 /* Finds general ivs. */
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1469 class loop *loop = data->current_loop;
1470 unsigned i;
1472 for (i = 0; i < loop->num_nodes; i++)
1473 find_givs_in_bb (data, body[i]);
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477 variable and if so, its initial value and step. */
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1482 unsigned i;
1483 bitmap_iterator bi;
1485 if (!find_bivs (data))
1486 return false;
1488 find_givs (data, body);
1489 mark_bivs (data);
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1493 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495 if (niter)
1497 fprintf (dump_file, " number of iterations ");
1498 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 if (!integer_zerop (niter->may_be_zero))
1501 fprintf (dump_file, "; zero if ");
1502 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 fprintf (dump_file, "\n");
1507 fprintf (dump_file, "\n<Induction Vars>:\n");
1508 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 struct version_info *info = ver_info (data, i);
1511 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1516 return true;
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521 is the const offset stripped from IV base and MEM_TYPE is the type
1522 of the memory being addressed. For uses of other types, ADDR_BASE
1523 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 gimple *stmt, enum use_type type, tree mem_type,
1528 tree addr_base, poly_uint64 addr_offset)
1530 struct iv_use *use = XCNEW (struct iv_use);
1532 use->id = group->vuses.length ();
1533 use->group_id = group->id;
1534 use->type = type;
1535 use->mem_type = mem_type;
1536 use->iv = iv;
1537 use->stmt = stmt;
1538 use->op_p = use_p;
1539 use->addr_base = addr_base;
1540 use->addr_offset = addr_offset;
1542 group->vuses.safe_push (use);
1543 return use;
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547 NONLINEAR_USE is true if the invariant is used in a way we do not
1548 handle specially. */
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 basic_block bb;
1554 struct version_info *info;
1556 if (TREE_CODE (op) != SSA_NAME
1557 || virtual_operand_p (op))
1558 return;
1560 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561 if (bb
1562 && flow_bb_inside_loop_p (data->current_loop, bb))
1563 return;
1565 info = name_info (data, op);
1566 info->name = op;
1567 info->has_nonlin_use |= nonlinear_use;
1568 if (!info->inv_id)
1569 info->inv_id = ++data->max_inv_var_id;
1570 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573 /* Record a group of TYPE. */
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1578 struct iv_group *group = XCNEW (struct iv_group);
1580 group->id = data->vgroups.length ();
1581 group->type = type;
1582 group->related_cands = BITMAP_ALLOC (NULL);
1583 group->vuses.create (1);
1584 group->doloop_p = false;
1586 data->vgroups.safe_push (group);
1587 return group;
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591 New group will be created if there is no existing group for the use.
1592 MEM_TYPE is the type of memory being addressed, or NULL if this
1593 isn't an address reference. */
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 struct iv *iv, gimple *stmt, enum use_type type,
1598 tree mem_type)
1600 tree addr_base = NULL;
1601 struct iv_group *group = NULL;
1602 poly_uint64 addr_offset = 0;
1604 /* Non-address type uses always get a new group; address type uses may join an existing group below. */
1605 if (address_p (type))
1607 unsigned int i;
1609 gcc_assert (POINTER_TYPE_P (TREE_TYPE (iv->base)));
1610 tree addr_toffset;
1611 split_constant_offset (iv->base, &addr_base, &addr_toffset);
1612 addr_offset = int_cst_value (addr_toffset);
1613 for (i = 0; i < data->vgroups.length (); i++)
1615 struct iv_use *use;
1617 group = data->vgroups[i];
1618 use = group->vuses[0];
1619 if (!address_p (use->type))
1620 continue;
1622 /* Check if it has the same stripped base and step. */
1623 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1624 && operand_equal_p (iv->step, use->iv->step, 0)
1625 && operand_equal_p (addr_base, use->addr_base, 0))
1626 break;
1628 if (i == data->vgroups.length ())
1629 group = NULL;
1632 if (!group)
1633 group = record_group (data, type);
1635 return record_use (group, use_p, iv, stmt, type, mem_type,
1636 addr_base, addr_offset);
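/* For example (descriptive comment), address uses 'a[i]' and 'a[i + 1]' in
   the same loop share the base object, the step and the stripped base
   address, so they end up in one group and differ only in addr_offset.  */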
1639 /* Checks whether the use OP is interesting and if so, records it. */
1641 static struct iv_use *
1642 find_interesting_uses_op (struct ivopts_data *data, tree op)
1644 struct iv *iv;
1645 gimple *stmt;
1646 struct iv_use *use;
1648 if (TREE_CODE (op) != SSA_NAME)
1649 return NULL;
1651 iv = get_iv (data, op);
1652 if (!iv)
1653 return NULL;
1655 if (iv->nonlin_use)
1657 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1658 return iv->nonlin_use;
1661 if (integer_zerop (iv->step))
1663 record_invariant (data, op, true);
1664 return NULL;
1667 stmt = SSA_NAME_DEF_STMT (op);
1668 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1670 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1671 iv->nonlin_use = use;
1672 return use;
1675 /* Indicate how compare type iv_use can be handled. */
1676 enum comp_iv_rewrite
1678 COMP_IV_NA,
1679 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1680 COMP_IV_EXPR,
1681 /* We may rewrite compare type iv_uses on both sides of comparison by
1682 expressing value of each iv_use. */
1683 COMP_IV_EXPR_2,
1684 /* We may rewrite compare type iv_use by expressing value of the iv_use
1685 or by eliminating it with other iv_cand. */
1686 COMP_IV_ELIM
1689 /* Given a condition in statement STMT, checks whether it is a compare of
1690 an induction variable and an invariant.  If this is the case, CONTROL_VAR
1691 is set to the location of the iv, BOUND to the location of the invariant,
1692 IV_VAR and IV_BOUND are set to the corresponding induction variable
1693 descriptions, and a comp_iv_rewrite value describing how the comparison
1694 can be rewritten is returned.  If this is not the case, CONTROL_VAR and
1695 BOUND are set to the arguments of the condition and COMP_IV_NA is returned. */
1697 static enum comp_iv_rewrite
1698 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1699 tree **control_var, tree **bound,
1700 struct iv **iv_var, struct iv **iv_bound)
1702 /* The objects returned when COND has constant operands. */
1703 static struct iv const_iv;
1704 static tree zero;
1705 tree *op0 = &zero, *op1 = &zero;
1706 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1707 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1709 if (gimple_code (stmt) == GIMPLE_COND)
1711 gcond *cond_stmt = as_a <gcond *> (stmt);
1712 op0 = gimple_cond_lhs_ptr (cond_stmt);
1713 op1 = gimple_cond_rhs_ptr (cond_stmt);
1715 else
1717 op0 = gimple_assign_rhs1_ptr (stmt);
1718 op1 = gimple_assign_rhs2_ptr (stmt);
1721 zero = integer_zero_node;
1722 const_iv.step = integer_zero_node;
1724 if (TREE_CODE (*op0) == SSA_NAME)
1725 iv0 = get_iv (data, *op0);
1726 if (TREE_CODE (*op1) == SSA_NAME)
1727 iv1 = get_iv (data, *op1);
1729 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1730 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1732 rewrite_type = COMP_IV_EXPR_2;
1733 goto end;
1736 /* If neither side of the comparison is an IV. */
1737 if ((!iv0 || integer_zerop (iv0->step))
1738 && (!iv1 || integer_zerop (iv1->step)))
1739 goto end;
1741 /* Control variable may be on the other side. */
1742 if (!iv0 || integer_zerop (iv0->step))
1744 std::swap (op0, op1);
1745 std::swap (iv0, iv1);
1747 /* If one side is IV and the other side isn't loop invariant. */
1748 if (!iv1)
1749 rewrite_type = COMP_IV_EXPR;
1750 /* If one side is IV and the other side is loop invariant. */
1751 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1752 rewrite_type = COMP_IV_ELIM;
1754 end:
1755 if (control_var)
1756 *control_var = op0;
1757 if (iv_var)
1758 *iv_var = iv0;
1759 if (bound)
1760 *bound = op1;
1761 if (iv_bound)
1762 *iv_bound = iv1;
1764 return rewrite_type;
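/* Examples of the classification above (descriptive comment): for 'i < n'
   where i is an IV and n is loop invariant the result is COMP_IV_ELIM; for
   'i < j' where both sides are IVs it is COMP_IV_EXPR_2; for 'i < x' where x
   is defined inside the loop but is not an IV it is COMP_IV_EXPR; if neither
   side is an IV the result is COMP_IV_NA.  */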
1767 /* Checks whether the condition in STMT is interesting and if so,
1768 records it. */
1770 static void
1771 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1773 tree *var_p, *bound_p;
1774 struct iv *var_iv, *bound_iv;
1775 enum comp_iv_rewrite ret;
1777 ret = extract_cond_operands (data, stmt,
1778 &var_p, &bound_p, &var_iv, &bound_iv);
1779 if (ret == COMP_IV_NA)
1781 find_interesting_uses_op (data, *var_p);
1782 find_interesting_uses_op (data, *bound_p);
1783 return;
1786 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1787 /* Record compare type iv_use for iv on the other side of comparison. */
1788 if (ret == COMP_IV_EXPR_2)
1789 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1792 /* Returns the outermost loop in which EXPR is obviously invariant relative
1793 to the loop LOOP, i.e. the outermost loop such that all of EXPR's operands
1794 are defined outside of the returned loop.  Returns NULL if EXPR is not
1795 even obviously invariant in LOOP. */
1797 class loop *
1798 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1800 basic_block def_bb;
1801 unsigned i, len;
1803 if (is_gimple_min_invariant (expr))
1804 return current_loops->tree_root;
1806 if (TREE_CODE (expr) == SSA_NAME)
1808 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1809 if (def_bb)
1811 if (flow_bb_inside_loop_p (loop, def_bb))
1812 return NULL;
1813 return superloop_at_depth (loop,
1814 loop_depth (def_bb->loop_father) + 1);
1817 return current_loops->tree_root;
1820 if (!EXPR_P (expr))
1821 return NULL;
1823 unsigned maxdepth = 0;
1824 len = TREE_OPERAND_LENGTH (expr);
1825 for (i = 0; i < len; i++)
1827 class loop *ivloop;
1828 if (!TREE_OPERAND (expr, i))
1829 continue;
1831 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1832 if (!ivloop)
1833 return NULL;
1834 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1837 return superloop_at_depth (loop, maxdepth);
1840 /* Returns true if expression EXPR is obviously invariant in LOOP,
1841 i.e. if all its operands are defined outside of the LOOP. LOOP
1842 should not be the function body. */
1844 bool
1845 expr_invariant_in_loop_p (class loop *loop, tree expr)
1847 basic_block def_bb;
1848 unsigned i, len;
1850 gcc_assert (loop_depth (loop) > 0);
1852 if (is_gimple_min_invariant (expr))
1853 return true;
1855 if (TREE_CODE (expr) == SSA_NAME)
1857 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1858 if (def_bb
1859 && flow_bb_inside_loop_p (loop, def_bb))
1860 return false;
1862 return true;
1865 if (!EXPR_P (expr))
1866 return false;
1868 len = TREE_OPERAND_LENGTH (expr);
1869 for (i = 0; i < len; i++)
1870 if (TREE_OPERAND (expr, i)
1871 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1872 return false;
1874 return true;
1877 /* Given expression EXPR which computes inductive values with respect
1878 to the loop recorded in DATA, this function returns the biv from which EXPR
1879 is derived, by tracing the definition chains of ssa variables in EXPR. */
1881 static struct iv*
1882 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1884 struct iv *iv;
1885 unsigned i, n;
1886 tree e2, e1;
1887 enum tree_code code;
1888 gimple *stmt;
1890 if (expr == NULL_TREE)
1891 return NULL;
1893 if (is_gimple_min_invariant (expr))
1894 return NULL;
1896 code = TREE_CODE (expr);
1897 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1899 n = TREE_OPERAND_LENGTH (expr);
1900 for (i = 0; i < n; i++)
1902 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1903 if (iv)
1904 return iv;
1908 /* Stop if it's not ssa name. */
1909 if (code != SSA_NAME)
1910 return NULL;
1912 iv = get_iv (data, expr);
1913 if (!iv || integer_zerop (iv->step))
1914 return NULL;
1915 else if (iv->biv_p)
1916 return iv;
1918 stmt = SSA_NAME_DEF_STMT (expr);
1919 if (gphi *phi = dyn_cast <gphi *> (stmt))
1921 ssa_op_iter iter;
1922 use_operand_p use_p;
1923 basic_block phi_bb = gimple_bb (phi);
1925 /* Skip loop header PHI that doesn't define biv. */
1926 if (phi_bb->loop_father == data->current_loop)
1927 return NULL;
1929 if (virtual_operand_p (gimple_phi_result (phi)))
1930 return NULL;
1932 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1934 tree use = USE_FROM_PTR (use_p);
1935 iv = find_deriving_biv_for_expr (data, use);
1936 if (iv)
1937 return iv;
1939 return NULL;
1941 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1942 return NULL;
1944 e1 = gimple_assign_rhs1 (stmt);
1945 code = gimple_assign_rhs_code (stmt);
1946 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1947 return find_deriving_biv_for_expr (data, e1);
1949 switch (code)
1951 case MULT_EXPR:
1952 case PLUS_EXPR:
1953 case MINUS_EXPR:
1954 case POINTER_PLUS_EXPR:
1955 /* Increments, decrements and multiplications by a constant
1956 are simple. */
1957 e2 = gimple_assign_rhs2 (stmt);
1958 iv = find_deriving_biv_for_expr (data, e2);
1959 if (iv)
1960 return iv;
1961 gcc_fallthrough ();
1963 CASE_CONVERT:
1964 /* Casts are simple. */
1965 return find_deriving_biv_for_expr (data, e1);
1967 default:
1968 break;
1971 return NULL;
1974 /* Record that BIV and its predecessor and successor ivs are used in
1975 address type uses. */
1977 static void
1978 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1980 unsigned i;
1981 tree type, base_1, base_2;
1982 bitmap_iterator bi;
1984 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1985 || biv->have_address_use || !biv->no_overflow)
1986 return;
1988 type = TREE_TYPE (biv->base);
1989 if (!INTEGRAL_TYPE_P (type))
1990 return;
1992 biv->have_address_use = true;
1993 data->bivs_not_used_in_addr--;
1994 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1995 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1997 struct iv *iv = ver_info (data, i)->iv;
1999 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2000 || iv->have_address_use || !iv->no_overflow)
2001 continue;
2003 if (type != TREE_TYPE (iv->base)
2004 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2005 continue;
2007 if (!operand_equal_p (biv->step, iv->step, 0))
2008 continue;
2010 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2011 if (operand_equal_p (base_1, iv->base, 0)
2012 || operand_equal_p (base_2, biv->base, 0))
2014 iv->have_address_use = true;
2015 data->bivs_not_used_in_addr--;
2020 /* Accumulates the steps of indices into DATA and replaces their values with the
2021 initial ones. Returns false when the value of the index cannot be determined.
2022 Callback for for_each_index. */
2024 struct ifs_ivopts_data
2026 struct ivopts_data *ivopts_data;
2027 gimple *stmt;
2028 tree step;
2031 static bool
2032 idx_find_step (tree base, tree *idx, void *data)
2034 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2035 struct iv *iv;
2036 bool use_overflow_semantics = false;
2037 tree step, iv_base, iv_step, lbound, off;
2038 class loop *loop = dta->ivopts_data->current_loop;
2040 /* If base is a component ref, require that the offset of the reference
2041 be invariant. */
2042 if (TREE_CODE (base) == COMPONENT_REF)
2044 off = component_ref_field_offset (base);
2045 return expr_invariant_in_loop_p (loop, off);
2048 /* If base is array, first check whether we will be able to move the
2049 reference out of the loop (in order to take its address in strength
2050 reduction). In order for this to work we need both lower bound
2051 and step to be loop invariants. */
2052 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2054 /* Moreover, for a range, the size needs to be invariant as well. */
2055 if (TREE_CODE (base) == ARRAY_RANGE_REF
2056 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2057 return false;
2059 step = array_ref_element_size (base);
2060 lbound = array_ref_low_bound (base);
2062 if (!expr_invariant_in_loop_p (loop, step)
2063 || !expr_invariant_in_loop_p (loop, lbound))
2064 return false;
2067 if (TREE_CODE (*idx) != SSA_NAME)
2068 return true;
2070 iv = get_iv (dta->ivopts_data, *idx);
2071 if (!iv)
2072 return false;
2074 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2075 *&x[0], which is not folded and does not trigger the
2076 ARRAY_REF path below. */
2077 *idx = iv->base;
2079 if (integer_zerop (iv->step))
2080 return true;
2082 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2084 step = array_ref_element_size (base);
2086 /* We only handle addresses whose step is an integer constant. */
2087 if (TREE_CODE (step) != INTEGER_CST)
2088 return false;
2090 else
2091 /* The step for pointer arithmetic is already 1 byte. */
2092 step = size_one_node;
2094 iv_base = iv->base;
2095 iv_step = iv->step;
2096 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2097 use_overflow_semantics = true;
2099 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2100 sizetype, &iv_base, &iv_step, dta->stmt,
2101 use_overflow_semantics))
2103 /* The index might wrap. */
2104 return false;
2107 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2108 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2110 if (dta->ivopts_data->bivs_not_used_in_addr)
2112 if (!iv->biv_p)
2113 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2115 record_biv_for_address_use (dta->ivopts_data, iv);
2117 return true;
2120 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2121 object is passed to it in DATA. */
2123 static bool
2124 idx_record_use (tree base, tree *idx,
2125 void *vdata)
2127 struct ivopts_data *data = (struct ivopts_data *) vdata;
2128 find_interesting_uses_op (data, *idx);
2129 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2131 if (TREE_OPERAND (base, 2))
2132 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2133 if (TREE_OPERAND (base, 3))
2134 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2136 return true;
2139 /* If we can prove that TOP = cst * BOT for some constant cst,
2140 store cst to MUL and return true. Otherwise return false.
2141 The returned value is always sign-extended, regardless of the
2142 signedness of TOP and BOT. */
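/* Illustrative examples (not exhaustive): with TOP = i * 12 and BOT = i,
   the MULT_EXPR case recurses on (i, i), which yields 1, so MUL becomes 12.
   With TOP = 24 and BOT = 8, the INTEGER_CST case divides with a zero
   remainder and MUL becomes 3.  With TOP = i * 4 - i and BOT = i, the
   MINUS_EXPR case yields 4 - 1 = 3.  */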
2144 static bool
2145 constant_multiple_of (tree top, tree bot, widest_int *mul)
2147 tree mby;
2148 enum tree_code code;
2149 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2150 widest_int res, p0, p1;
2152 STRIP_NOPS (top);
2153 STRIP_NOPS (bot);
2155 if (operand_equal_p (top, bot, 0))
2157 *mul = 1;
2158 return true;
2161 code = TREE_CODE (top);
2162 switch (code)
2164 case MULT_EXPR:
2165 mby = TREE_OPERAND (top, 1);
2166 if (TREE_CODE (mby) != INTEGER_CST)
2167 return false;
2169 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2170 return false;
2172 *mul = wi::sext (res * wi::to_widest (mby), precision);
2173 return true;
2175 case PLUS_EXPR:
2176 case MINUS_EXPR:
2177 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2178 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2179 return false;
2181 if (code == MINUS_EXPR)
2182 p1 = -p1;
2183 *mul = wi::sext (p0 + p1, precision);
2184 return true;
2186 case INTEGER_CST:
2187 if (TREE_CODE (bot) != INTEGER_CST)
2188 return false;
2190 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2191 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2192 if (p1 == 0)
2193 return false;
2194 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2195 return res == 0;
2197 default:
2198 if (POLY_INT_CST_P (top)
2199 && POLY_INT_CST_P (bot)
2200 && constant_multiple_p (wi::to_poly_widest (top),
2201 wi::to_poly_widest (bot), mul))
2202 return true;
2204 return false;
2208 /* Return true if memory reference REF with step STEP may be unaligned. */
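/* For instance (a rough, target-dependent sketch): on a strict-alignment
   target, a 4-byte access whose address advances by 2 bytes per iteration
   may become misaligned even if the first access is aligned, which is what
   the check on the trailing zero bits of STEP below catches.  */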
2210 static bool
2211 may_be_unaligned_p (tree ref, tree step)
2213 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2214 thus they are not misaligned. */
2215 if (TREE_CODE (ref) == TARGET_MEM_REF)
2216 return false;
2218 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2219 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2220 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2222 unsigned HOST_WIDE_INT bitpos;
2223 unsigned int ref_align;
2224 get_object_alignment_1 (ref, &ref_align, &bitpos);
2225 if (ref_align < align
2226 || (bitpos % align) != 0
2227 || (bitpos % BITS_PER_UNIT) != 0)
2228 return true;
2230 unsigned int trailing_zeros = tree_ctz (step);
2231 if (trailing_zeros < HOST_BITS_PER_INT
2232 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2233 return true;
2235 return false;
2238 /* Return true if EXPR may be non-addressable. */
2240 bool
2241 may_be_nonaddressable_p (tree expr)
2243 switch (TREE_CODE (expr))
2245 case VAR_DECL:
2246 /* Check if it's a register variable. */
2247 return DECL_HARD_REGISTER (expr);
2249 case TARGET_MEM_REF:
2250 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2251 target, thus they are always addressable. */
2252 return false;
2254 case MEM_REF:
2255 /* Likewise for MEM_REFs, modulo the storage order. */
2256 return REF_REVERSE_STORAGE_ORDER (expr);
2258 case BIT_FIELD_REF:
2259 if (REF_REVERSE_STORAGE_ORDER (expr))
2260 return true;
2261 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2263 case COMPONENT_REF:
2264 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2265 return true;
2266 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2267 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2269 case ARRAY_REF:
2270 case ARRAY_RANGE_REF:
2271 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2272 return true;
2273 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2275 case VIEW_CONVERT_EXPR:
2276 /* This kind of view-conversion may wrap non-addressable objects
2277 and make them look addressable. After some processing the
2278 non-addressability may be uncovered again, causing ADDR_EXPRs
2279 of inappropriate objects to be built. */
2280 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2281 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2282 return true;
2283 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2285 CASE_CONVERT:
2286 return true;
2288 default:
2289 break;
2292 return false;
2295 /* Finds addresses in *OP_P inside STMT. */
2297 static void
2298 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2299 tree *op_p)
2301 tree base = *op_p, step = size_zero_node;
2302 struct iv *civ;
2303 struct ifs_ivopts_data ifs_ivopts_data;
2305 /* Do not play with volatile memory references. A bit too conservative,
2306 perhaps, but safe. */
2307 if (gimple_has_volatile_ops (stmt))
2308 goto fail;
2310 /* Ignore bitfields for now. Not really something terribly complicated
2311 to handle. TODO. */
2312 if (TREE_CODE (base) == BIT_FIELD_REF)
2313 goto fail;
2315 base = unshare_expr (base);
2317 if (TREE_CODE (base) == TARGET_MEM_REF)
2319 tree type = build_pointer_type (TREE_TYPE (base));
2320 tree astep;
2322 if (TMR_BASE (base)
2323 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2325 civ = get_iv (data, TMR_BASE (base));
2326 if (!civ)
2327 goto fail;
2329 TMR_BASE (base) = civ->base;
2330 step = civ->step;
2332 if (TMR_INDEX2 (base)
2333 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2335 civ = get_iv (data, TMR_INDEX2 (base));
2336 if (!civ)
2337 goto fail;
2339 TMR_INDEX2 (base) = civ->base;
2340 step = civ->step;
2342 if (TMR_INDEX (base)
2343 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2345 civ = get_iv (data, TMR_INDEX (base));
2346 if (!civ)
2347 goto fail;
2349 TMR_INDEX (base) = civ->base;
2350 astep = civ->step;
2352 if (astep)
2354 if (TMR_STEP (base))
2355 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2357 step = fold_build2 (PLUS_EXPR, type, step, astep);
2361 if (integer_zerop (step))
2362 goto fail;
2363 base = tree_mem_ref_addr (type, base);
2365 else
2367 ifs_ivopts_data.ivopts_data = data;
2368 ifs_ivopts_data.stmt = stmt;
2369 ifs_ivopts_data.step = size_zero_node;
2370 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2371 || integer_zerop (ifs_ivopts_data.step))
2372 goto fail;
2373 step = ifs_ivopts_data.step;
2375 /* Check that the base expression is addressable. This needs
2376 to be done after substituting bases of IVs into it. */
2377 if (may_be_nonaddressable_p (base))
2378 goto fail;
2380 /* Moreover, on strict alignment platforms, check that it is
2381 sufficiently aligned. */
2382 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2383 goto fail;
2385 base = build_fold_addr_expr (base);
2387 /* Substituting bases of IVs into the base expression might
2388 have caused folding opportunities. */
2389 if (TREE_CODE (base) == ADDR_EXPR)
2391 tree *ref = &TREE_OPERAND (base, 0);
2392 while (handled_component_p (*ref))
2393 ref = &TREE_OPERAND (*ref, 0);
2394 if (TREE_CODE (*ref) == MEM_REF)
2396 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2397 TREE_OPERAND (*ref, 0),
2398 TREE_OPERAND (*ref, 1));
2399 if (tem)
2400 *ref = tem;
2405 civ = alloc_iv (data, base, step);
2406 /* Fail if base object of this memory reference is unknown. */
2407 if (civ->base_object == NULL_TREE)
2408 goto fail;
2410 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2411 return;
2413 fail:
2414 for_each_index (op_p, idx_record_use, data);
2417 /* Finds and records invariants used in STMT. */
2419 static void
2420 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2422 ssa_op_iter iter;
2423 use_operand_p use_p;
2424 tree op;
2426 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2428 op = USE_FROM_PTR (use_p);
2429 record_invariant (data, op, false);
2433 /* CALL calls an internal function. If operand *OP_P will become an
2434 address when the call is expanded, return the type of the memory
2435 being addressed, otherwise return null. */
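/* For example (illustrative): for lhs = IFN_MASK_LOAD (ptr, ..., mask),
   passing the address of the first argument yields TREE_TYPE (lhs); for the
   masked store variants it yields the type of the stored value argument;
   any other argument or internal function yields NULL_TREE.  */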
2437 static tree
2438 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2440 switch (gimple_call_internal_fn (call))
2442 case IFN_MASK_LOAD:
2443 case IFN_MASK_LOAD_LANES:
2444 case IFN_MASK_LEN_LOAD_LANES:
2445 case IFN_LEN_LOAD:
2446 case IFN_MASK_LEN_LOAD:
2447 if (op_p == gimple_call_arg_ptr (call, 0))
2448 return TREE_TYPE (gimple_call_lhs (call));
2449 return NULL_TREE;
2451 case IFN_MASK_STORE:
2452 case IFN_MASK_STORE_LANES:
2453 case IFN_MASK_LEN_STORE_LANES:
2454 case IFN_LEN_STORE:
2455 case IFN_MASK_LEN_STORE:
2457 if (op_p == gimple_call_arg_ptr (call, 0))
2459 internal_fn ifn = gimple_call_internal_fn (call);
2460 int index = internal_fn_stored_value_index (ifn);
2461 return TREE_TYPE (gimple_call_arg (call, index));
2463 return NULL_TREE;
2466 default:
2467 return NULL_TREE;
2471 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2472 Return true if the operand will become an address when STMT
2473 is expanded and record the associated address use if so. */
2475 static bool
2476 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2477 struct iv *iv)
2479 /* Fail if base object of this memory reference is unknown. */
2480 if (iv->base_object == NULL_TREE)
2481 return false;
2483 tree mem_type = NULL_TREE;
2484 if (gcall *call = dyn_cast <gcall *> (stmt))
2485 if (gimple_call_internal_p (call))
2486 mem_type = get_mem_type_for_internal_fn (call, op_p);
2487 if (mem_type)
2489 iv = alloc_iv (data, iv->base, iv->step);
2490 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2491 return true;
2493 return false;
2496 /* Finds interesting uses of induction variables in the statement STMT. */
2498 static void
2499 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2501 struct iv *iv;
2502 tree op, *lhs, *rhs;
2503 ssa_op_iter iter;
2504 use_operand_p use_p;
2505 enum tree_code code;
2507 find_invariants_stmt (data, stmt);
2509 if (gimple_code (stmt) == GIMPLE_COND)
2511 find_interesting_uses_cond (data, stmt);
2512 return;
2515 if (is_gimple_assign (stmt))
2517 lhs = gimple_assign_lhs_ptr (stmt);
2518 rhs = gimple_assign_rhs1_ptr (stmt);
2520 if (TREE_CODE (*lhs) == SSA_NAME)
2522 /* If the statement defines an induction variable, the uses are not
2523 interesting by themselves. */
2525 iv = get_iv (data, *lhs);
2527 if (iv && !integer_zerop (iv->step))
2528 return;
2531 code = gimple_assign_rhs_code (stmt);
2532 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2533 && (REFERENCE_CLASS_P (*rhs)
2534 || is_gimple_val (*rhs)))
2536 if (REFERENCE_CLASS_P (*rhs))
2537 find_interesting_uses_address (data, stmt, rhs);
2538 else
2539 find_interesting_uses_op (data, *rhs);
2541 if (REFERENCE_CLASS_P (*lhs))
2542 find_interesting_uses_address (data, stmt, lhs);
2543 return;
2545 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2547 find_interesting_uses_cond (data, stmt);
2548 return;
2551 /* TODO -- we should also handle address uses of type
2553 memory = call (whatever);
2557 call (memory). */
2560 if (gimple_code (stmt) == GIMPLE_PHI
2561 && gimple_bb (stmt) == data->current_loop->header)
2563 iv = get_iv (data, PHI_RESULT (stmt));
2565 if (iv && !integer_zerop (iv->step))
2566 return;
2569 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2571 op = USE_FROM_PTR (use_p);
2573 if (TREE_CODE (op) != SSA_NAME)
2574 continue;
2576 iv = get_iv (data, op);
2577 if (!iv)
2578 continue;
2580 if (!find_address_like_use (data, stmt, use_p->use, iv))
2581 find_interesting_uses_op (data, op);
2585 /* Finds interesting uses of induction variables outside of loops
2586 on loop exit edge EXIT. */
2588 static void
2589 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2591 gphi *phi;
2592 gphi_iterator psi;
2593 tree def;
2595 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2597 phi = psi.phi ();
2598 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2599 if (!virtual_operand_p (def))
2600 find_interesting_uses_op (data, def);
2604 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2605 mode for memory reference represented by USE. */
2607 static GTY (()) vec<rtx, va_gc> *addr_list;
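/* A hypothetical illustration (the real answer is target-dependent): on a
   target whose load/store instructions accept a signed 16-bit displacement,
   this would return true for offsets in [-32768, 32767] and false outside
   that range, which is what drives the group splitting below.  */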
2609 static bool
2610 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2612 rtx reg, addr;
2613 unsigned list_index;
2614 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2615 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2617 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2618 if (list_index >= vec_safe_length (addr_list))
2619 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2621 addr = (*addr_list)[list_index];
2622 if (!addr)
2624 addr_mode = targetm.addr_space.address_mode (as);
2625 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2626 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2627 (*addr_list)[list_index] = addr;
2629 else
2630 addr_mode = GET_MODE (addr);
2632 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2633 return (memory_address_addr_space_p (mem_mode, addr, as));
2636 /* Comparison function to sort group in ascending order of addr_offset. */
2638 static int
2639 group_compare_offset (const void *a, const void *b)
2641 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2642 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2644 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2647 /* Check if small groups should be split. Return true if no group
2648 contains more than two uses with distinct addr_offsets. Return
2649 false otherwise. We want to split such groups because:
2651 1) Small groups don't have much benefit and may interfere with
2652 general candidate selection.
2653 2) The problem size with only small groups is usually small and the
2654 general algorithm can handle it well.
2656 TODO -- The above claim may not hold when we want to merge memory
2657 accesses with consecutive addresses. */
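/* In other words (illustrative): if every address group has at most two
   distinct addr_offsets, e.g. {0, 4}, groups are split so that only uses
   with identical offsets stay together; once some group has three or more
   distinct offsets, e.g. {0, 4, 8}, splitting is instead driven only by
   what the [base + offset] addressing mode can encode.  */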
2659 static bool
2660 split_small_address_groups_p (struct ivopts_data *data)
2662 unsigned int i, j, distinct = 1;
2663 struct iv_use *pre;
2664 struct iv_group *group;
2666 for (i = 0; i < data->vgroups.length (); i++)
2668 group = data->vgroups[i];
2669 if (group->vuses.length () == 1)
2670 continue;
2672 gcc_assert (address_p (group->type));
2673 if (group->vuses.length () == 2)
2675 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2676 group->vuses[1]->addr_offset) > 0)
2677 std::swap (group->vuses[0], group->vuses[1]);
2679 else
2680 group->vuses.qsort (group_compare_offset);
2682 if (distinct > 2)
2683 continue;
2685 distinct = 1;
2686 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2688 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2690 pre = group->vuses[j];
2691 distinct++;
2694 if (distinct > 2)
2695 break;
2699 return (distinct <= 2);
2702 /* For each group of address type uses, this function further groups
2703 these uses according to the maximum offset supported by target's
2704 [base + offset] addressing mode. */
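/* A hypothetical example: for uses *p, *(p + 4) and *(p + 65536) on a
   target that only encodes 16-bit displacements, the first two uses stay
   in one group while the third is moved into a new group, since its offset
   relative to the first use does not fit the addressing mode.  */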
2706 static void
2707 split_address_groups (struct ivopts_data *data)
2709 unsigned int i, j;
2710 /* Whether to split every group, regardless of whether its offsets fit the addressing mode. */
2711 bool split_p = split_small_address_groups_p (data);
2713 for (i = 0; i < data->vgroups.length (); i++)
2715 struct iv_group *new_group = NULL;
2716 struct iv_group *group = data->vgroups[i];
2717 struct iv_use *use = group->vuses[0];
2719 use->id = 0;
2720 use->group_id = group->id;
2721 if (group->vuses.length () == 1)
2722 continue;
2724 gcc_assert (address_p (use->type));
2726 for (j = 1; j < group->vuses.length ();)
2728 struct iv_use *next = group->vuses[j];
2729 poly_int64 offset = next->addr_offset - use->addr_offset;
2731 /* Split the group if asked to, or if the offset against the first
2732 use can't fit in the offset part of the addressing mode. IV uses
2733 having the same offset are still kept in one group. */
2734 if (maybe_ne (offset, 0)
2735 && (split_p || !addr_offset_valid_p (use, offset)))
2737 if (!new_group)
2738 new_group = record_group (data, group->type);
2739 group->vuses.ordered_remove (j);
2740 new_group->vuses.safe_push (next);
2741 continue;
2744 next->id = j;
2745 next->group_id = group->id;
2746 j++;
2751 /* Finds uses of the induction variables that are interesting. */
2753 static void
2754 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2756 basic_block bb;
2757 gimple_stmt_iterator bsi;
2758 unsigned i;
2759 edge e;
2761 for (i = 0; i < data->current_loop->num_nodes; i++)
2763 edge_iterator ei;
2764 bb = body[i];
2766 FOR_EACH_EDGE (e, ei, bb->succs)
2767 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2768 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2769 find_interesting_uses_outside (data, e);
2771 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2772 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2773 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2774 if (!is_gimple_debug (gsi_stmt (bsi)))
2775 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2778 split_address_groups (data);
2780 if (dump_file && (dump_flags & TDF_DETAILS))
2782 fprintf (dump_file, "\n<IV Groups>:\n");
2783 dump_groups (dump_file, data);
2784 fprintf (dump_file, "\n");
2788 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2789 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2790 we are at the top-level of the processed address. */
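/* Small illustrative cases: for EXPR = &a[i + 3] with 4-byte array
   elements, the ADDR_EXPR and ARRAY_REF cases combine to return &a[i] with
   *OFFSET set to 12; for EXPR = p + 16 (a POINTER_PLUS_EXPR) the result is
   p with *OFFSET set to 16.  */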
2792 static tree
2793 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2794 poly_int64 *offset)
2796 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2797 enum tree_code code;
2798 tree type, orig_type = TREE_TYPE (expr);
2799 poly_int64 off0, off1;
2800 HOST_WIDE_INT st;
2801 tree orig_expr = expr;
2803 STRIP_NOPS (expr);
2805 type = TREE_TYPE (expr);
2806 code = TREE_CODE (expr);
2807 *offset = 0;
2809 switch (code)
2811 case POINTER_PLUS_EXPR:
2812 case PLUS_EXPR:
2813 case MINUS_EXPR:
2814 op0 = TREE_OPERAND (expr, 0);
2815 op1 = TREE_OPERAND (expr, 1);
2817 op0 = strip_offset_1 (op0, false, false, &off0);
2818 op1 = strip_offset_1 (op1, false, false, &off1);
2820 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2821 if (op0 == TREE_OPERAND (expr, 0)
2822 && op1 == TREE_OPERAND (expr, 1))
2823 return orig_expr;
2825 if (integer_zerop (op1))
2826 expr = op0;
2827 else if (integer_zerop (op0))
2829 if (code == MINUS_EXPR)
2830 expr = fold_build1 (NEGATE_EXPR, type, op1);
2831 else
2832 expr = op1;
2834 else
2835 expr = fold_build2 (code, type, op0, op1);
2837 return fold_convert (orig_type, expr);
2839 case MULT_EXPR:
2840 op1 = TREE_OPERAND (expr, 1);
2841 if (!cst_and_fits_in_hwi (op1))
2842 return orig_expr;
2844 op0 = TREE_OPERAND (expr, 0);
2845 op0 = strip_offset_1 (op0, false, false, &off0);
2846 if (op0 == TREE_OPERAND (expr, 0))
2847 return orig_expr;
2849 *offset = off0 * int_cst_value (op1);
2850 if (integer_zerop (op0))
2851 expr = op0;
2852 else
2853 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2855 return fold_convert (orig_type, expr);
2857 case ARRAY_REF:
2858 case ARRAY_RANGE_REF:
2859 if (!inside_addr)
2860 return orig_expr;
2862 step = array_ref_element_size (expr);
2863 if (!cst_and_fits_in_hwi (step))
2864 break;
2866 st = int_cst_value (step);
2867 op1 = TREE_OPERAND (expr, 1);
2868 op1 = strip_offset_1 (op1, false, false, &off1);
2869 *offset = off1 * st;
2871 if (top_compref
2872 && integer_zerop (op1))
2874 /* Strip the component reference completely. */
2875 op0 = TREE_OPERAND (expr, 0);
2876 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2877 *offset += off0;
2878 return op0;
2880 break;
2882 case COMPONENT_REF:
2884 tree field;
2886 if (!inside_addr)
2887 return orig_expr;
2889 tmp = component_ref_field_offset (expr);
2890 field = TREE_OPERAND (expr, 1);
2891 if (top_compref
2892 && cst_and_fits_in_hwi (tmp)
2893 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2895 HOST_WIDE_INT boffset, abs_off;
2897 /* Strip the component reference completely. */
2898 op0 = TREE_OPERAND (expr, 0);
2899 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2900 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2901 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2902 if (boffset < 0)
2903 abs_off = -abs_off;
2905 *offset = off0 + int_cst_value (tmp) + abs_off;
2906 return op0;
2909 break;
2911 case ADDR_EXPR:
2912 op0 = TREE_OPERAND (expr, 0);
2913 op0 = strip_offset_1 (op0, true, true, &off0);
2914 *offset += off0;
2916 if (op0 == TREE_OPERAND (expr, 0))
2917 return orig_expr;
2919 expr = build_fold_addr_expr (op0);
2920 return fold_convert (orig_type, expr);
2922 case MEM_REF:
2923 /* ??? Offset operand? */
2924 inside_addr = false;
2925 break;
2927 default:
2928 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2929 return build_int_cst (orig_type, 0);
2930 return orig_expr;
2933 /* Default handling of expressions for which we want to recurse into
2934 the first operand. */
2935 op0 = TREE_OPERAND (expr, 0);
2936 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2937 *offset += off0;
2939 if (op0 == TREE_OPERAND (expr, 0)
2940 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2941 return orig_expr;
2943 expr = copy_node (expr);
2944 TREE_OPERAND (expr, 0) = op0;
2945 if (op1)
2946 TREE_OPERAND (expr, 1) = op1;
2948 /* Inside an address, we might strip the top-level component references,
2949 thus changing the type of the expression. Handling of ADDR_EXPR
2950 will fix that. */
2951 expr = fold_convert (orig_type, expr);
2953 return expr;
2956 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2958 static tree
2959 strip_offset (tree expr, poly_uint64_pod *offset)
2961 poly_int64 off;
2962 tree core = strip_offset_1 (expr, false, false, &off);
2963 *offset = off;
2964 return core;
2967 /* Returns variant of TYPE that can be used as base for different uses.
2968 We return unsigned type with the same precision, which avoids problems
2969 with overflows. */
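/* E.g. (illustrative): int becomes unsigned int, unsigned int is returned
   unchanged, and a pointer type becomes an unsigned integer type of the
   same precision.  */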
2971 static tree
2972 generic_type_for (tree type)
2974 if (POINTER_TYPE_P (type))
2975 return unsigned_type_for (type);
2977 if (TYPE_UNSIGNED (type))
2978 return type;
2980 return unsigned_type_for (type);
2983 /* Private data for walk_tree. */
2985 struct walk_tree_data
2987 bitmap *inv_vars;
2988 struct ivopts_data *idata;
2991 /* Callback function for walk_tree, it records invariants and symbol
2992 reference in *EXPR_P. DATA is the structure storing result info. */
2994 static tree
2995 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2997 tree op = *expr_p;
2998 struct version_info *info;
2999 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
3001 if (TREE_CODE (op) != SSA_NAME)
3002 return NULL_TREE;
3004 info = name_info (wdata->idata, op);
3005 /* Because we expand simple operations when finding IVs, a loop invariant
3006 variable that isn't referred to by the original loop could be used now.
3007 Record such invariant variables here. */
3008 if (!info->iv)
3010 struct ivopts_data *idata = wdata->idata;
3011 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3013 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3015 tree steptype = TREE_TYPE (op);
3016 if (POINTER_TYPE_P (steptype))
3017 steptype = sizetype;
3018 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3019 record_invariant (idata, op, false);
3022 if (!info->inv_id || info->has_nonlin_use)
3023 return NULL_TREE;
3025 if (!*wdata->inv_vars)
3026 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3027 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3029 return NULL_TREE;
3032 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3033 store them. */
3035 static inline void
3036 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3038 struct walk_tree_data wdata;
3040 if (!inv_vars)
3041 return;
3043 wdata.idata = data;
3044 wdata.inv_vars = inv_vars;
3045 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3048 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3049 will be recorded if it doesn't exist yet. Given the two exprs below:
3050 inv_expr + cst1, inv_expr + cst2
3051 it's hard to decide whether the constant part should be stripped
3052 or not. We choose not to strip it, based on the following facts:
3053 1) We would need to count the ADD cost for the constant part if it's
3054 stripped, which isn't always trivial where this function is called.
3055 2) Stripping the constant away may conflict with the subsequent loop
3056 invariant hoisting pass.
3057 3) Not stripping the constant away results in more invariant exprs,
3058 which usually leads to a decision preferring lower register pressure. */
3060 static iv_inv_expr_ent *
3061 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3063 STRIP_NOPS (inv_expr);
3065 if (poly_int_tree_p (inv_expr)
3066 || TREE_CODE (inv_expr) == SSA_NAME)
3067 return NULL;
3069 /* Don't strip constant part away as we used to. */
3071 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3072 struct iv_inv_expr_ent ent;
3073 ent.expr = inv_expr;
3074 ent.hash = iterative_hash_expr (inv_expr, 0);
3075 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3077 if (!*slot)
3079 *slot = XNEW (struct iv_inv_expr_ent);
3080 (*slot)->expr = inv_expr;
3081 (*slot)->hash = ent.hash;
3082 (*slot)->id = ++data->max_inv_expr_id;
3085 return *slot;
3089 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3090 unsuitable as ivopts candidates for potentially involving undefined
3091 behavior. */
3093 static tree
3094 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3096 basic_block bb = (basic_block) bb_;
3097 if (TREE_CODE (*tp) == SSA_NAME
3098 && ssa_name_maybe_undef_p (*tp)
3099 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3100 return *tp;
3101 if (!EXPR_P (*tp))
3102 *walk_subtrees = 0;
3103 return NULL;
3106 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3107 position to POS. If USE is not NULL, the candidate is set as related to
3108 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3109 replacement of the final value of the iv by a direct computation. */
3111 static struct iv_cand *
3112 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3113 enum iv_position pos, struct iv_use *use,
3114 gimple *incremented_at, struct iv *orig_iv = NULL,
3115 bool doloop = false)
3117 unsigned i;
3118 struct iv_cand *cand = NULL;
3119 tree type, orig_type;
3121 gcc_assert (base && step);
3123 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3124 live, but the ivopts code may replace a real pointer with one
3125 pointing before or after the memory block that is then adjusted
3126 into the memory block during the loop. FIXME: It would likely be
3127 better to actually force the pointer live and still use ivopts;
3128 for example, it would be enough to write the pointer into memory
3129 and keep it there until after the loop. */
3130 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3131 return NULL;
3133 /* If BASE contains undefined SSA names make sure we only record
3134 the original IV. */
3135 bool involves_undefs = false;
3136 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3138 if (pos != IP_ORIGINAL)
3139 return NULL;
3140 important = false;
3141 involves_undefs = true;
3144 /* For non-original variables, make sure their values are computed in a type
3145 that does not invoke undefined behavior on overflows (since in general,
3146 we cannot prove that these induction variables are non-wrapping). */
3147 if (pos != IP_ORIGINAL)
3149 orig_type = TREE_TYPE (base);
3150 type = generic_type_for (orig_type);
3151 if (type != orig_type)
3153 base = fold_convert (type, base);
3154 step = fold_convert (type, step);
3158 for (i = 0; i < data->vcands.length (); i++)
3160 cand = data->vcands[i];
3162 if (cand->pos != pos)
3163 continue;
3165 if (cand->incremented_at != incremented_at
3166 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3167 && cand->ainc_use != use))
3168 continue;
3170 if (operand_equal_p (base, cand->iv->base, 0)
3171 && operand_equal_p (step, cand->iv->step, 0)
3172 && (TYPE_PRECISION (TREE_TYPE (base))
3173 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3174 break;
3177 if (i == data->vcands.length ())
3179 cand = XCNEW (struct iv_cand);
3180 cand->id = i;
3181 cand->iv = alloc_iv (data, base, step);
3182 cand->pos = pos;
3183 if (pos != IP_ORIGINAL)
3185 if (doloop)
3186 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3187 else
3188 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3189 cand->var_after = cand->var_before;
3191 cand->important = important;
3192 cand->involves_undefs = involves_undefs;
3193 cand->incremented_at = incremented_at;
3194 cand->doloop_p = doloop;
3195 data->vcands.safe_push (cand);
3197 if (!poly_int_tree_p (step))
3199 find_inv_vars (data, &step, &cand->inv_vars);
3201 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3202 /* Share bitmap between inv_vars and inv_exprs for cand. */
3203 if (inv_expr != NULL)
3205 cand->inv_exprs = cand->inv_vars;
3206 cand->inv_vars = NULL;
3207 if (cand->inv_exprs)
3208 bitmap_clear (cand->inv_exprs);
3209 else
3210 cand->inv_exprs = BITMAP_ALLOC (NULL);
3212 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3216 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3217 cand->ainc_use = use;
3218 else
3219 cand->ainc_use = NULL;
3221 cand->orig_iv = orig_iv;
3222 if (dump_file && (dump_flags & TDF_DETAILS))
3223 dump_cand (dump_file, cand);
3226 cand->important |= important;
3227 cand->doloop_p |= doloop;
3229 /* Relate candidate to the group for which it is added. */
3230 if (use)
3231 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3233 return cand;
3236 /* Returns true if incrementing the induction variable at the end of the LOOP
3237 is allowed.
3239 The purpose is to avoid splitting the latch edge with a biv increment, thus
3240 creating a jump, possibly confusing other optimization passes and leaving
3241 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3242 available (so we do not have a better alternative), or if the latch edge
3243 is already nonempty. */
3245 static bool
3246 allow_ip_end_pos_p (class loop *loop)
3248 if (!ip_normal_pos (loop))
3249 return true;
3251 if (!empty_block_p (ip_end_pos (loop)))
3252 return true;
3254 return false;
3257 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3258 Important field is set to IMPORTANT. */
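/* Sketch of the idea (whether it applies is target-dependent): for an
   address use *p whose step is 4 bytes on a target with 4-byte
   post-increment accesses, a candidate is added at IP_AFTER_USE so the
   access can become a single auto-increment instruction; targets with
   pre-increment accesses instead get an IP_BEFORE_USE candidate whose base
   is adjusted by one step.  */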
3260 static void
3261 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3262 bool important, struct iv_use *use)
3264 basic_block use_bb = gimple_bb (use->stmt);
3265 machine_mode mem_mode;
3266 unsigned HOST_WIDE_INT cstepi;
3268 /* If we insert the increment in any position other than the standard
3269 ones, we must ensure that it is incremented once per iteration.
3270 It must not be in an inner nested loop, or one side of an if
3271 statement. */
3272 if (use_bb->loop_father != data->current_loop
3273 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3274 || stmt_can_throw_internal (cfun, use->stmt)
3275 || !cst_and_fits_in_hwi (step))
3276 return;
3278 cstepi = int_cst_value (step);
3280 mem_mode = TYPE_MODE (use->mem_type);
3281 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3282 || USE_STORE_PRE_INCREMENT (mem_mode))
3283 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3284 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3285 || USE_STORE_PRE_DECREMENT (mem_mode))
3286 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3288 enum tree_code code = MINUS_EXPR;
3289 tree new_base;
3290 tree new_step = step;
3292 if (POINTER_TYPE_P (TREE_TYPE (base)))
3294 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3295 code = POINTER_PLUS_EXPR;
3297 else
3298 new_step = fold_convert (TREE_TYPE (base), new_step);
3299 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3300 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3301 use->stmt);
3303 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3304 || USE_STORE_POST_INCREMENT (mem_mode))
3305 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3306 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3307 || USE_STORE_POST_DECREMENT (mem_mode))
3308 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3310 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3311 use->stmt);
3315 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3316 position to POS. If USE is not NULL, the candidate is set as related to
3317 it. The candidate computation is scheduled before the exit condition and at
3318 the end of the loop. */
3320 static void
3321 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3322 struct iv_use *use, struct iv *orig_iv = NULL,
3323 bool doloop = false)
3325 if (ip_normal_pos (data->current_loop))
3326 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3327 doloop);
3328 /* Exclude doloop candidate here since it requires decrement then comparison
3329 and jump, the IP_END position doesn't match. */
3330 if (!doloop && ip_end_pos (data->current_loop)
3331 && allow_ip_end_pos_p (data->current_loop))
3332 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3335 /* Adds standard iv candidates. */
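/* Concretely (illustrative): these are the canonical counters 0, 1, 2, ...
   in int, plus the same counters in long and long long when those types are
   wider than the previous one yet still fit in a machine word.  */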
3337 static void
3338 add_standard_iv_candidates (struct ivopts_data *data)
3340 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3342 /* The same for a double-integer type if it is still fast enough. */
3343 if (TYPE_PRECISION
3344 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3345 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3346 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3347 build_int_cst (long_integer_type_node, 1), true, NULL);
3349 /* The same for a double-integer type if it is still fast enough. */
3350 if (TYPE_PRECISION
3351 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3352 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3353 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3354 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3358 /* Adds candidates based on the old induction variable IV. */
3360 static void
3361 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3363 gimple *phi;
3364 tree def;
3365 struct iv_cand *cand;
3367 /* Check if this biv is used in address type use. */
3368 if (iv->no_overflow && iv->have_address_use
3369 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3370 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3372 tree base = fold_convert (sizetype, iv->base);
3373 tree step = fold_convert (sizetype, iv->step);
3375 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3376 add_candidate (data, base, step, true, NULL, iv);
3377 /* Add iv cand of the original type only if it has nonlinear use. */
3378 if (iv->nonlin_use)
3379 add_candidate (data, iv->base, iv->step, true, NULL);
3381 else
3382 add_candidate (data, iv->base, iv->step, true, NULL);
3384 /* The same, but with initial value zero. */
3385 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3386 add_candidate (data, size_int (0), iv->step, true, NULL);
3387 else
3388 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3389 iv->step, true, NULL);
3391 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3392 if (gimple_code (phi) == GIMPLE_PHI)
3394 /* Additionally record the possibility of leaving the original iv
3395 untouched. */
3396 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3397 /* Don't add candidate if it's from another PHI node because
3398 it's an affine iv appearing in the form of PEELED_CHREC. */
3399 phi = SSA_NAME_DEF_STMT (def);
3400 if (gimple_code (phi) != GIMPLE_PHI)
3402 cand = add_candidate_1 (data,
3403 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3404 SSA_NAME_DEF_STMT (def));
3405 if (cand)
3407 cand->var_before = iv->ssa_name;
3408 cand->var_after = def;
3411 else
3412 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3416 /* Adds candidates based on the old induction variables. */
3418 static void
3419 add_iv_candidate_for_bivs (struct ivopts_data *data)
3421 unsigned i;
3422 struct iv *iv;
3423 bitmap_iterator bi;
3425 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3427 iv = ver_info (data, i)->iv;
3428 if (iv && iv->biv_p && !integer_zerop (iv->step))
3429 add_iv_candidate_for_biv (data, iv);
3433 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3435 static void
3436 record_common_cand (struct ivopts_data *data, tree base,
3437 tree step, struct iv_use *use)
3439 class iv_common_cand ent;
3440 class iv_common_cand **slot;
3442 ent.base = base;
3443 ent.step = step;
3444 ent.hash = iterative_hash_expr (base, 0);
3445 ent.hash = iterative_hash_expr (step, ent.hash);
3447 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3448 if (*slot == NULL)
3450 *slot = new iv_common_cand ();
3451 (*slot)->base = base;
3452 (*slot)->step = step;
3453 (*slot)->uses.create (8);
3454 (*slot)->hash = ent.hash;
3455 data->iv_common_cands.safe_push ((*slot));
3458 gcc_assert (use != NULL);
3459 (*slot)->uses.safe_push (use);
3460 return;
3463 /* Comparison function used to sort common candidates. */
3465 static int
3466 common_cand_cmp (const void *p1, const void *p2)
3468 unsigned n1, n2;
3469 const class iv_common_cand *const *const ccand1
3470 = (const class iv_common_cand *const *)p1;
3471 const class iv_common_cand *const *const ccand2
3472 = (const class iv_common_cand *const *)p2;
3474 n1 = (*ccand1)->uses.length ();
3475 n2 = (*ccand2)->uses.length ();
3476 return n2 - n1;
3479 /* Adds IV candidates based on the common candidates recorded. */
3481 static void
3482 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3484 unsigned i, j;
3485 struct iv_cand *cand_1, *cand_2;
3487 data->iv_common_cands.qsort (common_cand_cmp);
3488 for (i = 0; i < data->iv_common_cands.length (); i++)
3490 class iv_common_cand *ptr = data->iv_common_cands[i];
3492 /* Only add IV candidate if it's derived from multiple uses. */
3493 if (ptr->uses.length () <= 1)
3494 break;
3496 cand_1 = NULL;
3497 cand_2 = NULL;
3498 if (ip_normal_pos (data->current_loop))
3499 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3500 false, IP_NORMAL, NULL, NULL);
3502 if (ip_end_pos (data->current_loop)
3503 && allow_ip_end_pos_p (data->current_loop))
3504 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3505 false, IP_END, NULL, NULL);
3507 /* Bind deriving uses and the new candidates. */
3508 for (j = 0; j < ptr->uses.length (); j++)
3510 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3511 if (cand_1)
3512 bitmap_set_bit (group->related_cands, cand_1->id);
3513 if (cand_2)
3514 bitmap_set_bit (group->related_cands, cand_2->id);
3518 /* Release data since it is useless from this point. */
3519 data->iv_common_cand_tab->empty ();
3520 data->iv_common_cands.truncate (0);
3523 /* Adds candidates based on the value of USE's iv. */
3525 static void
3526 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3528 poly_uint64 offset;
3529 tree base;
3530 struct iv *iv = use->iv;
3531 tree basetype = TREE_TYPE (iv->base);
3533 /* Don't add a candidate for an iv_use whose type is neither integer nor
3534 pointer, or doesn't have mode precision. Instead, add a candidate for the
3535 corresponding scev in an unsigned type with the same precision. See PR93674 for more info. */
3536 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3537 || !type_has_mode_precision_p (basetype))
3539 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3540 TYPE_UNSIGNED (basetype));
3541 add_candidate (data, fold_convert (basetype, iv->base),
3542 fold_convert (basetype, iv->step), false, NULL);
3543 return;
3546 add_candidate (data, iv->base, iv->step, false, use);
3548 /* Record common candidate for use in case it can be shared by others. */
3549 record_common_cand (data, iv->base, iv->step, use);
3551 /* Record common candidate with initial value zero. */
3552 basetype = TREE_TYPE (iv->base);
3553 if (POINTER_TYPE_P (basetype))
3554 basetype = sizetype;
3555 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3557 /* Compare the cost of an address with an unscaled index with the cost of
3558 an address with a scaled index and add candidate if useful. */
3559 poly_int64 step;
3560 if (use != NULL
3561 && poly_int_tree_p (iv->step, &step)
3562 && address_p (use->type))
3564 poly_int64 new_step;
3565 unsigned int fact = preferred_mem_scale_factor
3566 (use->iv->base,
3567 TYPE_MODE (use->mem_type),
3568 optimize_loop_for_speed_p (data->current_loop));
3570 if (fact != 1
3571 && multiple_p (step, fact, &new_step))
3572 add_candidate (data, size_int (0),
3573 wide_int_to_tree (sizetype, new_step),
3574 true, NULL);
3577 /* Record common candidate with constant offset stripped in base.
3578 Like the use itself, we also add candidate directly for it. */
3579 base = strip_offset (iv->base, &offset);
3580 if (maybe_ne (offset, 0U) || base != iv->base)
3582 record_common_cand (data, base, iv->step, use);
3583 add_candidate (data, base, iv->step, false, use);
3586 /* Record common candidate with base_object removed in base. */
3587 base = iv->base;
3588 STRIP_NOPS (base);
3589 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3591 tree step = iv->step;
3593 STRIP_NOPS (step);
3594 base = TREE_OPERAND (base, 1);
3595 step = fold_convert (sizetype, step);
3596 record_common_cand (data, base, step, use);
3597 /* Also record common candidate with offset stripped. */
3598 tree alt_base, alt_offset;
3599 split_constant_offset (base, &alt_base, &alt_offset);
3600 if (!integer_zerop (alt_offset))
3601 record_common_cand (data, alt_base, step, use);
3604 /* Finally, add auto-increment candidates. Make such variables
3605 important since other iv uses with the same base object may be based
3606 on them. */
3607 if (use != NULL && address_p (use->type))
3608 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3611 /* Adds candidates based on the uses. */
3613 static void
3614 add_iv_candidate_for_groups (struct ivopts_data *data)
3616 unsigned i;
3618 /* Only add candidate for the first use in group. */
3619 for (i = 0; i < data->vgroups.length (); i++)
3621 struct iv_group *group = data->vgroups[i];
3623 gcc_assert (group->vuses[0] != NULL);
3624 add_iv_candidate_for_use (data, group->vuses[0]);
3626 add_iv_candidate_derived_from_uses (data);
3629 /* Record important candidates and add them to related_cands bitmaps. */
3631 static void
3632 record_important_candidates (struct ivopts_data *data)
3634 unsigned i;
3635 struct iv_group *group;
3637 for (i = 0; i < data->vcands.length (); i++)
3639 struct iv_cand *cand = data->vcands[i];
3641 if (cand->important)
3642 bitmap_set_bit (data->important_candidates, i);
3645 data->consider_all_candidates = (data->vcands.length ()
3646 <= CONSIDER_ALL_CANDIDATES_BOUND);
3648 /* Add important candidates to groups' related_cands bitmaps. */
3649 for (i = 0; i < data->vgroups.length (); i++)
3651 group = data->vgroups[i];
3652 bitmap_ior_into (group->related_cands, data->important_candidates);
3656 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3657 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3658 we allocate a simple list to every use. */
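/* For instance (illustrative): a group related to 5 candidates gets a cost
   map of size 8, the next power of two, so the open-addressing lookup in
   get_group_iv_cost can reduce a candidate id modulo the size with a simple
   mask.  */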
3660 static void
3661 alloc_use_cost_map (struct ivopts_data *data)
3663 unsigned i, size, s;
3665 for (i = 0; i < data->vgroups.length (); i++)
3667 struct iv_group *group = data->vgroups[i];
3669 if (data->consider_all_candidates)
3670 size = data->vcands.length ();
3671 else
3673 s = bitmap_count_bits (group->related_cands);
3675 /* Round up to a power of two, so that computing the modulo by it is fast. */
3676 size = s ? (1 << ceil_log2 (s)) : 1;
3679 group->n_map_members = size;
3680 group->cost_map = XCNEWVEC (class cost_pair, size);
3684 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it
3685 depends on invariants INV_VARS, that the value used in expressing it is
3686 VALUE, and that in case of iv elimination the comparison operator is COMP. */
3688 static void
3689 set_group_iv_cost (struct ivopts_data *data,
3690 struct iv_group *group, struct iv_cand *cand,
3691 comp_cost cost, bitmap inv_vars, tree value,
3692 enum tree_code comp, bitmap inv_exprs)
3694 unsigned i, s;
3696 if (cost.infinite_cost_p ())
3698 BITMAP_FREE (inv_vars);
3699 BITMAP_FREE (inv_exprs);
3700 return;
3703 if (data->consider_all_candidates)
3705 group->cost_map[cand->id].cand = cand;
3706 group->cost_map[cand->id].cost = cost;
3707 group->cost_map[cand->id].inv_vars = inv_vars;
3708 group->cost_map[cand->id].inv_exprs = inv_exprs;
3709 group->cost_map[cand->id].value = value;
3710 group->cost_map[cand->id].comp = comp;
3711 return;
3714 /* n_map_members is a power of two, so this computes modulo. */
3715 s = cand->id & (group->n_map_members - 1);
3716 for (i = s; i < group->n_map_members; i++)
3717 if (!group->cost_map[i].cand)
3718 goto found;
3719 for (i = 0; i < s; i++)
3720 if (!group->cost_map[i].cand)
3721 goto found;
3723 gcc_unreachable ();
3725 found:
3726 group->cost_map[i].cand = cand;
3727 group->cost_map[i].cost = cost;
3728 group->cost_map[i].inv_vars = inv_vars;
3729 group->cost_map[i].inv_exprs = inv_exprs;
3730 group->cost_map[i].value = value;
3731 group->cost_map[i].comp = comp;
3734 /* Gets cost of (GROUP, CAND) pair. */
3736 static class cost_pair *
3737 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3738 struct iv_cand *cand)
3740 unsigned i, s;
3741 class cost_pair *ret;
3743 if (!cand)
3744 return NULL;
3746 if (data->consider_all_candidates)
3748 ret = group->cost_map + cand->id;
3749 if (!ret->cand)
3750 return NULL;
3752 return ret;
3755 /* n_map_members is a power of two, so this computes modulo. */
3756 s = cand->id & (group->n_map_members - 1);
3757 for (i = s; i < group->n_map_members; i++)
3758 if (group->cost_map[i].cand == cand)
3759 return group->cost_map + i;
3760 else if (group->cost_map[i].cand == NULL)
3761 return NULL;
3762 for (i = 0; i < s; i++)
3763 if (group->cost_map[i].cand == cand)
3764 return group->cost_map + i;
3765 else if (group->cost_map[i].cand == NULL)
3766 return NULL;
3768 return NULL;
3771 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3772 static rtx
3773 produce_memory_decl_rtl (tree obj, int *regno)
3775 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3776 machine_mode address_mode = targetm.addr_space.address_mode (as);
3777 rtx x;
3779 gcc_assert (obj);
3780 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3782 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3783 x = gen_rtx_SYMBOL_REF (address_mode, name);
3784 SET_SYMBOL_REF_DECL (x, obj);
3785 x = gen_rtx_MEM (DECL_MODE (obj), x);
3786 set_mem_addr_space (x, as);
3787 targetm.encode_section_info (obj, x, true);
3789 else
3791 x = gen_raw_REG (address_mode, (*regno)++);
3792 x = gen_rtx_MEM (DECL_MODE (obj), x);
3793 set_mem_addr_space (x, as);
3796 return x;
3799 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3800 walk_tree. DATA contains the actual fake register number. */
3802 static tree
3803 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3805 tree obj = NULL_TREE;
3806 rtx x = NULL_RTX;
3807 int *regno = (int *) data;
3809 switch (TREE_CODE (*expr_p))
3811 case ADDR_EXPR:
3812 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3813 handled_component_p (*expr_p);
3814 expr_p = &TREE_OPERAND (*expr_p, 0))
3815 continue;
3816 obj = *expr_p;
3817 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3818 x = produce_memory_decl_rtl (obj, regno);
3819 break;
3821 case SSA_NAME:
3822 *ws = 0;
3823 obj = SSA_NAME_VAR (*expr_p);
3824 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3825 if (!obj)
3826 return NULL_TREE;
3827 if (!DECL_RTL_SET_P (obj))
3828 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3829 break;
3831 case VAR_DECL:
3832 case PARM_DECL:
3833 case RESULT_DECL:
3834 *ws = 0;
3835 obj = *expr_p;
3837 if (DECL_RTL_SET_P (obj))
3838 break;
3840 if (DECL_MODE (obj) == BLKmode)
3841 x = produce_memory_decl_rtl (obj, regno);
3842 else
3843 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3845 break;
3847 default:
3848 break;
3851 if (x)
3853 decl_rtl_to_reset.safe_push (obj);
3854 SET_DECL_RTL (obj, x);
3857 return NULL_TREE;
3860 /* Predict whether the given loop will be transformed in the RTL
3861 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3862 This is only for target independent checks, see targetm.predict_doloop_p
3863 for the target dependent ones.
3865 Note that according to some initial investigation, some checks, like the
3866 costly niter check and invalid stmt scanning, don't bring much gain in
3867 general cases, so keep this as simple as possible first.
3869 Some RTL specific checks seem impossible to perform on gimple; if any new
3870 checks or easy checks _are_ missing here, please add them. */
3872 static bool
3873 generic_predict_doloop_p (struct ivopts_data *data)
3875 class loop *loop = data->current_loop;
3877 /* Call target hook for target dependent checks. */
3878 if (!targetm.predict_doloop_p (loop))
3880 if (dump_file && (dump_flags & TDF_DETAILS))
3881 fprintf (dump_file, "Predict doloop failure due to"
3882 " target specific checks.\n");
3883 return false;
3886 /* Similar to doloop_optimize, check iteration description to know it's
3887 suitable or not. Keep it as simple as possible, feel free to extend it
3888 if you find any multiple exits cases matter. */
3889 edge exit = single_dom_exit (loop);
3890 class tree_niter_desc *niter_desc;
3891 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3893 if (dump_file && (dump_flags & TDF_DETAILS))
3894 fprintf (dump_file, "Predict doloop failure due to"
3895 " unexpected niters.\n");
3896 return false;
3899 /* Similar to doloop_optimize, check whether the iteration count is too
3900 small and not profitable. */
3901 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3902 if (est_niter == -1)
3903 est_niter = get_likely_max_loop_iterations_int (loop);
3904 if (est_niter >= 0 && est_niter < 3)
3906 if (dump_file && (dump_flags & TDF_DETAILS))
3907 fprintf (dump_file,
3908 "Predict doloop failure due to"
3909 " too few iterations (%u).\n",
3910 (unsigned int) est_niter);
3911 return false;
3914 return true;
3917 /* Determines cost of the computation of EXPR. */
3919 static unsigned
3920 computation_cost (tree expr, bool speed)
3922 rtx_insn *seq;
3923 rtx rslt;
3924 tree type = TREE_TYPE (expr);
3925 unsigned cost;
3926 /* Avoid using hard regs in ways which may be unsupported. */
3927 int regno = LAST_VIRTUAL_REGISTER + 1;
3928 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3929 enum node_frequency real_frequency = node->frequency;
3931 node->frequency = NODE_FREQUENCY_NORMAL;
3932 crtl->maybe_hot_insn_p = speed;
3933 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3934 start_sequence ();
3935 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3936 seq = get_insns ();
3937 end_sequence ();
3938 default_rtl_profile ();
3939 node->frequency = real_frequency;
3941 cost = seq_cost (seq, speed);
3942 if (MEM_P (rslt))
3943 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3944 TYPE_ADDR_SPACE (type), speed);
3945 else if (!REG_P (rslt))
3946 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3948 return cost;
3951 /* Returns variable containing the value of candidate CAND at statement AT. */
3953 static tree
3954 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3956 if (stmt_after_increment (loop, cand, stmt))
3957 return cand->var_after;
3958 else
3959 return cand->var_before;
3962 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3963 same precision that is at least as wide as the precision of TYPE, stores
3964 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3965 type of A and B. */
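/* E.g. (illustrative): if *A is (unsigned int) a64 and *B is
   (unsigned int) b64 with 64-bit operands a64 and b64, *A and *B are
   replaced by a64 and b64 and their 64-bit type is returned, so that the
   caller can compute the difference in the wider type before casting the
   result back.  */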
3967 static tree
3968 determine_common_wider_type (tree *a, tree *b)
3970 tree wider_type = NULL;
3971 tree suba, subb;
3972 tree atype = TREE_TYPE (*a);
3974 if (CONVERT_EXPR_P (*a))
3976 suba = TREE_OPERAND (*a, 0);
3977 wider_type = TREE_TYPE (suba);
3978 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3979 return atype;
3981 else
3982 return atype;
3984 if (CONVERT_EXPR_P (*b))
3986 subb = TREE_OPERAND (*b, 0);
3987 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3988 return atype;
3990 else
3991 return atype;
3993 *a = suba;
3994 *b = subb;
3995 return wider_type;
3998 /* Determines the expression by which USE is expressed from induction variable
3999 CAND at statement AT in LOOP. The expression is stored in two parts in a
4000 decomposed form. The invariant part is stored in AFF_INV; while variant
4001 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
4002 non-null. Returns false if USE cannot be expressed using CAND. */
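/* A simple illustrative case: if USE has {base 0, step 4} and CAND has
   {base 0, step 1}, the ratio is 4, so following
   use = ubase - ratio * cbase + ratio * var
   AFF_INV becomes 0 and AFF_VAR becomes 4 * var.  */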
4004 static bool
4005 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
4006 struct iv_cand *cand, class aff_tree *aff_inv,
4007 class aff_tree *aff_var, widest_int *prat = NULL)
4009 tree ubase = use->iv->base, ustep = use->iv->step;
4010 tree cbase = cand->iv->base, cstep = cand->iv->step;
4011 tree common_type, uutype, var, cstep_common;
4012 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4013 aff_tree aff_cbase;
4014 widest_int rat;
4016 /* We must have a precision to express the values of use. */
4017 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4018 return false;
4020 var = var_at_stmt (loop, cand, at);
4021 uutype = unsigned_type_for (utype);
4023 /* If the conversion is not noop, perform it. */
4024 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4026 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4027 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4029 tree inner_base, inner_step, inner_type;
4030 inner_base = TREE_OPERAND (cbase, 0);
4031 if (CONVERT_EXPR_P (cstep))
4032 inner_step = TREE_OPERAND (cstep, 0);
4033 else
4034 inner_step = cstep;
4036 inner_type = TREE_TYPE (inner_base);
4037 /* If the candidate was added from a biv whose type is smaller than
4038 ctype, we know that neither the candidate nor the biv overflows.
4039 In this case, it's safe to skip the conversion in the candidate.
4040 As an example, (unsigned short)((unsigned long)A) equals
4041 (unsigned short)A, if A has a type no larger than short. */
4042 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4044 cbase = inner_base;
4045 cstep = inner_step;
4048 cbase = fold_convert (uutype, cbase);
4049 cstep = fold_convert (uutype, cstep);
4050 var = fold_convert (uutype, var);
4053 /* Ratio is 1 when computing the value of biv cand by itself.
4054 We can't rely on constant_multiple_of in this case because the
4055 use is created after the original biv is selected. The call
4056 could fail because of inconsistent fold behavior. See PR68021
4057 for more information. */
4058 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4060 gcc_assert (is_gimple_assign (use->stmt));
4061 gcc_assert (use->iv->ssa_name == cand->var_after);
4062 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4063 rat = 1;
4065 else if (!constant_multiple_of (ustep, cstep, &rat))
4066 return false;
4068 if (prat)
4069 *prat = rat;
4071 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4072 type, we achieve better folding by computing their difference in this
4073 wider type, and casting the result to UUTYPE. We do not need to worry about
4074 overflows, as all the arithmetic will in the end be performed in UUTYPE
4075 anyway. */
4076 common_type = determine_common_wider_type (&ubase, &cbase);
4078 /* use = ubase - ratio * cbase + ratio * var. */
4079 tree_to_aff_combination (ubase, common_type, aff_inv);
4080 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4081 tree_to_aff_combination (var, uutype, aff_var);
4083 /* We need to shift the value if we are after the increment. */
4084 if (stmt_after_increment (loop, cand, at))
4086 aff_tree cstep_aff;
4088 if (common_type != uutype)
4089 cstep_common = fold_convert (common_type, cstep);
4090 else
4091 cstep_common = cstep;
4093 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4094 aff_combination_add (&aff_cbase, &cstep_aff);
4097 aff_combination_scale (&aff_cbase, -rat);
4098 aff_combination_add (aff_inv, &aff_cbase);
4099 if (common_type != uutype)
4100 aff_combination_convert (aff_inv, uutype);
4102 aff_combination_scale (aff_var, rat);
4103 return true;
4106 /* Determines the expression by which USE is expressed from induction variable
4107 CAND at statement AT in LOOP. The expression is stored in a decomposed
4108 form into AFF. Returns false if USE cannot be expressed using CAND. */
4110 static bool
4111 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4112 struct iv_cand *cand, class aff_tree *aff)
4114 aff_tree aff_var;
4116 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4117 return false;
4119 aff_combination_add (aff, &aff_var);
4120 return true;
4123 /* Return the type of USE. */
4125 static tree
4126 get_use_type (struct iv_use *use)
4128 tree base_type = TREE_TYPE (use->iv->base);
4129 tree type;
4131 if (use->type == USE_REF_ADDRESS)
4133 /* The base_type may be a void pointer. Create a pointer type based on
4134 the mem_ref instead. */
4135 type = build_pointer_type (TREE_TYPE (*use->op_p));
4136 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4137 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4139 else
4140 type = base_type;
4142 return type;
4145 /* Determines the expression by which USE is expressed from induction variable
4146 CAND at statement AT in LOOP. The computation is unshared. */
4148 static tree
4149 get_computation_at (class loop *loop, gimple *at,
4150 struct iv_use *use, struct iv_cand *cand)
4152 aff_tree aff;
4153 tree type = get_use_type (use);
4155 if (!get_computation_aff (loop, at, use, cand, &aff))
4156 return NULL_TREE;
4157 unshare_aff_combination (&aff);
4158 return fold_convert (type, aff_combination_to_tree (&aff));
4161 /* Like get_computation_at, but try harder, even if the computation
4162 is more expensive. Intended for debug stmts. */
4164 static tree
4165 get_debug_computation_at (class loop *loop, gimple *at,
4166 struct iv_use *use, struct iv_cand *cand)
4168 if (tree ret = get_computation_at (loop, at, use, cand))
4169 return ret;
4171 tree ubase = use->iv->base, ustep = use->iv->step;
4172 tree cbase = cand->iv->base, cstep = cand->iv->step;
4173 tree var;
4174 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4175 widest_int rat;
4177 /* We must have a precision to express the values of use. */
4178 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4179 return NULL_TREE;
4181 /* Try to handle the case that get_computation_at doesn't,
4182 try to express
4183 use = ubase + (var - cbase) / ratio. */
4184 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4185 &rat))
4186 return NULL_TREE;
4188 bool neg_p = false;
4189 if (wi::neg_p (rat))
4191 if (TYPE_UNSIGNED (ctype))
4192 return NULL_TREE;
4193 neg_p = true;
4194 rat = wi::neg (rat);
4197 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4198 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4199 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4200 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4201 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4202 if (!use->iv->no_overflow
4203 && !cand->iv->no_overflow
4204 && !integer_pow2p (cstep))
4205 return NULL_TREE;
4207 int bits = wi::exact_log2 (rat);
4208 if (bits == -1)
4209 bits = wi::floor_log2 (rat) + 1;
4210 if (!cand->iv->no_overflow
4211 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4212 return NULL_TREE;
4214 var = var_at_stmt (loop, cand, at);
4216 if (POINTER_TYPE_P (ctype))
4218 ctype = unsigned_type_for (ctype);
4219 cbase = fold_convert (ctype, cbase);
4220 cstep = fold_convert (ctype, cstep);
4221 var = fold_convert (ctype, var);
4224 if (stmt_after_increment (loop, cand, at))
4225 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4226 unshare_expr (cstep));
4228 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4229 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4230 wide_int_to_tree (TREE_TYPE (var), rat));
4231 if (POINTER_TYPE_P (utype))
4233 var = fold_convert (sizetype, var);
4234 if (neg_p)
4235 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4236 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4238 else
4240 var = fold_convert (utype, var);
4241 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4242 ubase, var);
4244 return var;
4247 /* Adjust the cost COST for being in loop setup rather than loop body.
4248 If we're optimizing for space, the loop setup overhead is constant;
4249 if we're optimizing for speed, amortize it over the per-iteration cost.
4250 If ROUND_UP_P is true, the result is rounded up rather than truncated
4251 toward zero when optimizing for speed. */
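/* For instance (illustrative numbers), a setup cost of 10 in a loop expected
   to run 4 times on average becomes 10 / 4 = 2, or (10 + 3) / 4 = 3 when
   ROUND_UP_P is true.  */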
4252 static int64_t
4253 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4254 bool round_up_p = false)
4256 if (cost == INFTY)
4257 return cost;
4258 else if (optimize_loop_for_speed_p (data->current_loop))
4260 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4261 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4263 else
4264 return cost;
4267 /* Calculate the speed or size cost (depending on SPEED) of shiftadd EXPR in
4268 MODE. MULT is the operand of EXPR holding the multiplication by a power of
4269 two, i.e. the shift. COST0 and COST1 are the costs for calculating the
4270 operands of EXPR. Returns true if successful, and returns the cost in COST. */
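/* For example, for EXPR == a + b * 8 the cost of a single shift-and-add insn
   (if the target has one) is compared against a shift followed by an add, and
   the cheaper alternative is returned.  */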
4272 static bool
4273 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4274 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4276 comp_cost res;
4277 tree op1 = TREE_OPERAND (expr, 1);
4278 tree cst = TREE_OPERAND (mult, 1);
4279 tree multop = TREE_OPERAND (mult, 0);
4280 int m = exact_log2 (int_cst_value (cst));
4281 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4282 int as_cost, sa_cost;
4283 bool mult_in_op1;
4285 if (!(m >= 0 && m < maxm))
4286 return false;
4288 STRIP_NOPS (op1);
4289 mult_in_op1 = operand_equal_p (op1, mult, 0);
4291 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4293 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4294 use that in preference to a shift insn followed by an add insn. */
4295 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4296 ? shiftadd_cost (speed, mode, m)
4297 : (mult_in_op1
4298 ? shiftsub1_cost (speed, mode, m)
4299 : shiftsub0_cost (speed, mode, m)));
4301 res = comp_cost (MIN (as_cost, sa_cost), 0);
4302 res += (mult_in_op1 ? cost0 : cost1);
4304 STRIP_NOPS (multop);
4305 if (!is_gimple_val (multop))
4306 res += force_expr_to_var_cost (multop, speed);
4308 *cost = res;
4309 return true;
4312 /* Estimates cost of forcing expression EXPR into a variable. */
4314 static comp_cost
4315 force_expr_to_var_cost (tree expr, bool speed)
4317 static bool costs_initialized = false;
4318 static unsigned integer_cost [2];
4319 static unsigned symbol_cost [2];
4320 static unsigned address_cost [2];
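/* These static cost arrays are indexed by the SPEED flag: element [0] holds
   the size costs, element [1] the speed costs.  */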
4321 tree op0, op1;
4322 comp_cost cost0, cost1, cost;
4323 machine_mode mode;
4324 scalar_int_mode int_mode;
4326 if (!costs_initialized)
4328 tree type = build_pointer_type (integer_type_node);
4329 tree var, addr;
4330 rtx x;
4331 int i;
4333 var = create_tmp_var_raw (integer_type_node, "test_var");
4334 TREE_STATIC (var) = 1;
4335 x = produce_memory_decl_rtl (var, NULL);
4336 SET_DECL_RTL (var, x);
4338 addr = build1 (ADDR_EXPR, type, var);
4341 for (i = 0; i < 2; i++)
4343 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4344 2000), i);
4346 symbol_cost[i] = computation_cost (addr, i) + 1;
4348 address_cost[i]
4349 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4350 if (dump_file && (dump_flags & TDF_DETAILS))
4352 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4353 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4354 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4355 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4356 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4357 fprintf (dump_file, "\n");
4361 costs_initialized = true;
4364 STRIP_NOPS (expr);
4366 if (SSA_VAR_P (expr))
4367 return no_cost;
4369 if (is_gimple_min_invariant (expr))
4371 if (poly_int_tree_p (expr))
4372 return comp_cost (integer_cost [speed], 0);
4374 if (TREE_CODE (expr) == ADDR_EXPR)
4376 tree obj = TREE_OPERAND (expr, 0);
4378 if (VAR_P (obj)
4379 || TREE_CODE (obj) == PARM_DECL
4380 || TREE_CODE (obj) == RESULT_DECL)
4381 return comp_cost (symbol_cost [speed], 0);
4384 return comp_cost (address_cost [speed], 0);
4387 switch (TREE_CODE (expr))
4389 case POINTER_PLUS_EXPR:
4390 case PLUS_EXPR:
4391 case MINUS_EXPR:
4392 case MULT_EXPR:
4393 case TRUNC_DIV_EXPR:
4394 case BIT_AND_EXPR:
4395 case BIT_IOR_EXPR:
4396 case LSHIFT_EXPR:
4397 case RSHIFT_EXPR:
4398 op0 = TREE_OPERAND (expr, 0);
4399 op1 = TREE_OPERAND (expr, 1);
4400 STRIP_NOPS (op0);
4401 STRIP_NOPS (op1);
4402 break;
4404 CASE_CONVERT:
4405 case NEGATE_EXPR:
4406 case BIT_NOT_EXPR:
4407 op0 = TREE_OPERAND (expr, 0);
4408 STRIP_NOPS (op0);
4409 op1 = NULL_TREE;
4410 break;
4411 /* See add_iv_candidate_for_doloop: for the doloop may_be_zero case we
4412 introduce a COND_EXPR for the IV base, so better cost estimation is needed
4413 for this COND_EXPR and for the tcc_comparison codes. */
4414 case COND_EXPR:
4415 op0 = TREE_OPERAND (expr, 1);
4416 STRIP_NOPS (op0);
4417 op1 = TREE_OPERAND (expr, 2);
4418 STRIP_NOPS (op1);
4419 break;
4420 case LT_EXPR:
4421 case LE_EXPR:
4422 case GT_EXPR:
4423 case GE_EXPR:
4424 case EQ_EXPR:
4425 case NE_EXPR:
4426 case UNORDERED_EXPR:
4427 case ORDERED_EXPR:
4428 case UNLT_EXPR:
4429 case UNLE_EXPR:
4430 case UNGT_EXPR:
4431 case UNGE_EXPR:
4432 case UNEQ_EXPR:
4433 case LTGT_EXPR:
4434 case MAX_EXPR:
4435 case MIN_EXPR:
4436 op0 = TREE_OPERAND (expr, 0);
4437 STRIP_NOPS (op0);
4438 op1 = TREE_OPERAND (expr, 1);
4439 STRIP_NOPS (op1);
4440 break;
4442 default:
4443 /* Just an arbitrary value, FIXME. */
4444 return comp_cost (target_spill_cost[speed], 0);
4447 if (op0 == NULL_TREE
4448 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4449 cost0 = no_cost;
4450 else
4451 cost0 = force_expr_to_var_cost (op0, speed);
4453 if (op1 == NULL_TREE
4454 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4455 cost1 = no_cost;
4456 else
4457 cost1 = force_expr_to_var_cost (op1, speed);
4459 mode = TYPE_MODE (TREE_TYPE (expr));
4460 switch (TREE_CODE (expr))
4462 case POINTER_PLUS_EXPR:
4463 case PLUS_EXPR:
4464 case MINUS_EXPR:
4465 case NEGATE_EXPR:
4466 cost = comp_cost (add_cost (speed, mode), 0);
4467 if (TREE_CODE (expr) != NEGATE_EXPR)
4469 tree mult = NULL_TREE;
4470 comp_cost sa_cost;
4471 if (TREE_CODE (op1) == MULT_EXPR)
4472 mult = op1;
4473 else if (TREE_CODE (op0) == MULT_EXPR)
4474 mult = op0;
4476 if (mult != NULL_TREE
4477 && is_a <scalar_int_mode> (mode, &int_mode)
4478 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4479 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4480 speed, &sa_cost))
4481 return sa_cost;
4483 break;
4485 CASE_CONVERT:
4487 tree inner_mode, outer_mode;
4488 outer_mode = TREE_TYPE (expr);
4489 inner_mode = TREE_TYPE (op0);
4490 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4491 TYPE_MODE (inner_mode), speed), 0);
4493 break;
4495 case MULT_EXPR:
4496 if (cst_and_fits_in_hwi (op0))
4497 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4498 mode, speed), 0);
4499 else if (cst_and_fits_in_hwi (op1))
4500 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4501 mode, speed), 0);
4502 else
4503 return comp_cost (target_spill_cost [speed], 0);
4504 break;
4506 case TRUNC_DIV_EXPR:
4507 /* Division by power of two is usually cheap, so we allow it. Forbid
4508 anything else. */
4509 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4510 cost = comp_cost (add_cost (speed, mode), 0);
4511 else
4512 cost = comp_cost (target_spill_cost[speed], 0);
4513 break;
4515 case BIT_AND_EXPR:
4516 case BIT_IOR_EXPR:
4517 case BIT_NOT_EXPR:
4518 case LSHIFT_EXPR:
4519 case RSHIFT_EXPR:
4520 cost = comp_cost (add_cost (speed, mode), 0);
4521 break;
4522 case COND_EXPR:
4523 op0 = TREE_OPERAND (expr, 0);
4524 STRIP_NOPS (op0);
4525 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4526 || CONSTANT_CLASS_P (op0))
4527 cost = no_cost;
4528 else
4529 cost = force_expr_to_var_cost (op0, speed);
4530 break;
4531 case LT_EXPR:
4532 case LE_EXPR:
4533 case GT_EXPR:
4534 case GE_EXPR:
4535 case EQ_EXPR:
4536 case NE_EXPR:
4537 case UNORDERED_EXPR:
4538 case ORDERED_EXPR:
4539 case UNLT_EXPR:
4540 case UNLE_EXPR:
4541 case UNGT_EXPR:
4542 case UNGE_EXPR:
4543 case UNEQ_EXPR:
4544 case LTGT_EXPR:
4545 case MAX_EXPR:
4546 case MIN_EXPR:
4547 /* Simply use the add cost for now; FIXME: switch to a more accurate cost
4548 evaluation if one becomes available. */
4549 cost = comp_cost (add_cost (speed, mode), 0);
4550 break;
4552 default:
4553 gcc_unreachable ();
4556 cost += cost0;
4557 cost += cost1;
4558 return cost;
4561 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4562 invariants the computation depends on. */
4564 static comp_cost
4565 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4567 if (!expr)
4568 return no_cost;
4570 find_inv_vars (data, &expr, inv_vars);
4571 return force_expr_to_var_cost (expr, data->speed);
4574 /* Returns cost of auto-modifying address expression in shape base + offset.
4575 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4576 address expression. The address expression has ADDR_MODE in addr space
4577 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4578 speed or size. */
4580 enum ainc_type
4582 AINC_PRE_INC, /* Pre increment. */
4583 AINC_PRE_DEC, /* Pre decrement. */
4584 AINC_POST_INC, /* Post increment. */
4585 AINC_POST_DEC, /* Post decrement. */
4586 AINC_NONE /* Also the number of auto increment types. */
4589 struct ainc_cost_data
4591 int64_t costs[AINC_NONE];
4594 static comp_cost
4595 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4596 machine_mode addr_mode, machine_mode mem_mode,
4597 addr_space_t as, bool speed)
4599 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4600 && !USE_STORE_PRE_DECREMENT (mem_mode)
4601 && !USE_LOAD_POST_DECREMENT (mem_mode)
4602 && !USE_STORE_POST_DECREMENT (mem_mode)
4603 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4604 && !USE_STORE_PRE_INCREMENT (mem_mode)
4605 && !USE_LOAD_POST_INCREMENT (mem_mode)
4606 && !USE_STORE_POST_INCREMENT (mem_mode))
4607 return infinite_cost;
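/* Costs are cached per (address space, memory mode) pair; the cache vector
   below is grown lazily.  */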
4609 static vec<ainc_cost_data *> ainc_cost_data_list;
4610 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4611 if (idx >= ainc_cost_data_list.length ())
4613 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4615 gcc_assert (nsize > idx);
4616 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4619 ainc_cost_data *data = ainc_cost_data_list[idx];
4620 if (data == NULL)
4622 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4624 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4625 data->costs[AINC_PRE_DEC] = INFTY;
4626 data->costs[AINC_POST_DEC] = INFTY;
4627 data->costs[AINC_PRE_INC] = INFTY;
4628 data->costs[AINC_POST_INC] = INFTY;
4629 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4630 || USE_STORE_PRE_DECREMENT (mem_mode))
4632 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4634 if (memory_address_addr_space_p (mem_mode, addr, as))
4635 data->costs[AINC_PRE_DEC]
4636 = address_cost (addr, mem_mode, as, speed);
4638 if (USE_LOAD_POST_DECREMENT (mem_mode)
4639 || USE_STORE_POST_DECREMENT (mem_mode))
4641 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4643 if (memory_address_addr_space_p (mem_mode, addr, as))
4644 data->costs[AINC_POST_DEC]
4645 = address_cost (addr, mem_mode, as, speed);
4647 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4648 || USE_STORE_PRE_INCREMENT (mem_mode))
4650 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4652 if (memory_address_addr_space_p (mem_mode, addr, as))
4653 data->costs[AINC_PRE_INC]
4654 = address_cost (addr, mem_mode, as, speed);
4656 if (USE_LOAD_POST_INCREMENT (mem_mode)
4657 || USE_STORE_POST_INCREMENT (mem_mode))
4659 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4661 if (memory_address_addr_space_p (mem_mode, addr, as))
4662 data->costs[AINC_POST_INC]
4663 = address_cost (addr, mem_mode, as, speed);
4665 ainc_cost_data_list[idx] = data;
4668 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4669 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4670 return comp_cost (data->costs[AINC_POST_INC], 0);
4671 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4672 return comp_cost (data->costs[AINC_POST_DEC], 0);
4673 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4674 return comp_cost (data->costs[AINC_PRE_INC], 0);
4675 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4676 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4678 return infinite_cost;
4681 /* Return cost of computing USE's address expression by using CAND.
4682 AFF_INV and AFF_VAR represent invariant and variant parts of the
4683 address expression, respectively. If AFF_INV is simple, store
4684 the loop invariant variables it depends on in INV_VARS;
4685 if AFF_INV is complicated, handle it as a new invariant expression
4686 and record it in INV_EXPR. RATIO is the ratio of USE's step to
4687 CAND's step. If CAN_AUTOINC is non-NULL, store in it a boolean
4688 value indicating whether this is an auto-increment address. */
4690 static comp_cost
4691 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4692 struct iv_cand *cand, aff_tree *aff_inv,
4693 aff_tree *aff_var, HOST_WIDE_INT ratio,
4694 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4695 bool *can_autoinc, bool speed)
4697 rtx addr;
4698 bool simple_inv = true;
4699 tree comp_inv = NULL_TREE, type = aff_var->type;
4700 comp_cost var_cost = no_cost, cost = no_cost;
4701 struct mem_address parts = {NULL_TREE, integer_one_node,
4702 NULL_TREE, NULL_TREE, NULL_TREE};
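/* PARTS starts out as the bare "base" form; integer_one_node merely stands in
   for an arbitrary base register while probing which addressing forms are
   valid.  */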
4703 machine_mode addr_mode = TYPE_MODE (type);
4704 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4705 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4706 /* Only true if ratio != 1. */
4707 bool ok_with_ratio_p = false;
4708 bool ok_without_ratio_p = false;
4709 code_helper code = ERROR_MARK;
4711 if (use->type == USE_PTR_ADDRESS)
4713 gcall *call = as_a<gcall *> (use->stmt);
4714 gcc_assert (gimple_call_internal_p (call));
4715 code = gimple_call_internal_fn (call);
4718 if (!aff_combination_const_p (aff_inv))
4720 parts.index = integer_one_node;
4721 /* Addressing mode "base + index". */
4722 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4723 if (ratio != 1)
4725 parts.step = wide_int_to_tree (type, ratio);
4726 /* Addressing mode "base + index << scale". */
4727 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts, code);
4728 if (!ok_with_ratio_p)
4729 parts.step = NULL_TREE;
4731 if (ok_with_ratio_p || ok_without_ratio_p)
4733 if (maybe_ne (aff_inv->offset, 0))
4735 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4736 /* Addressing mode "base + index [<< scale] + offset". */
4737 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4738 parts.offset = NULL_TREE;
4739 else
4740 aff_inv->offset = 0;
4743 move_fixed_address_to_symbol (&parts, aff_inv);
4744 /* Base is fixed address and is moved to symbol part. */
4745 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4746 parts.base = NULL_TREE;
4748 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4749 if (parts.symbol != NULL_TREE
4750 && !valid_mem_ref_p (mem_mode, as, &parts, code))
4752 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4753 parts.symbol = NULL_TREE;
4754 /* Reset SIMPLE_INV since symbol address needs to be computed
4755 outside of address expression in this case. */
4756 simple_inv = false;
4757 /* The symbol part is moved back to the base part; the base can't be NULL. */
4758 parts.base = integer_one_node;
4761 else
4762 parts.index = NULL_TREE;
4764 else
4766 poly_int64 ainc_step;
4767 if (can_autoinc
4768 && ratio == 1
4769 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4771 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4773 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4774 ainc_offset += ainc_step;
4775 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4776 addr_mode, mem_mode, as, speed);
4777 if (!cost.infinite_cost_p ())
4779 *can_autoinc = true;
4780 return cost;
4782 cost = no_cost;
4784 if (!aff_combination_zero_p (aff_inv))
4786 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4787 /* Addressing mode "base + offset". */
4788 if (!valid_mem_ref_p (mem_mode, as, &parts, code))
4789 parts.offset = NULL_TREE;
4790 else
4791 aff_inv->offset = 0;
4795 if (simple_inv)
4796 simple_inv = (aff_inv == NULL
4797 || aff_combination_const_p (aff_inv)
4798 || aff_combination_singleton_var_p (aff_inv));
4799 if (!aff_combination_zero_p (aff_inv))
4800 comp_inv = aff_combination_to_tree (aff_inv);
4801 if (comp_inv != NULL_TREE)
4802 cost = force_var_cost (data, comp_inv, inv_vars);
4803 if (ratio != 1 && parts.step == NULL_TREE)
4804 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4805 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4806 var_cost += add_cost (speed, addr_mode);
4808 if (comp_inv && inv_expr && !simple_inv)
4810 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4811 /* The invariant expression subsumes the dependences, so clear INV_VARS. */
4812 if (*inv_expr != NULL && inv_vars && *inv_vars)
4813 bitmap_clear (*inv_vars);
4815 /* The cost of a small invariant expression, once adjusted against the loop
4816 niters, is usually zero, which makes it hard to differentiate from a
4817 candidate based on loop invariant variables. Secondly, the generated
4818 invariant expression may not be hoisted out of the loop by a following
4819 pass. We penalize the cost by rounding up in order to neutralize such
4820 effects. */
4821 cost.cost = adjust_setup_cost (data, cost.cost, true);
4822 cost.scratch = cost.cost;
4825 cost += var_cost;
4826 addr = addr_for_mem_ref (&parts, as, false);
4827 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4828 cost += address_cost (addr, mem_mode, as, speed);
4830 if (parts.symbol != NULL_TREE)
4831 cost.complexity += 1;
4832 /* Don't increase the complexity of adding a scaled index if it's
4833 the only kind of index that the target allows. */
4834 if (parts.step != NULL_TREE && ok_without_ratio_p)
4835 cost.complexity += 1;
4836 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4837 cost.complexity += 1;
4838 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4839 cost.complexity += 1;
4841 return cost;
4844 /* Scale (multiply) the computed COST (except the scratch part that should be
4845 hoisted out of the loop) by header->frequency / AT->frequency, which makes
4846 the expected cost more accurate. */
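/* I.e. the result is scratch + (cost - scratch) * scale_factor, with the
   per-BB scale_factor precomputed and stashed in bb->aux.  */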
4848 static comp_cost
4849 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4851 if (data->speed
4852 && data->current_loop->header->count.to_frequency (cfun) > 0)
4854 basic_block bb = gimple_bb (at);
4855 gcc_assert (cost.scratch <= cost.cost);
4856 int scale_factor = (int)(intptr_t) bb->aux;
4857 if (scale_factor == 1)
4858 return cost;
4860 int64_t scaled_cost
4861 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4863 if (dump_file && (dump_flags & TDF_DETAILS))
4864 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4865 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4866 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4868 cost.cost = scaled_cost;
4871 return cost;
4874 /* Determines the cost of the computation by which USE is expressed
4875 from induction variable CAND. If ADDRESS_P is true, we just need
4876 to create an address from it, otherwise we want to get it into a
4877 register. A set of invariants we depend on is stored in INV_VARS.
4878 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4879 addressing is likely. If INV_EXPR is nonnull, record invariant
4880 expr entry in it. */
4882 static comp_cost
4883 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4884 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4885 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4887 gimple *at = use->stmt;
4888 tree ubase = use->iv->base, cbase = cand->iv->base;
4889 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4890 tree comp_inv = NULL_TREE;
4891 HOST_WIDE_INT ratio, aratio;
4892 comp_cost cost;
4893 widest_int rat;
4894 aff_tree aff_inv, aff_var;
4895 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4897 if (inv_vars)
4898 *inv_vars = NULL;
4899 if (can_autoinc)
4900 *can_autoinc = false;
4901 if (inv_expr)
4902 *inv_expr = NULL;
4904 /* Check if we have enough precision to express the values of use. */
4905 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4906 return infinite_cost;
4908 if (address_p
4909 || (use->iv->base_object
4910 && cand->iv->base_object
4911 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4912 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4914 /* Do not try to express the address of one object with a computation based
4915 on the address of a different object. This may cause problems in RTL-level
4916 alias analysis (which does not expect this to happen, as it is invalid
4917 in C), and would be unlikely to be useful anyway. */
4919 if (use->iv->base_object
4920 && cand->iv->base_object
4921 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4922 return infinite_cost;
4925 if (!get_computation_aff_1 (data->current_loop, at, use,
4926 cand, &aff_inv, &aff_var, &rat)
4927 || !wi::fits_shwi_p (rat))
4928 return infinite_cost;
4930 ratio = rat.to_shwi ();
4931 if (address_p)
4933 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4934 inv_vars, inv_expr, can_autoinc, speed);
4935 cost = get_scaled_computation_cost_at (data, at, cost);
4936 /* For doloop IV cand, add on the extra cost. */
4937 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4938 return cost;
4941 bool simple_inv = (aff_combination_const_p (&aff_inv)
4942 || aff_combination_singleton_var_p (&aff_inv));
4943 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4944 aff_combination_convert (&aff_inv, signed_type);
4945 if (!aff_combination_zero_p (&aff_inv))
4946 comp_inv = aff_combination_to_tree (&aff_inv);
4948 cost = force_var_cost (data, comp_inv, inv_vars);
4949 if (comp_inv && inv_expr && !simple_inv)
4951 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4952 /* The invariant expression subsumes the dependences, so clear INV_VARS. */
4953 if (*inv_expr != NULL && inv_vars && *inv_vars)
4954 bitmap_clear (*inv_vars);
4956 cost.cost = adjust_setup_cost (data, cost.cost);
4957 /* Record setup cost in scratch field. */
4958 cost.scratch = cost.cost;
4960 /* The cost of a constant integer can be covered when adding the invariant
4961 part to the variant part. */
4962 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4963 cost = no_cost;
4965 /* Need type narrowing to represent use with cand. */
4966 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4968 machine_mode outer_mode = TYPE_MODE (utype);
4969 machine_mode inner_mode = TYPE_MODE (ctype);
4970 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4973 /* Turn a + i * (-c) into a - i * c. */
4974 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4975 aratio = -ratio;
4976 else
4977 aratio = ratio;
4979 if (ratio != 1)
4980 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4982 /* TODO: We may also need to check if we can compute a + i * 4 in one
4983 instruction. */
4984 /* Need to add up the invariant and variant parts. */
4985 if (comp_inv && !integer_zerop (comp_inv))
4986 cost += add_cost (speed, TYPE_MODE (utype));
4988 cost = get_scaled_computation_cost_at (data, at, cost);
4990 /* For doloop IV cand, add on the extra cost. */
4991 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4992 cost += targetm.doloop_cost_for_generic;
4994 return cost;
4997 /* Determines cost of computing the use in GROUP with CAND in a generic
4998 expression. */
5000 static bool
5001 determine_group_iv_cost_generic (struct ivopts_data *data,
5002 struct iv_group *group, struct iv_cand *cand)
5004 comp_cost cost;
5005 iv_inv_expr_ent *inv_expr = NULL;
5006 bitmap inv_vars = NULL, inv_exprs = NULL;
5007 struct iv_use *use = group->vuses[0];
5009 /* The simple case first -- if we need to express the value of the preserved
5010 original biv, the cost is 0. This also prevents us from counting the
5011 cost of increment twice -- once at this use and once in the cost of
5012 the candidate. */
5013 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5014 cost = no_cost;
5015 /* If the IV candidate involves undefined SSA values and is not the
5016 same IV as on the USE avoid using that candidate here. */
5017 else if (cand->involves_undefs
5018 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
5019 return false;
5020 else
5021 cost = get_computation_cost (data, use, cand, false,
5022 &inv_vars, NULL, &inv_expr);
5024 if (inv_expr)
5026 inv_exprs = BITMAP_ALLOC (NULL);
5027 bitmap_set_bit (inv_exprs, inv_expr->id);
5029 set_group_iv_cost (data, group, cand, cost, inv_vars,
5030 NULL_TREE, ERROR_MARK, inv_exprs);
5031 return !cost.infinite_cost_p ();
5034 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5036 static bool
5037 determine_group_iv_cost_address (struct ivopts_data *data,
5038 struct iv_group *group, struct iv_cand *cand)
5040 unsigned i;
5041 bitmap inv_vars = NULL, inv_exprs = NULL;
5042 bool can_autoinc;
5043 iv_inv_expr_ent *inv_expr = NULL;
5044 struct iv_use *use = group->vuses[0];
5045 comp_cost sum_cost = no_cost, cost;
5047 cost = get_computation_cost (data, use, cand, true,
5048 &inv_vars, &can_autoinc, &inv_expr);
5050 if (inv_expr)
5052 inv_exprs = BITMAP_ALLOC (NULL);
5053 bitmap_set_bit (inv_exprs, inv_expr->id);
5055 sum_cost = cost;
5056 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5058 if (can_autoinc)
5059 sum_cost -= cand->cost_step;
5060 /* If we generated the candidate solely for exploiting autoincrement
5061 opportunities, and it turns out it can't be used, set the cost to
5062 infinity to make sure we ignore it. */
5063 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5064 sum_cost = infinite_cost;
5067 /* Uses in a group can share setup code, so only add setup cost once. */
5068 cost -= cost.scratch;
5069 /* Compute and add costs for the rest of the uses in this group. */
5070 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5072 struct iv_use *next = group->vuses[i];
5074 /* TODO: We could skip computing cost for sub iv_use when it has the
5075 same cost as the first iv_use, but the cost really depends on the
5076 offset and where the iv_use is. */
5077 cost = get_computation_cost (data, next, cand, true,
5078 NULL, &can_autoinc, &inv_expr);
5079 if (inv_expr)
5081 if (!inv_exprs)
5082 inv_exprs = BITMAP_ALLOC (NULL);
5084 bitmap_set_bit (inv_exprs, inv_expr->id);
5086 sum_cost += cost;
5088 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5089 NULL_TREE, ERROR_MARK, inv_exprs);
5091 return !sum_cost.infinite_cost_p ();
5094 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5095 and stores it to VAL. */
5097 static void
5098 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5099 class tree_niter_desc *desc, aff_tree *val)
5101 aff_tree step, delta, nit;
5102 struct iv *iv = cand->iv;
5103 tree type = TREE_TYPE (iv->base);
5104 tree niter = desc->niter;
5105 bool after_adjust = stmt_after_increment (loop, cand, at);
5106 tree steptype;
5108 if (POINTER_TYPE_P (type))
5109 steptype = sizetype;
5110 else
5111 steptype = unsigned_type_for (type);
5113 /* If AFTER_ADJUST is required, the code below generates the equivalent
5114 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5115 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5116 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5117 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5118 class for common idioms that we know are safe. */
5119 if (after_adjust
5120 && desc->control.no_overflow
5121 && integer_onep (desc->control.step)
5122 && (desc->cmp == LT_EXPR
5123 || desc->cmp == NE_EXPR)
5124 && TREE_CODE (desc->bound) == SSA_NAME)
5126 if (integer_onep (desc->control.base))
5128 niter = desc->bound;
5129 after_adjust = false;
5131 else if (TREE_CODE (niter) == MINUS_EXPR
5132 && integer_onep (TREE_OPERAND (niter, 1)))
5134 niter = TREE_OPERAND (niter, 0);
5135 after_adjust = false;
5139 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5140 aff_combination_convert (&step, steptype);
5141 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5142 aff_combination_convert (&nit, steptype);
5143 aff_combination_mult (&nit, &step, &delta);
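/* DELTA = NITER * STEP; one more STEP is added below when the value is taken
   after the increment.  */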
5144 if (after_adjust)
5145 aff_combination_add (&delta, &step);
5147 tree_to_aff_combination (iv->base, type, val);
5148 if (!POINTER_TYPE_P (type))
5149 aff_combination_convert (val, steptype);
5150 aff_combination_add (val, &delta);
5153 /* Returns period of induction variable iv. */
5155 static tree
5156 iv_period (struct iv *iv)
5158 tree step = iv->step, period, type;
5159 tree pow2div;
5161 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5163 type = unsigned_type_for (TREE_TYPE (step));
5164 /* Period of the iv is lcm (step, type_range)/step - 1,
5165 i.e., N*type_range/step - 1. Since type_range is a power
5166 of two, N == (step >> num_of_ending_zeros_binary (step)),
5167 so the final result is
5169 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
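/* For instance, for a 32-bit unsigned iv with step 12 (binary 1100), pow2div
   is 2 and the period is (1 << 30) - 1.  */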
5172 pow2div = num_ending_zeros (step);
5174 period = build_low_bits_mask (type,
5175 (TYPE_PRECISION (type)
5176 - tree_to_uhwi (pow2div)));
5178 return period;
5181 /* Returns the comparison operator used when eliminating the iv USE. */
5183 static enum tree_code
5184 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5186 class loop *loop = data->current_loop;
5187 basic_block ex_bb;
5188 edge exit;
5190 ex_bb = gimple_bb (use->stmt);
5191 exit = EDGE_SUCC (ex_bb, 0);
5192 if (flow_bb_inside_loop_p (loop, exit->dest))
5193 exit = EDGE_SUCC (ex_bb, 1);
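/* EXIT is now the edge leaving the loop; if it is the true edge, the rewritten
   condition must be true exactly when the loop exits, hence EQ_EXPR, otherwise
   NE_EXPR.  */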
5195 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5198 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5199 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5200 calculation is performed in non-wrapping type.
5202 TODO: More generally, we could test for the situation that
5203 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5204 This would require knowing the sign of OFFSET. */
5206 static bool
5207 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5209 enum tree_code code;
5210 tree e1, e2;
5211 aff_tree aff_e1, aff_e2, aff_offset;
5213 if (!nowrap_type_p (TREE_TYPE (base)))
5214 return false;
5216 base = expand_simple_operations (base);
5218 if (TREE_CODE (base) == SSA_NAME)
5220 gimple *stmt = SSA_NAME_DEF_STMT (base);
5222 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5223 return false;
5225 code = gimple_assign_rhs_code (stmt);
5226 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5227 return false;
5229 e1 = gimple_assign_rhs1 (stmt);
5230 e2 = gimple_assign_rhs2 (stmt);
5232 else
5234 code = TREE_CODE (base);
5235 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5236 return false;
5237 e1 = TREE_OPERAND (base, 0);
5238 e2 = TREE_OPERAND (base, 1);
5241 /* Use affine expansion as deeper inspection to prove the equality. */
5242 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5243 &aff_e2, &data->name_expansion_cache);
5244 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5245 &aff_offset, &data->name_expansion_cache);
5246 aff_combination_scale (&aff_offset, -1);
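/* With -OFFSET mixed in, a combination that cancels to zero below means the
   corresponding operand equals OFFSET.  */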
5247 switch (code)
5249 case PLUS_EXPR:
5250 aff_combination_add (&aff_e2, &aff_offset);
5251 if (aff_combination_zero_p (&aff_e2))
5252 return true;
5254 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5255 &aff_e1, &data->name_expansion_cache);
5256 aff_combination_add (&aff_e1, &aff_offset);
5257 return aff_combination_zero_p (&aff_e1);
5259 case POINTER_PLUS_EXPR:
5260 aff_combination_add (&aff_e2, &aff_offset);
5261 return aff_combination_zero_p (&aff_e2);
5263 default:
5264 return false;
5268 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5269 comparison with CAND. NITER describes the number of iterations of
5270 the loops. If successful, the comparison in COMP_P is altered accordingly.
5272 We aim to handle the following situation:
5274 sometype *base, *p;
5275 int a, b, i;
5277 i = a;
5278 p = p_0 = base + a;
5280 do
5281 {
5282 bla (*p);
5283 p++;
5284 i++;
5285 }
5286 while (i < b);
5288 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5289 We aim to optimize this to
5291 p = p_0 = base + a;
5292 do
5293 {
5294 bla (*p);
5295 p++;
5296 }
5297 while (p < p_0 - a + b);
5299 This preserves the correctness, since the pointer arithmetics does not
5300 overflow. More precisely:
5302 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5303 overflow in computing it or the values of p.
5304 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5305 overflow. To prove this, we use the fact that p_0 = base + a. */
5307 static bool
5308 iv_elimination_compare_lt (struct ivopts_data *data,
5309 struct iv_cand *cand, enum tree_code *comp_p,
5310 class tree_niter_desc *niter)
5312 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5313 class aff_tree nit, tmpa, tmpb;
5314 enum tree_code comp;
5315 HOST_WIDE_INT step;
5317 /* We need to know that the candidate induction variable does not overflow.
5318 While more complex analysis may be used to prove this, for now just
5319 check that the variable appears in the original program and that it
5320 is computed in a type that guarantees no overflows. */
5321 cand_type = TREE_TYPE (cand->iv->base);
5322 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5323 return false;
5325 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5326 the calculation of the BOUND could overflow, making the comparison
5327 invalid. */
5328 if (!data->loop_single_exit_p)
5329 return false;
5331 /* We need to be able to decide whether candidate is increasing or decreasing
5332 in order to choose the right comparison operator. */
5333 if (!cst_and_fits_in_hwi (cand->iv->step))
5334 return false;
5335 step = int_cst_value (cand->iv->step);
5337 /* Check that the number of iterations matches the expected pattern:
5338 a + 1 > b ? 0 : b - a - 1. */
5339 mbz = niter->may_be_zero;
5340 if (TREE_CODE (mbz) == GT_EXPR)
5342 /* Handle a + 1 > b. */
5343 tree op0 = TREE_OPERAND (mbz, 0);
5344 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5346 a = TREE_OPERAND (op0, 0);
5347 b = TREE_OPERAND (mbz, 1);
5349 else
5350 return false;
5352 else if (TREE_CODE (mbz) == LT_EXPR)
5354 tree op1 = TREE_OPERAND (mbz, 1);
5356 /* Handle b < a + 1. */
5357 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5359 a = TREE_OPERAND (op1, 0);
5360 b = TREE_OPERAND (mbz, 0);
5362 else
5363 return false;
5365 else
5366 return false;
5368 /* Expected number of iterations is B - A - 1. Check that it matches
5369 the actual number, i.e., that B - A - NITER = 1. */
5370 tree_to_aff_combination (niter->niter, nit_type, &nit);
5371 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5372 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5373 aff_combination_scale (&nit, -1);
5374 aff_combination_scale (&tmpa, -1);
5375 aff_combination_add (&tmpb, &tmpa);
5376 aff_combination_add (&tmpb, &nit);
5377 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5378 return false;
5380 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5381 overflow. */
5382 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5383 cand->iv->step,
5384 fold_convert (TREE_TYPE (cand->iv->step), a));
5385 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5386 return false;
5388 /* Determine the new comparison operator. */
5389 comp = step < 0 ? GT_EXPR : LT_EXPR;
5390 if (*comp_p == NE_EXPR)
5391 *comp_p = comp;
5392 else if (*comp_p == EQ_EXPR)
5393 *comp_p = invert_tree_comparison (comp, false);
5394 else
5395 gcc_unreachable ();
5397 return true;
5400 /* Check whether it is possible to express the condition in USE by comparison
5401 of candidate CAND. If so, store the value compared with to BOUND, and the
5402 comparison operator to COMP. */
5404 static bool
5405 may_eliminate_iv (struct ivopts_data *data,
5406 struct iv_use *use, struct iv_cand *cand, tree *bound,
5407 enum tree_code *comp)
5409 basic_block ex_bb;
5410 edge exit;
5411 tree period;
5412 class loop *loop = data->current_loop;
5413 aff_tree bnd;
5414 class tree_niter_desc *desc = NULL;
5416 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5417 return false;
5419 /* For now works only for exits that dominate the loop latch.
5420 TODO: extend to other conditions inside loop body. */
5421 ex_bb = gimple_bb (use->stmt);
5422 if (use->stmt != last_nondebug_stmt (ex_bb)
5423 || gimple_code (use->stmt) != GIMPLE_COND
5424 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5425 return false;
5427 exit = EDGE_SUCC (ex_bb, 0);
5428 if (flow_bb_inside_loop_p (loop, exit->dest))
5429 exit = EDGE_SUCC (ex_bb, 1);
5430 if (flow_bb_inside_loop_p (loop, exit->dest))
5431 return false;
5433 desc = niter_for_exit (data, exit);
5434 if (!desc)
5435 return false;
5437 /* Determine whether we can use the variable to test the exit condition.
5438 This is the case iff the period of the induction variable is greater
5439 than the number of iterations for which the exit condition is true. */
5440 period = iv_period (cand->iv);
5442 /* If the number of iterations is constant, compare against it directly. */
5443 if (TREE_CODE (desc->niter) == INTEGER_CST)
5445 /* See cand_value_at. */
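/* After the increment the candidate has advanced one extra step at NITER, so
   the number of iterations must be strictly smaller than the period.  */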
5446 if (stmt_after_increment (loop, cand, use->stmt))
5448 if (!tree_int_cst_lt (desc->niter, period))
5449 return false;
5451 else
5453 if (tree_int_cst_lt (period, desc->niter))
5454 return false;
5458 /* If not, and if this is the only possible exit of the loop, see whether
5459 we can get a conservative estimate on the number of iterations of the
5460 entire loop and compare against that instead. */
5461 else
5463 widest_int period_value, max_niter;
5465 max_niter = desc->max;
5466 if (stmt_after_increment (loop, cand, use->stmt))
5467 max_niter += 1;
5468 period_value = wi::to_widest (period);
5469 if (wi::gtu_p (max_niter, period_value))
5471 /* See if we can take advantage of inferred loop bound
5472 information. */
5473 if (data->loop_single_exit_p)
5475 if (!max_loop_iterations (loop, &max_niter))
5476 return false;
5477 /* The loop bound is already adjusted by adding 1. */
5478 if (wi::gtu_p (max_niter, period_value))
5479 return false;
5481 else
5482 return false;
5486 /* For a doloop IV cand, the bound would be zero. It's safe whether
5487 may_be_zero is set or not. */
5488 if (cand->doloop_p)
5490 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5491 *comp = iv_elimination_compare (data, use);
5492 return true;
5495 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5497 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5498 aff_combination_to_tree (&bnd));
5499 *comp = iv_elimination_compare (data, use);
5501 /* It is unlikely that computing the number of iterations using division
5502 would be more profitable than keeping the original induction variable. */
5503 if (expression_expensive_p (*bound))
5504 return false;
5506 /* Sometimes it is possible to handle the situation that the number of
5507 iterations may be zero (unless additional assumptions hold) by using <
5508 instead of != in the exit condition.
5510 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5511 base the exit condition on it. However, that is often too
5512 expensive. */
5513 if (!integer_zerop (desc->may_be_zero))
5514 return iv_elimination_compare_lt (data, cand, comp, desc);
5516 return true;
5519 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5520 be copied, if it is used in the loop body and DATA->body_includes_call. */
5522 static int
5523 parm_decl_cost (struct ivopts_data *data, tree bound)
5525 tree sbound = bound;
5526 STRIP_NOPS (sbound);
5528 if (TREE_CODE (sbound) == SSA_NAME
5529 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5530 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5531 && data->body_includes_call)
5532 return COSTS_N_INSNS (1);
5534 return 0;
5537 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5539 static bool
5540 determine_group_iv_cost_cond (struct ivopts_data *data,
5541 struct iv_group *group, struct iv_cand *cand)
5543 tree bound = NULL_TREE;
5544 struct iv *cmp_iv;
5545 bitmap inv_exprs = NULL;
5546 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5547 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5548 enum comp_iv_rewrite rewrite_type;
5549 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5550 tree *control_var, *bound_cst;
5551 enum tree_code comp = ERROR_MARK;
5552 struct iv_use *use = group->vuses[0];
5554 /* Extract condition operands. */
5555 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5556 &bound_cst, NULL, &cmp_iv);
5557 gcc_assert (rewrite_type != COMP_IV_NA);
5559 /* Try iv elimination. */
5560 if (rewrite_type == COMP_IV_ELIM
5561 && may_eliminate_iv (data, use, cand, &bound, &comp))
5563 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5564 if (elim_cost.cost == 0)
5565 elim_cost.cost = parm_decl_cost (data, bound);
5566 else if (TREE_CODE (bound) == INTEGER_CST)
5567 elim_cost.cost = 0;
5568 /* If we replace a loop condition 'i < n' with 'p < base + n',
5569 inv_vars_elim will have 'base' and 'n' set, which implies that both
5570 'base' and 'n' will be live during the loop. More likely,
5571 'base + n' will be loop invariant, resulting in only one live value
5572 during the loop. So in that case we clear inv_vars_elim and set
5573 inv_expr_elim instead. */
5574 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5576 inv_expr_elim = get_loop_invariant_expr (data, bound);
5577 bitmap_clear (inv_vars_elim);
5579 /* The bound is a loop invariant, so it will be only computed
5580 once. */
5581 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5584 /* When the condition is a comparison of the candidate IV against
5585 zero, prefer this IV.
5587 TODO: The constant that we're subtracting from the cost should
5588 be target-dependent. This information should be added to the
5589 target costs for each backend. */
5590 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5591 && integer_zerop (*bound_cst)
5592 && (operand_equal_p (*control_var, cand->var_after, 0)
5593 || operand_equal_p (*control_var, cand->var_before, 0)))
5594 elim_cost -= 1;
5596 express_cost = get_computation_cost (data, use, cand, false,
5597 &inv_vars_express, NULL,
5598 &inv_expr_express);
5599 if (cmp_iv != NULL)
5600 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5602 /* Count the cost of the original bound as well. */
5603 bound_cost = force_var_cost (data, *bound_cst, NULL);
5604 if (bound_cost.cost == 0)
5605 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5606 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5607 bound_cost.cost = 0;
5608 express_cost += bound_cost;
5610 /* Choose the better approach, preferring the eliminated IV. */
5611 if (elim_cost <= express_cost)
5613 cost = elim_cost;
5614 inv_vars = inv_vars_elim;
5615 inv_vars_elim = NULL;
5616 inv_expr = inv_expr_elim;
5617 /* For doloop candidate/use pair, adjust to zero cost. */
5618 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5619 cost = no_cost;
5621 else
5623 cost = express_cost;
5624 inv_vars = inv_vars_express;
5625 inv_vars_express = NULL;
5626 bound = NULL_TREE;
5627 comp = ERROR_MARK;
5628 inv_expr = inv_expr_express;
5631 if (inv_expr)
5633 inv_exprs = BITMAP_ALLOC (NULL);
5634 bitmap_set_bit (inv_exprs, inv_expr->id);
5636 set_group_iv_cost (data, group, cand, cost,
5637 inv_vars, bound, comp, inv_exprs);
5639 if (inv_vars_elim)
5640 BITMAP_FREE (inv_vars_elim);
5641 if (inv_vars_express)
5642 BITMAP_FREE (inv_vars_express);
5644 return !cost.infinite_cost_p ();
5647 /* Determines cost of computing uses in GROUP with CAND. Returns false
5648 if USE cannot be represented with CAND. */
5650 static bool
5651 determine_group_iv_cost (struct ivopts_data *data,
5652 struct iv_group *group, struct iv_cand *cand)
5654 switch (group->type)
5656 case USE_NONLINEAR_EXPR:
5657 return determine_group_iv_cost_generic (data, group, cand);
5659 case USE_REF_ADDRESS:
5660 case USE_PTR_ADDRESS:
5661 return determine_group_iv_cost_address (data, group, cand);
5663 case USE_COMPARE:
5664 return determine_group_iv_cost_cond (data, group, cand);
5666 default:
5667 gcc_unreachable ();
5671 /* Return true if get_computation_cost indicates that autoincrement is
5672 a possibility for the pair of USE and CAND, false otherwise. */
5674 static bool
5675 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5676 struct iv_cand *cand)
5678 if (!address_p (use->type))
5679 return false;
5681 bool can_autoinc = false;
5682 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5683 return can_autoinc;
5686 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5687 use that allows autoincrement, and set their AINC_USE if possible. */
5689 static void
5690 set_autoinc_for_original_candidates (struct ivopts_data *data)
5692 unsigned i, j;
5694 for (i = 0; i < data->vcands.length (); i++)
5696 struct iv_cand *cand = data->vcands[i];
5697 struct iv_use *closest_before = NULL;
5698 struct iv_use *closest_after = NULL;
5699 if (cand->pos != IP_ORIGINAL)
5700 continue;
5702 for (j = 0; j < data->vgroups.length (); j++)
5704 struct iv_group *group = data->vgroups[j];
5705 struct iv_use *use = group->vuses[0];
5706 unsigned uid = gimple_uid (use->stmt);
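/* Within a basic block the gimple uids reflect statement order, so comparing
   them against the uid of the increment statement finds the closest use
   before and after it.  */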
5708 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5709 continue;
5711 if (uid < gimple_uid (cand->incremented_at)
5712 && (closest_before == NULL
5713 || uid > gimple_uid (closest_before->stmt)))
5714 closest_before = use;
5716 if (uid > gimple_uid (cand->incremented_at)
5717 && (closest_after == NULL
5718 || uid < gimple_uid (closest_after->stmt)))
5719 closest_after = use;
5722 if (closest_before != NULL
5723 && autoinc_possible_for_pair (data, closest_before, cand))
5724 cand->ainc_use = closest_before;
5725 else if (closest_after != NULL
5726 && autoinc_possible_for_pair (data, closest_after, cand))
5727 cand->ainc_use = closest_after;
5731 /* Relate compare use with all candidates. */
5733 static void
5734 relate_compare_use_with_all_cands (struct ivopts_data *data)
5736 unsigned i, count = data->vcands.length ();
5737 for (i = 0; i < data->vgroups.length (); i++)
5739 struct iv_group *group = data->vgroups[i];
5741 if (group->type == USE_COMPARE)
5742 bitmap_set_range (group->related_cands, 0, count);
5746 /* If PREFERRED_MODE is suitable and profitable, use it to compute the
5747 doloop iv base from niter: base = niter + 1. */
5749 static tree
5750 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5751 const widest_int &iterations_max)
5753 tree ntype = TREE_TYPE (niter);
5754 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5755 if (!pref_type)
5756 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5757 build_int_cst (ntype, 1));
5759 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5761 int prec = TYPE_PRECISION (ntype);
5762 int pref_prec = TYPE_PRECISION (pref_type);
5764 tree base;
5766 /* Check if PREFERRED_MODE is able to represent niter. */
5767 if (pref_prec > prec
5768 || wi::ltu_p (iterations_max,
5769 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5770 UNSIGNED)))
5772 /* No wrap: it is safe to compute niter + 1 first and then convert to the preferred type. */
5773 if (wi::ltu_p (iterations_max,
5774 widest_int::from (wi::max_value (prec, UNSIGNED),
5775 UNSIGNED)))
5777 /* This could help to optimize the "-1 +1" pair when niter looks
5778 like "n - 1", with n in the original mode: "base = (n - 1) + 1"
5779 in PREFERRED_MODE could simply become base = (PREFERRED_TYPE) n. */
5780 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5781 build_int_cst (ntype, 1));
5782 base = fold_convert (pref_type, base);
5785 /* To avoid wrap, convert niter to the preferred type before adding 1. */
5786 else
5788 niter = fold_convert (pref_type, niter);
5789 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5790 build_int_cst (pref_type, 1));
5793 else
5794 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5795 build_int_cst (ntype, 1));
5796 return base;
5799 /* Add one doloop dedicated IV candidate:
5800 - Base is (may_be_zero ? 1 : (niter + 1)).
5801 - Step is -1. */
5803 static void
5804 add_iv_candidate_for_doloop (struct ivopts_data *data)
5806 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5807 gcc_assert (niter_desc && niter_desc->assumptions);
5809 tree niter = niter_desc->niter;
5810 tree ntype = TREE_TYPE (niter);
5811 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5813 tree may_be_zero = niter_desc->may_be_zero;
5814 if (may_be_zero && integer_zerop (may_be_zero))
5815 may_be_zero = NULL_TREE;
5816 if (may_be_zero)
5818 if (COMPARISON_CLASS_P (may_be_zero))
5820 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5821 build_int_cst (ntype, 0),
5822 rewrite_to_non_trapping_overflow (niter));
5824 /* Don't try to obtain the iteration count expression when may_be_zero is
5825 integer_nonzerop (the iteration count is then actually one) or anything else that is not a comparison. */
5826 else
5827 return;
5830 machine_mode mode = TYPE_MODE (ntype);
5831 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5833 tree base;
5834 if (mode != pref_mode)
5836 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5837 ntype = TREE_TYPE (base);
5839 else
5840 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5841 build_int_cst (ntype, 1));
5844 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
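/* The resulting candidate is a classic count-down counter: it starts at
   the number of times the body executes (niter + 1, or 1 when MAY_BE_ZERO
   holds) and is decremented by one per iteration, which is roughly the
   shape the later RTL doloop transformation expects.  */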
5847 /* Finds the candidates for the induction variables. */
5849 static void
5850 find_iv_candidates (struct ivopts_data *data)
5852 /* Add commonly used ivs. */
5853 add_standard_iv_candidates (data);
5855 /* Add doloop dedicated ivs. */
5856 if (data->doloop_use_p)
5857 add_iv_candidate_for_doloop (data);
5859 /* Add old induction variables. */
5860 add_iv_candidate_for_bivs (data);
5862 /* Add induction variables derived from uses. */
5863 add_iv_candidate_for_groups (data);
5865 set_autoinc_for_original_candidates (data);
5867 /* Record the important candidates. */
5868 record_important_candidates (data);
5870 /* Relate compare iv_use with all candidates. */
5871 if (!data->consider_all_candidates)
5872 relate_compare_use_with_all_cands (data);
5874 if (dump_file && (dump_flags & TDF_DETAILS))
5876 unsigned i;
5878 fprintf (dump_file, "\n<Important Candidates>:\t");
5879 for (i = 0; i < data->vcands.length (); i++)
5880 if (data->vcands[i]->important)
5881 fprintf (dump_file, " %d,", data->vcands[i]->id);
5882 fprintf (dump_file, "\n");
5884 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5885 for (i = 0; i < data->vgroups.length (); i++)
5887 struct iv_group *group = data->vgroups[i];
5889 if (group->related_cands)
5891 fprintf (dump_file, " Group %d:\t", group->id);
5892 dump_bitmap (dump_file, group->related_cands);
5895 fprintf (dump_file, "\n");
5899 /* Determines the cost of computing each group of uses with each iv candidate. */
5901 static void
5902 determine_group_iv_costs (struct ivopts_data *data)
5904 unsigned i, j;
5905 struct iv_cand *cand;
5906 struct iv_group *group;
5907 bitmap to_clear = BITMAP_ALLOC (NULL);
5909 alloc_use_cost_map (data);
5911 for (i = 0; i < data->vgroups.length (); i++)
5913 group = data->vgroups[i];
5915 if (data->consider_all_candidates)
5917 for (j = 0; j < data->vcands.length (); j++)
5919 cand = data->vcands[j];
5920 determine_group_iv_cost (data, group, cand);
5923 else
5925 bitmap_iterator bi;
5927 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5929 cand = data->vcands[j];
5930 if (!determine_group_iv_cost (data, group, cand))
5931 bitmap_set_bit (to_clear, j);
5934 /* Remove the candidates for which the cost is infinite from
5935 the list of related candidates. */
5936 bitmap_and_compl_into (group->related_cands, to_clear);
5937 bitmap_clear (to_clear);
5941 BITMAP_FREE (to_clear);
5943 if (dump_file && (dump_flags & TDF_DETAILS))
5945 bitmap_iterator bi;
5947 /* Dump invariant variables. */
5948 fprintf (dump_file, "\n<Invariant Vars>:\n");
5949 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5951 struct version_info *info = ver_info (data, i);
5952 if (info->inv_id)
5954 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5955 print_generic_expr (dump_file, info->name, TDF_SLIM);
5956 fprintf (dump_file, "%s\n",
5957 info->has_nonlin_use ? "" : "\t(eliminable)");
5961 /* Dump invariant expressions. */
5962 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5963 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5965 for (hash_table<iv_inv_expr_hasher>::iterator it
5966 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5967 ++it)
5968 list.safe_push (*it);
5970 list.qsort (sort_iv_inv_expr_ent);
5972 for (i = 0; i < list.length (); ++i)
5974 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5975 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5976 fprintf (dump_file, "\n");
5979 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5981 for (i = 0; i < data->vgroups.length (); i++)
5983 group = data->vgroups[i];
5985 fprintf (dump_file, "Group %d:\n", i);
5986 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5987 for (j = 0; j < group->n_map_members; j++)
5989 if (!group->cost_map[j].cand
5990 || group->cost_map[j].cost.infinite_cost_p ())
5991 continue;
5993 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5994 group->cost_map[j].cand->id,
5995 group->cost_map[j].cost.cost,
5996 group->cost_map[j].cost.complexity);
5997 if (!group->cost_map[j].inv_exprs
5998 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5999 fprintf (dump_file, "NIL;\t");
6000 else
6001 bitmap_print (dump_file,
6002 group->cost_map[j].inv_exprs, "", ";\t");
6003 if (!group->cost_map[j].inv_vars
6004 || bitmap_empty_p (group->cost_map[j].inv_vars))
6005 fprintf (dump_file, "NIL;\n");
6006 else
6007 bitmap_print (dump_file,
6008 group->cost_map[j].inv_vars, "", "\n");
6011 fprintf (dump_file, "\n");
6013 fprintf (dump_file, "\n");
6017 /* Determines cost of the candidate CAND. */
6019 static void
6020 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6022 comp_cost cost_base;
6023 int64_t cost, cost_step;
6024 tree base;
6026 gcc_assert (cand->iv != NULL);
6028 /* There are two costs associated with the candidate -- its increment
6029 and its initialization. The second is almost negligible for any loop
6030 that rolls enough, so we take it into account only a little. */
6032 base = cand->iv->base;
6033 cost_base = force_var_cost (data, base, NULL);
6034 /* It is rare for the iv register to happen to be initialized with the
6035 proper value at no cost. In general, there will at least be a regcopy
6036 or a const set. */
6037 if (cost_base.cost == 0)
6038 cost_base.cost = COSTS_N_INSNS (1);
6039 /* Doloop decrement should be considered as zero cost. */
6040 if (cand->doloop_p)
6041 cost_step = 0;
6042 else
6043 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6044 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6046 /* Prefer the original ivs unless we may gain something by replacing them.
6047 The reason is to make debugging simpler; this is not relevant for
6048 artificial ivs created by other optimization passes. */
6049 if ((cand->pos != IP_ORIGINAL
6050 || !SSA_NAME_VAR (cand->var_before)
6051 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6052 /* Prefer doloop as well. */
6053 && !cand->doloop_p)
6054 cost++;
6056 /* Prefer not to insert statements into latch unless there are some
6057 already (so that we do not create unnecessary jumps). */
6058 if (cand->pos == IP_END
6059 && empty_block_p (ip_end_pos (data->current_loop)))
6060 cost++;
6062 cand->cost = cost;
6063 cand->cost_step = cost_step;
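/* Rough illustration (all costs are target-specific): a candidate whose
   base needs one instruction to set up and whose step is a plain add ends
   up with cost_step = add_cost plus a setup component, which
   adjust_setup_cost scales down (roughly by the expected iteration count),
   plus the small "+ 1" penalties above for non-original, non-doloop
   candidates and for having to insert into an empty latch.  */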
6066 /* Determines costs of computation of the candidates. */
6068 static void
6069 determine_iv_costs (struct ivopts_data *data)
6071 unsigned i;
6073 if (dump_file && (dump_flags & TDF_DETAILS))
6075 fprintf (dump_file, "<Candidate Costs>:\n");
6076 fprintf (dump_file, " cand\tcost\n");
6079 for (i = 0; i < data->vcands.length (); i++)
6081 struct iv_cand *cand = data->vcands[i];
6083 determine_iv_cost (data, cand);
6085 if (dump_file && (dump_flags & TDF_DETAILS))
6086 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6089 if (dump_file && (dump_flags & TDF_DETAILS))
6090 fprintf (dump_file, "\n");
6093 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6094 induction variables. Note N_INVS includes both invariant variables and
6095 invariant expressions. */
6097 static unsigned
6098 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6099 unsigned n_cands)
6101 unsigned cost;
6102 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6103 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6104 bool speed = data->speed;
6106 /* If there is a call in the loop body, the call-clobbered registers
6107 are not available for loop invariants. */
6108 if (data->body_includes_call)
6109 available_regs = available_regs - target_clobbered_regs;
6111 /* If we have enough registers. */
6112 if (regs_needed + target_res_regs < available_regs)
6113 cost = n_new;
6114 /* If close to running out of registers, try to preserve them. */
6115 else if (regs_needed <= available_regs)
6116 cost = target_reg_cost [speed] * regs_needed;
6117 /* If the total need exceeds the available registers but the candidates
6118 alone still fit, we penalize the extra registers using target_spill_cost. */
6119 else if (n_cands <= available_regs)
6120 cost = target_reg_cost [speed] * available_regs
6121 + target_spill_cost [speed] * (regs_needed - available_regs);
6122 /* If even the number of candidates exceeds the available registers, we
6123 penalize the extra candidate registers using target_spill_cost * 2,
6124 because spilling an induction variable is more expensive than spilling an invariant. */
6125 else
6126 cost = target_reg_cost [speed] * available_regs
6127 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6128 + target_spill_cost [speed] * (regs_needed - n_cands);
6130 /* Finally, add the number of candidates, so that we prefer eliminating
6131 induction variables if possible. */
6132 return cost + n_cands;
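/* Worked example with made-up numbers: say available_regs ends up as 12,
   data->regs_used is 6, and we are costing n_cands = 4 candidates plus
   n_invs = 3 invariants, so regs_needed = 13.  The first two branches
   fail (13 > 12), but n_cands = 4 <= 12, so the cost is 12 *
   target_reg_cost plus one spilled register at target_spill_cost, and the
   final "+ n_cands" adds 4 to bias the search towards fewer induction
   variables.  */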
6135 /* For each size of the induction variable set determine the penalty. */
6137 static void
6138 determine_set_costs (struct ivopts_data *data)
6140 unsigned j, n;
6141 gphi *phi;
6142 gphi_iterator psi;
6143 tree op;
6144 class loop *loop = data->current_loop;
6145 bitmap_iterator bi;
6147 if (dump_file && (dump_flags & TDF_DETAILS))
6149 fprintf (dump_file, "<Global Costs>:\n");
6150 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6151 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6152 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6153 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6156 n = 0;
6157 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6159 phi = psi.phi ();
6160 op = PHI_RESULT (phi);
6162 if (virtual_operand_p (op))
6163 continue;
6165 if (get_iv (data, op))
6166 continue;
6168 if (!POINTER_TYPE_P (TREE_TYPE (op))
6169 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6170 continue;
6172 n++;
6175 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6177 struct version_info *info = ver_info (data, j);
6179 if (info->inv_id && info->has_nonlin_use)
6180 n++;
6183 data->regs_used = n;
6184 if (dump_file && (dump_flags & TDF_DETAILS))
6185 fprintf (dump_file, " regs_used %d\n", n);
6187 if (dump_file && (dump_flags & TDF_DETAILS))
6189 fprintf (dump_file, " cost for size:\n");
6190 fprintf (dump_file, " ivs\tcost\n");
6191 for (j = 0; j <= 2 * target_avail_regs; j++)
6192 fprintf (dump_file, " %d\t%d\n", j,
6193 ivopts_estimate_reg_pressure (data, 0, j));
6194 fprintf (dump_file, "\n");
6198 /* Returns true if A is a cheaper cost pair than B. */
6200 static bool
6201 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6203 if (!a)
6204 return false;
6206 if (!b)
6207 return true;
6209 if (a->cost < b->cost)
6210 return true;
6212 if (b->cost < a->cost)
6213 return false;
6215 /* In case the costs are the same, prefer the cheaper candidate. */
6216 if (a->cand->cost < b->cand->cost)
6217 return true;
6219 return false;
6222 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6223 for more expensive, equal and cheaper respectively. */
6225 static int
6226 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6228 if (cheaper_cost_pair (a, b))
6229 return -1;
6230 if (cheaper_cost_pair (b, a))
6231 return 1;
6233 return 0;
6236 /* Returns the cost pair through which GROUP is expressed in IVS. */
6238 static class cost_pair *
6239 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6241 return ivs->cand_for_group[group->id];
6244 /* Computes the cost field of IVS structure. */
6246 static void
6247 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6249 comp_cost cost = ivs->cand_use_cost;
6251 cost += ivs->cand_cost;
6252 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6253 ivs->cost = cost;
6256 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6257 and IVS. */
6259 static void
6260 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6262 bitmap_iterator bi;
6263 unsigned iid;
6265 if (!invs)
6266 return;
6268 gcc_assert (n_inv_uses != NULL);
6269 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6271 n_inv_uses[iid]--;
6272 if (n_inv_uses[iid] == 0)
6273 ivs->n_invs--;
6277 /* Set GROUP not to be expressed by any candidate in IVS. */
6279 static void
6280 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6281 struct iv_group *group)
6283 unsigned gid = group->id, cid;
6284 class cost_pair *cp;
6286 cp = ivs->cand_for_group[gid];
6287 if (!cp)
6288 return;
6289 cid = cp->cand->id;
6291 ivs->bad_groups++;
6292 ivs->cand_for_group[gid] = NULL;
6293 ivs->n_cand_uses[cid]--;
6295 if (ivs->n_cand_uses[cid] == 0)
6297 bitmap_clear_bit (ivs->cands, cid);
6298 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6299 ivs->n_cands--;
6300 ivs->cand_cost -= cp->cand->cost;
6301 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6302 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6305 ivs->cand_use_cost -= cp->cost;
6306 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6307 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6308 iv_ca_recount_cost (data, ivs);
6311 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6312 IVS. */
6314 static void
6315 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6317 bitmap_iterator bi;
6318 unsigned iid;
6320 if (!invs)
6321 return;
6323 gcc_assert (n_inv_uses != NULL);
6324 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6326 n_inv_uses[iid]++;
6327 if (n_inv_uses[iid] == 1)
6328 ivs->n_invs++;
6332 /* Set cost pair for GROUP in set IVS to CP. */
6334 static void
6335 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6336 struct iv_group *group, class cost_pair *cp)
6338 unsigned gid = group->id, cid;
6340 if (ivs->cand_for_group[gid] == cp)
6341 return;
6343 if (ivs->cand_for_group[gid])
6344 iv_ca_set_no_cp (data, ivs, group);
6346 if (cp)
6348 cid = cp->cand->id;
6350 ivs->bad_groups--;
6351 ivs->cand_for_group[gid] = cp;
6352 ivs->n_cand_uses[cid]++;
6353 if (ivs->n_cand_uses[cid] == 1)
6355 bitmap_set_bit (ivs->cands, cid);
6356 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6357 ivs->n_cands++;
6358 ivs->cand_cost += cp->cand->cost;
6359 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6360 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6363 ivs->cand_use_cost += cp->cost;
6364 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6365 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6366 iv_ca_recount_cost (data, ivs);
6370 /* Extend set IVS by expressing GROUP by some of the candidates in it
6371 if possible. Consider all important candidates if the candidates in
6372 set IVS don't give any result. */
6374 static void
6375 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6376 struct iv_group *group)
6378 class cost_pair *best_cp = NULL, *cp;
6379 bitmap_iterator bi;
6380 unsigned i;
6381 struct iv_cand *cand;
6383 gcc_assert (ivs->upto >= group->id);
6384 ivs->upto++;
6385 ivs->bad_groups++;
6387 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6389 cand = data->vcands[i];
6390 cp = get_group_iv_cost (data, group, cand);
6391 if (cheaper_cost_pair (cp, best_cp))
6392 best_cp = cp;
6395 if (best_cp == NULL)
6397 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6399 cand = data->vcands[i];
6400 cp = get_group_iv_cost (data, group, cand);
6401 if (cheaper_cost_pair (cp, best_cp))
6402 best_cp = cp;
6406 iv_ca_set_cp (data, ivs, group, best_cp);
6409 /* Get cost for assignment IVS. */
6411 static comp_cost
6412 iv_ca_cost (class iv_ca *ivs)
6414 /* This was a conditional expression but it triggered a bug in
6415 Sun C 5.5. */
6416 if (ivs->bad_groups)
6417 return infinite_cost;
6418 else
6419 return ivs->cost;
6422 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6423 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6424 respectively. */
6426 static int
6427 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6428 struct iv_group *group, class cost_pair *old_cp,
6429 class cost_pair *new_cp)
6431 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6432 unsigned old_n_invs = ivs->n_invs;
6433 iv_ca_set_cp (data, ivs, group, new_cp);
6434 unsigned new_n_invs = ivs->n_invs;
6435 iv_ca_set_cp (data, ivs, group, old_cp);
6437 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6440 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6441 it before NEXT. */
6443 static struct iv_ca_delta *
6444 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6445 class cost_pair *new_cp, struct iv_ca_delta *next)
6447 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6449 change->group = group;
6450 change->old_cp = old_cp;
6451 change->new_cp = new_cp;
6452 change->next = next;
6454 return change;
6457 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6458 are rewritten. */
6460 static struct iv_ca_delta *
6461 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6463 struct iv_ca_delta *last;
6465 if (!l2)
6466 return l1;
6468 if (!l1)
6469 return l2;
6471 for (last = l1; last->next; last = last->next)
6472 continue;
6473 last->next = l2;
6475 return l1;
6478 /* Reverse the list of changes DELTA, forming the inverse to it. */
6480 static struct iv_ca_delta *
6481 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6483 struct iv_ca_delta *act, *next, *prev = NULL;
6485 for (act = delta; act; act = next)
6487 next = act->next;
6488 act->next = prev;
6489 prev = act;
6491 std::swap (act->old_cp, act->new_cp);
6494 return prev;
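/* E.g. a delta chain A -> B -> C becomes C -> B -> A, with each entry's
   old_cp and new_cp swapped, so committing the reversed chain exactly
   undoes the original changes.  */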
6497 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6498 reverted instead. */
6500 static void
6501 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6502 struct iv_ca_delta *delta, bool forward)
6504 class cost_pair *from, *to;
6505 struct iv_ca_delta *act;
6507 if (!forward)
6508 delta = iv_ca_delta_reverse (delta);
6510 for (act = delta; act; act = act->next)
6512 from = act->old_cp;
6513 to = act->new_cp;
6514 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6515 iv_ca_set_cp (data, ivs, act->group, to);
6518 if (!forward)
6519 iv_ca_delta_reverse (delta);
6522 /* Returns true if CAND is used in IVS. */
6524 static bool
6525 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6527 return ivs->n_cand_uses[cand->id] > 0;
6530 /* Returns number of induction variable candidates in the set IVS. */
6532 static unsigned
6533 iv_ca_n_cands (class iv_ca *ivs)
6535 return ivs->n_cands;
6538 /* Free the list of changes DELTA. */
6540 static void
6541 iv_ca_delta_free (struct iv_ca_delta **delta)
6543 struct iv_ca_delta *act, *next;
6545 for (act = *delta; act; act = next)
6547 next = act->next;
6548 free (act);
6551 *delta = NULL;
6554 /* Allocates a new iv candidate assignment. */
6556 static class iv_ca *
6557 iv_ca_new (struct ivopts_data *data)
6559 class iv_ca *nw = XNEW (class iv_ca);
6561 nw->upto = 0;
6562 nw->bad_groups = 0;
6563 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6564 data->vgroups.length ());
6565 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6566 nw->cands = BITMAP_ALLOC (NULL);
6567 nw->n_cands = 0;
6568 nw->n_invs = 0;
6569 nw->cand_use_cost = no_cost;
6570 nw->cand_cost = 0;
6571 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6572 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6573 nw->cost = no_cost;
6575 return nw;
6578 /* Free memory occupied by the set IVS. */
6580 static void
6581 iv_ca_free (class iv_ca **ivs)
6583 free ((*ivs)->cand_for_group);
6584 free ((*ivs)->n_cand_uses);
6585 BITMAP_FREE ((*ivs)->cands);
6586 free ((*ivs)->n_inv_var_uses);
6587 free ((*ivs)->n_inv_expr_uses);
6588 free (*ivs);
6589 *ivs = NULL;
6592 /* Dumps IVS to FILE. */
6594 static void
6595 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6597 unsigned i;
6598 comp_cost cost = iv_ca_cost (ivs);
6600 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6601 cost.complexity);
6602 fprintf (file, " reg_cost: %d\n",
6603 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6604 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6605 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6606 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6607 bitmap_print (file, ivs->cands, " candidates: ","\n");
6609 for (i = 0; i < ivs->upto; i++)
6611 struct iv_group *group = data->vgroups[i];
6612 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6613 if (cp)
6614 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6615 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6616 cp->cost.cost, cp->cost.complexity);
6617 else
6618 fprintf (file, " group:%d --> ??\n", group->id);
6621 const char *pref = "";
6622 fprintf (file, " invariant variables: ");
6623 for (i = 1; i <= data->max_inv_var_id; i++)
6624 if (ivs->n_inv_var_uses[i])
6626 fprintf (file, "%s%d", pref, i);
6627 pref = ", ";
6630 pref = "";
6631 fprintf (file, "\n invariant expressions: ");
6632 for (i = 1; i <= data->max_inv_expr_id; i++)
6633 if (ivs->n_inv_expr_uses[i])
6635 fprintf (file, "%s%d", pref, i);
6636 pref = ", ";
6639 fprintf (file, "\n\n");
6642 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6643 new set, and store differences in DELTA. Number of induction variables
6644 in the new set is stored in N_IVS. MIN_NCAND is a flag. When it is true
6645 the function will try to find a solution with a minimal number of iv candidates. */
6647 static comp_cost
6648 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6649 struct iv_cand *cand, struct iv_ca_delta **delta,
6650 unsigned *n_ivs, bool min_ncand)
6652 unsigned i;
6653 comp_cost cost;
6654 struct iv_group *group;
6655 class cost_pair *old_cp, *new_cp;
6657 *delta = NULL;
6658 for (i = 0; i < ivs->upto; i++)
6660 group = data->vgroups[i];
6661 old_cp = iv_ca_cand_for_group (ivs, group);
6663 if (old_cp
6664 && old_cp->cand == cand)
6665 continue;
6667 new_cp = get_group_iv_cost (data, group, cand);
6668 if (!new_cp)
6669 continue;
6671 if (!min_ncand)
6673 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6674 /* Skip if new_cp depends on more invariants. */
6675 if (cmp_invs > 0)
6676 continue;
6678 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6679 /* Skip if new_cp is not cheaper. */
6680 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6681 continue;
6684 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6687 iv_ca_delta_commit (data, ivs, *delta, true);
6688 cost = iv_ca_cost (ivs);
6689 if (n_ivs)
6690 *n_ivs = iv_ca_n_cands (ivs);
6691 iv_ca_delta_commit (data, ivs, *delta, false);
6693 return cost;
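/* Note the delta is committed only temporarily to measure the cost of the
   extended set and is immediately reverted; the caller decides whether to
   apply it for real via iv_ca_delta_commit.  */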
6696 /* Try narrowing set IVS by removing CAND. Return the cost of
6697 the new set and store the differences in DELTA. START is
6698 the candidate with which we start narrowing. */
6700 static comp_cost
6701 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6702 struct iv_cand *cand, struct iv_cand *start,
6703 struct iv_ca_delta **delta)
6705 unsigned i, ci;
6706 struct iv_group *group;
6707 class cost_pair *old_cp, *new_cp, *cp;
6708 bitmap_iterator bi;
6709 struct iv_cand *cnd;
6710 comp_cost cost, best_cost, acost;
6712 *delta = NULL;
6713 for (i = 0; i < data->vgroups.length (); i++)
6715 group = data->vgroups[i];
6717 old_cp = iv_ca_cand_for_group (ivs, group);
6718 if (old_cp->cand != cand)
6719 continue;
6721 best_cost = iv_ca_cost (ivs);
6722 /* Start narrowing with START. */
6723 new_cp = get_group_iv_cost (data, group, start);
6725 if (data->consider_all_candidates)
6727 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6729 if (ci == cand->id || (start && ci == start->id))
6730 continue;
6732 cnd = data->vcands[ci];
6734 cp = get_group_iv_cost (data, group, cnd);
6735 if (!cp)
6736 continue;
6738 iv_ca_set_cp (data, ivs, group, cp);
6739 acost = iv_ca_cost (ivs);
6741 if (acost < best_cost)
6743 best_cost = acost;
6744 new_cp = cp;
6748 else
6750 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6752 if (ci == cand->id || (start && ci == start->id))
6753 continue;
6755 cnd = data->vcands[ci];
6757 cp = get_group_iv_cost (data, group, cnd);
6758 if (!cp)
6759 continue;
6761 iv_ca_set_cp (data, ivs, group, cp);
6762 acost = iv_ca_cost (ivs);
6764 if (acost < best_cost)
6766 best_cost = acost;
6767 new_cp = cp;
6771 /* Restore to old cp for use. */
6772 iv_ca_set_cp (data, ivs, group, old_cp);
6774 if (!new_cp)
6776 iv_ca_delta_free (delta);
6777 return infinite_cost;
6780 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6783 iv_ca_delta_commit (data, ivs, *delta, true);
6784 cost = iv_ca_cost (ivs);
6785 iv_ca_delta_commit (data, ivs, *delta, false);
6787 return cost;
6790 /* Try optimizing the set of candidates IVS by removing candidates other
6791 than EXCEPT_CAND from it. Return the cost of the new set, and store
6792 the differences in DELTA. */
6794 static comp_cost
6795 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6796 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6798 bitmap_iterator bi;
6799 struct iv_ca_delta *act_delta, *best_delta;
6800 unsigned i;
6801 comp_cost best_cost, acost;
6802 struct iv_cand *cand;
6804 best_delta = NULL;
6805 best_cost = iv_ca_cost (ivs);
6807 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6809 cand = data->vcands[i];
6811 if (cand == except_cand)
6812 continue;
6814 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6816 if (acost < best_cost)
6818 best_cost = acost;
6819 iv_ca_delta_free (&best_delta);
6820 best_delta = act_delta;
6822 else
6823 iv_ca_delta_free (&act_delta);
6826 if (!best_delta)
6828 *delta = NULL;
6829 return best_cost;
6832 /* Recurse to possibly remove other unnecessary ivs. */
6833 iv_ca_delta_commit (data, ivs, best_delta, true);
6834 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6835 iv_ca_delta_commit (data, ivs, best_delta, false);
6836 *delta = iv_ca_delta_join (best_delta, *delta);
6837 return best_cost;
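/* The recursion above keeps removing candidates that become unnecessary
   once the best removal has been applied; the per-step deltas are joined
   so the caller can commit or discard the whole sequence at once.  */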
6840 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6841 cheaper local cost for GROUP than BEST_CP. Return pointer to
6842 the corresponding cost_pair, otherwise just return BEST_CP. */
6844 static class cost_pair*
6845 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6846 unsigned int cand_idx, struct iv_cand *old_cand,
6847 class cost_pair *best_cp)
6849 struct iv_cand *cand;
6850 class cost_pair *cp;
6852 gcc_assert (old_cand != NULL && best_cp != NULL);
6853 if (cand_idx == old_cand->id)
6854 return best_cp;
6856 cand = data->vcands[cand_idx];
6857 cp = get_group_iv_cost (data, group, cand);
6858 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6859 return cp;
6861 return best_cp;
6864 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6865 which are used by more than one iv use. For each such candidate,
6866 this function tries to represent the iv uses under that candidate using
6867 other candidates with lower local cost, then tries to prune the new set.
6868 If the new set has a lower cost, it returns the new cost after recording
6869 the candidate replacement in list DELTA. */
6871 static comp_cost
6872 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6873 struct iv_ca_delta **delta)
6875 bitmap_iterator bi, bj;
6876 unsigned int i, j, k;
6877 struct iv_cand *cand;
6878 comp_cost orig_cost, acost;
6879 struct iv_ca_delta *act_delta, *tmp_delta;
6880 class cost_pair *old_cp, *best_cp = NULL;
6882 *delta = NULL;
6883 orig_cost = iv_ca_cost (ivs);
6885 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6887 if (ivs->n_cand_uses[i] == 1
6888 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6889 continue;
6891 cand = data->vcands[i];
6893 act_delta = NULL;
6894 /* Represent uses under current candidate using other ones with
6895 lower local cost. */
6896 for (j = 0; j < ivs->upto; j++)
6898 struct iv_group *group = data->vgroups[j];
6899 old_cp = iv_ca_cand_for_group (ivs, group);
6901 if (old_cp->cand != cand)
6902 continue;
6904 best_cp = old_cp;
6905 if (data->consider_all_candidates)
6906 for (k = 0; k < data->vcands.length (); k++)
6907 best_cp = cheaper_cost_with_cand (data, group, k,
6908 old_cp->cand, best_cp);
6909 else
6910 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6911 best_cp = cheaper_cost_with_cand (data, group, k,
6912 old_cp->cand, best_cp);
6914 if (best_cp == old_cp)
6915 continue;
6917 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6919 /* No need for further pruning. */
6920 if (!act_delta)
6921 continue;
6923 /* Prune the new candidate set. */
6924 iv_ca_delta_commit (data, ivs, act_delta, true);
6925 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6926 iv_ca_delta_commit (data, ivs, act_delta, false);
6927 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6929 if (acost < orig_cost)
6931 *delta = act_delta;
6932 return acost;
6934 else
6935 iv_ca_delta_free (&act_delta);
6938 return orig_cost;
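/* Note the first replacement that beats ORIG_COST is returned right away
   rather than continuing to search for the globally best one, which keeps
   this fixed-point-breaking step relatively cheap.  */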
6941 /* Tries to extend the set IVS in the best possible way in order to
6942 express GROUP. If ORIGINALP is true, prefer candidates from
6943 the original set of IVs, otherwise favor important candidates not
6944 based on any memory object. */
6946 static bool
6947 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6948 struct iv_group *group, bool originalp)
6950 comp_cost best_cost, act_cost;
6951 unsigned i;
6952 bitmap_iterator bi;
6953 struct iv_cand *cand;
6954 struct iv_ca_delta *best_delta = NULL, *act_delta;
6955 class cost_pair *cp;
6957 iv_ca_add_group (data, ivs, group);
6958 best_cost = iv_ca_cost (ivs);
6959 cp = iv_ca_cand_for_group (ivs, group);
6960 if (cp)
6962 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6963 iv_ca_set_no_cp (data, ivs, group);
6966 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6967 first try important candidates not based on any memory object. Only if
6968 this fails, try the specific ones. Rationale -- in loops with many
6969 variables the best choice often is to use just one generic biv. If we
6970 added here many ivs specific to the uses, the optimization algorithm later
6971 would be likely to get stuck in a local minimum, thus causing us to create
6972 too many ivs. The approach from few ivs to more seems more likely to be
6973 successful -- starting from few ivs, replacing an expensive use by a
6974 specific iv should always be a win. */
6975 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6977 cand = data->vcands[i];
6979 if (originalp && cand->pos != IP_ORIGINAL)
6980 continue;
6982 if (!originalp && cand->iv->base_object != NULL_TREE)
6983 continue;
6985 if (iv_ca_cand_used_p (ivs, cand))
6986 continue;
6988 cp = get_group_iv_cost (data, group, cand);
6989 if (!cp)
6990 continue;
6992 iv_ca_set_cp (data, ivs, group, cp);
6993 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6994 true);
6995 iv_ca_set_no_cp (data, ivs, group);
6996 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6998 if (act_cost < best_cost)
7000 best_cost = act_cost;
7002 iv_ca_delta_free (&best_delta);
7003 best_delta = act_delta;
7005 else
7006 iv_ca_delta_free (&act_delta);
7009 if (best_cost.infinite_cost_p ())
7011 for (i = 0; i < group->n_map_members; i++)
7013 cp = group->cost_map + i;
7014 cand = cp->cand;
7015 if (!cand)
7016 continue;
7018 /* Already tried this. */
7019 if (cand->important)
7021 if (originalp && cand->pos == IP_ORIGINAL)
7022 continue;
7023 if (!originalp && cand->iv->base_object == NULL_TREE)
7024 continue;
7027 if (iv_ca_cand_used_p (ivs, cand))
7028 continue;
7030 act_delta = NULL;
7031 iv_ca_set_cp (data, ivs, group, cp);
7032 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7033 iv_ca_set_no_cp (data, ivs, group);
7034 act_delta = iv_ca_delta_add (group,
7035 iv_ca_cand_for_group (ivs, group),
7036 cp, act_delta);
7038 if (act_cost < best_cost)
7040 best_cost = act_cost;
7042 if (best_delta)
7043 iv_ca_delta_free (&best_delta);
7044 best_delta = act_delta;
7046 else
7047 iv_ca_delta_free (&act_delta);
7051 iv_ca_delta_commit (data, ivs, best_delta, true);
7052 iv_ca_delta_free (&best_delta);
7054 return !best_cost.infinite_cost_p ();
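/* The search above is two-phase: candidates recorded in the group's
   related_cands bitmap are tried first, and only if all of them yield
   infinite cost do we fall back to scanning the group's full cost map.  */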
7057 /* Finds an initial assignment of candidates to uses. */
7059 static class iv_ca *
7060 get_initial_solution (struct ivopts_data *data, bool originalp)
7062 unsigned i;
7063 class iv_ca *ivs = iv_ca_new (data);
7065 for (i = 0; i < data->vgroups.length (); i++)
7066 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7068 iv_ca_free (&ivs);
7069 return NULL;
7072 return ivs;
7075 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
7076 points to a bool variable; if it is true, this function tries to break
7077 the locally optimal fixed point by replacing candidates in IVS. */
7079 static bool
7080 try_improve_iv_set (struct ivopts_data *data,
7081 class iv_ca *ivs, bool *try_replace_p)
7083 unsigned i, n_ivs;
7084 comp_cost acost, best_cost = iv_ca_cost (ivs);
7085 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7086 struct iv_cand *cand;
7088 /* Try extending the set of induction variables by one. */
7089 for (i = 0; i < data->vcands.length (); i++)
7091 cand = data->vcands[i];
7093 if (iv_ca_cand_used_p (ivs, cand))
7094 continue;
7096 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7097 if (!act_delta)
7098 continue;
7100 /* If we successfully added the candidate and the set is small enough,
7101 try optimizing it by removing other candidates. */
7102 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7104 iv_ca_delta_commit (data, ivs, act_delta, true);
7105 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7106 iv_ca_delta_commit (data, ivs, act_delta, false);
7107 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7110 if (acost < best_cost)
7112 best_cost = acost;
7113 iv_ca_delta_free (&best_delta);
7114 best_delta = act_delta;
7116 else
7117 iv_ca_delta_free (&act_delta);
7120 if (!best_delta)
7122 /* Try removing the candidates from the set instead. */
7123 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7125 if (!best_delta && *try_replace_p)
7127 *try_replace_p = false;
7128 /* So far the candidate selection algorithm tends to choose fewer IVs
7129 so that it can handle cases in which loops have many variables
7130 but the best choice is often to use only one general biv. One
7131 weakness is that it can't handle the opposite cases, in which different
7132 candidates should be chosen with respect to each use. To solve
7133 the problem, we replace candidates in the manner described in the
7134 comments of iv_ca_replace, thus giving the general algorithm a chance
7135 to break the locally optimal fixed point in these cases. */
7136 best_cost = iv_ca_replace (data, ivs, &best_delta);
7139 if (!best_delta)
7140 return false;
7143 iv_ca_delta_commit (data, ivs, best_delta, true);
7144 iv_ca_delta_free (&best_delta);
7145 return best_cost == iv_ca_cost (ivs);
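/* The return value is true only when the committed delta actually realizes
   the predicted best cost; find_optimal_iv_set_1 below keeps calling this
   function in a loop until it returns false.  */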
7148 /* Attempts to find the optimal set of induction variables. We use a simple
7149 greedy heuristic -- we try to replace at most one candidate in the selected
7150 solution and remove the unused ivs while this improves the cost. */
7152 static class iv_ca *
7153 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7155 class iv_ca *set;
7156 bool try_replace_p = true;
7158 /* Get the initial solution. */
7159 set = get_initial_solution (data, originalp);
7160 if (!set)
7162 if (dump_file && (dump_flags & TDF_DETAILS))
7163 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7164 return NULL;
7167 if (dump_file && (dump_flags & TDF_DETAILS))
7169 fprintf (dump_file, "Initial set of candidates:\n");
7170 iv_ca_dump (data, dump_file, set);
7173 while (try_improve_iv_set (data, set, &try_replace_p))
7175 if (dump_file && (dump_flags & TDF_DETAILS))
7177 fprintf (dump_file, "Improved to:\n");
7178 iv_ca_dump (data, dump_file, set);
7182 /* If the set has infinite_cost, it can't be optimal. */
7183 if (iv_ca_cost (set).infinite_cost_p ())
7185 if (dump_file && (dump_flags & TDF_DETAILS))
7186 fprintf (dump_file,
7187 "Overflow to infinite cost in try_improve_iv_set.\n");
7188 iv_ca_free (&set);
7190 return set;
7193 static class iv_ca *
7194 find_optimal_iv_set (struct ivopts_data *data)
7196 unsigned i;
7197 comp_cost cost, origcost;
7198 class iv_ca *set, *origset;
7200 /* Determine the cost based on a strategy that starts with the original IVs,
7201 and try again using a strategy that prefers important candidates not
7202 based on any memory object. */
7203 origset = find_optimal_iv_set_1 (data, true);
7204 set = find_optimal_iv_set_1 (data, false);
7206 if (!origset && !set)
7207 return NULL;
7209 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7210 cost = set ? iv_ca_cost (set) : infinite_cost;
7212 if (dump_file && (dump_flags & TDF_DETAILS))
7214 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7215 origcost.cost, origcost.complexity);
7216 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7217 cost.cost, cost.complexity);
7220 /* Choose the one with the best cost. */
7221 if (origcost <= cost)
7223 if (set)
7224 iv_ca_free (&set);
7225 set = origset;
7227 else if (origset)
7228 iv_ca_free (&origset);
7230 for (i = 0; i < data->vgroups.length (); i++)
7232 struct iv_group *group = data->vgroups[i];
7233 group->selected = iv_ca_cand_for_group (set, group)->cand;
7236 return set;
7239 /* Creates a new induction variable corresponding to CAND. */
7241 static void
7242 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7244 gimple_stmt_iterator incr_pos;
7245 tree base;
7246 struct iv_use *use;
7247 struct iv_group *group;
7248 bool after = false;
7250 gcc_assert (cand->iv != NULL);
7252 switch (cand->pos)
7254 case IP_NORMAL:
7255 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7256 break;
7258 case IP_END:
7259 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7260 after = true;
7261 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7263 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7264 incr_pos = gsi_after_labels (split_edge (e));
7265 after = false;
7267 break;
7269 case IP_AFTER_USE:
7270 after = true;
7271 /* fall through */
7272 case IP_BEFORE_USE:
7273 incr_pos = gsi_for_stmt (cand->incremented_at);
7274 break;
7276 case IP_ORIGINAL:
7277 /* Mark that the iv is preserved. */
7278 name_info (data, cand->var_before)->preserve_biv = true;
7279 name_info (data, cand->var_after)->preserve_biv = true;
7281 /* Rewrite the increment so that it uses var_before directly. */
7282 use = find_interesting_uses_op (data, cand->var_after);
7283 group = data->vgroups[use->group_id];
7284 group->selected = cand;
7285 return;
7288 gimple_add_tmp_var (cand->var_before);
7290 base = unshare_expr (cand->iv->base);
7292 create_iv (base, PLUS_EXPR, unshare_expr (cand->iv->step),
7293 cand->var_before, data->current_loop,
7294 &incr_pos, after, &cand->var_before, &cand->var_after);
7297 /* Creates new induction variables described in SET. */
7299 static void
7300 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7302 unsigned i;
7303 struct iv_cand *cand;
7304 bitmap_iterator bi;
7306 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7308 cand = data->vcands[i];
7309 create_new_iv (data, cand);
7312 if (dump_file && (dump_flags & TDF_DETAILS))
7314 fprintf (dump_file, "Selected IV set for loop %d",
7315 data->current_loop->num);
7316 if (data->loop_loc != UNKNOWN_LOCATION)
7317 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7318 LOCATION_LINE (data->loop_loc));
7319 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7320 avg_loop_niter (data->current_loop));
7321 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7322 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7324 cand = data->vcands[i];
7325 dump_cand (dump_file, cand);
7327 fprintf (dump_file, "\n");
7331 /* Rewrites USE (definition of iv used in a nonlinear expression)
7332 using candidate CAND. */
7334 static void
7335 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7336 struct iv_use *use, struct iv_cand *cand)
7338 gassign *ass;
7339 gimple_stmt_iterator bsi;
7340 tree comp, type = get_use_type (use), tgt;
7342 /* An important special case -- if we are asked to express value of
7343 the original iv by itself, just exit; there is no need to
7344 introduce a new computation (that might also need casting the
7345 variable to unsigned and back). */
7346 if (cand->pos == IP_ORIGINAL
7347 && cand->incremented_at == use->stmt)
7349 tree op = NULL_TREE;
7350 enum tree_code stmt_code;
7352 gcc_assert (is_gimple_assign (use->stmt));
7353 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7355 /* Check whether we may leave the computation unchanged.
7356 This is the case only if it does not rely on other
7357 computations in the loop -- otherwise, the computation
7358 we rely upon may be removed in remove_unused_ivs,
7359 thus leading to an ICE. */
7360 stmt_code = gimple_assign_rhs_code (use->stmt);
7361 if (stmt_code == PLUS_EXPR
7362 || stmt_code == MINUS_EXPR
7363 || stmt_code == POINTER_PLUS_EXPR)
7365 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7366 op = gimple_assign_rhs2 (use->stmt);
7367 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7368 op = gimple_assign_rhs1 (use->stmt);
7371 if (op != NULL_TREE)
7373 if (expr_invariant_in_loop_p (data->current_loop, op))
7374 return;
7375 if (TREE_CODE (op) == SSA_NAME)
7377 struct iv *iv = get_iv (data, op);
7378 if (iv != NULL && integer_zerop (iv->step))
7379 return;
7384 switch (gimple_code (use->stmt))
7386 case GIMPLE_PHI:
7387 tgt = PHI_RESULT (use->stmt);
7389 /* If we should keep the biv, do not replace it. */
7390 if (name_info (data, tgt)->preserve_biv)
7391 return;
7393 bsi = gsi_after_labels (gimple_bb (use->stmt));
7394 break;
7396 case GIMPLE_ASSIGN:
7397 tgt = gimple_assign_lhs (use->stmt);
7398 bsi = gsi_for_stmt (use->stmt);
7399 break;
7401 default:
7402 gcc_unreachable ();
7405 aff_tree aff_inv, aff_var;
7406 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7407 use, cand, &aff_inv, &aff_var))
7408 gcc_unreachable ();
7410 unshare_aff_combination (&aff_inv);
7411 unshare_aff_combination (&aff_var);
7412 /* Prefer CSE opportunities over hoisted loop invariants by adding the offset
7413 last, so that iv_uses that differ only in constant offset can be CSEed. */
7414 poly_widest_int offset = aff_inv.offset;
7415 aff_inv.offset = 0;
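/* For instance, uses like a[i] and a[i + 1] then share the computation of
   the common part &a + i * step, and only the trailing constant offsets
   (0 and one element size) differ.  */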
7417 gimple_seq stmt_list = NULL, seq = NULL;
7418 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7419 tree comp_op2 = aff_combination_to_tree (&aff_var);
7420 gcc_assert (comp_op1 && comp_op2);
7422 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7423 gimple_seq_add_seq (&stmt_list, seq);
7424 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7425 gimple_seq_add_seq (&stmt_list, seq);
7427 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7428 std::swap (comp_op1, comp_op2);
7430 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7432 comp = fold_build_pointer_plus (comp_op1,
7433 fold_convert (sizetype, comp_op2));
7434 comp = fold_build_pointer_plus (comp,
7435 wide_int_to_tree (sizetype, offset));
7437 else
7439 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7440 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7441 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7442 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7445 comp = fold_convert (type, comp);
7446 comp = force_gimple_operand (comp, &seq, false, NULL);
7447 gimple_seq_add_seq (&stmt_list, seq);
7448 if (gimple_code (use->stmt) != GIMPLE_PHI
7449 /* We can't allow re-allocating the stmt as it might be pointed
7450 to still. */
7451 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7452 >= gimple_num_ops (gsi_stmt (bsi))))
7454 comp = force_gimple_operand (comp, &seq, true, NULL);
7455 gimple_seq_add_seq (&stmt_list, seq);
7456 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7458 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7459 /* As this isn't a plain copy we have to reset alignment
7460 information. */
7461 if (SSA_NAME_PTR_INFO (comp))
7462 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7466 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7467 if (gimple_code (use->stmt) == GIMPLE_PHI)
7469 ass = gimple_build_assign (tgt, comp);
7470 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7472 bsi = gsi_for_stmt (use->stmt);
7473 remove_phi_node (&bsi, false);
7475 else
7477 gimple_assign_set_rhs_from_tree (&bsi, comp);
7478 use->stmt = gsi_stmt (bsi);
7482 /* Performs a peephole optimization to reorder the iv update statement with
7483 a mem ref to enable instruction combining in later phases. The mem ref uses
7484 the iv value before the update, so the reordering transformation requires
7485 adjustment of the offset. CAND is the selected IV_CAND.
7487 Example:
7489 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7490 iv2 = iv1 + 1;
7492 if (t < val) (1)
7493 goto L;
7494 goto Head;
7497 directly propagating t over to (1) will introduce an overlapping live range
7498 and thus increase register pressure. This peephole transforms it into:
7501 iv2 = iv1 + 1;
7502 t = MEM_REF (base, iv2, 8, 8);
7503 if (t < val)
7504 goto L;
7505 goto Head;
7508 static void
7509 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7511 tree var_after;
7512 gimple *iv_update, *stmt;
7513 basic_block bb;
7514 gimple_stmt_iterator gsi, gsi_iv;
7516 if (cand->pos != IP_NORMAL)
7517 return;
7519 var_after = cand->var_after;
7520 iv_update = SSA_NAME_DEF_STMT (var_after);
7522 bb = gimple_bb (iv_update);
7523 gsi = gsi_last_nondebug_bb (bb);
7524 stmt = gsi_stmt (gsi);
7526 /* Only handle a conditional statement for now. */
7527 if (gimple_code (stmt) != GIMPLE_COND)
7528 return;
7530 gsi_prev_nondebug (&gsi);
7531 stmt = gsi_stmt (gsi);
7532 if (stmt != iv_update)
7533 return;
7535 gsi_prev_nondebug (&gsi);
7536 if (gsi_end_p (gsi))
7537 return;
7539 stmt = gsi_stmt (gsi);
7540 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7541 return;
7543 if (stmt != use->stmt)
7544 return;
7546 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7547 return;
7549 if (dump_file && (dump_flags & TDF_DETAILS))
7551 fprintf (dump_file, "Reordering \n");
7552 print_gimple_stmt (dump_file, iv_update, 0);
7553 print_gimple_stmt (dump_file, use->stmt, 0);
7554 fprintf (dump_file, "\n");
7557 gsi = gsi_for_stmt (use->stmt);
7558 gsi_iv = gsi_for_stmt (iv_update);
7559 gsi_move_before (&gsi_iv, &gsi);
7561 cand->pos = IP_BEFORE_USE;
7562 cand->incremented_at = use->stmt;
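/* Marking the candidate IP_BEFORE_USE here is what makes the later rewrite
   of USE pick up the already-incremented value, which is where the offset
   adjustment described in the comment above falls out.  */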
7565 /* Return the alias pointer type that should be used for a MEM_REF
7566 associated with USE, which has type USE_PTR_ADDRESS. */
7568 static tree
7569 get_alias_ptr_type_for_ptr_address (iv_use *use)
7571 gcall *call = as_a <gcall *> (use->stmt);
7572 switch (gimple_call_internal_fn (call))
7574 case IFN_MASK_LOAD:
7575 case IFN_MASK_STORE:
7576 case IFN_MASK_LOAD_LANES:
7577 case IFN_MASK_STORE_LANES:
7578 case IFN_MASK_LEN_LOAD_LANES:
7579 case IFN_MASK_LEN_STORE_LANES:
7580 case IFN_LEN_LOAD:
7581 case IFN_LEN_STORE:
7582 case IFN_MASK_LEN_LOAD:
7583 case IFN_MASK_LEN_STORE:
7584 /* The second argument contains the correct alias type. */
7585 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7586 return TREE_TYPE (gimple_call_arg (call, 1));
7588 default:
7589 gcc_unreachable ();
7594 /* Rewrites USE (address that is an iv) using candidate CAND. */
7596 static void
7597 rewrite_use_address (struct ivopts_data *data,
7598 struct iv_use *use, struct iv_cand *cand)
7600 aff_tree aff;
7601 bool ok;
7603 adjust_iv_update_pos (cand, use);
7604 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7605 gcc_assert (ok);
7606 unshare_aff_combination (&aff);
7608 /* To avoid undefined overflow problems, all IV candidates use unsigned
7609 integer types. The drawback is that this makes it impossible for
7610 create_mem_ref to distinguish an IV that is based on a memory object
7611 from one that represents simply an offset.
7613 To work around this problem, we pass a hint to create_mem_ref that
7614 indicates which variable (if any) in aff is an IV based on a memory
7615 object. Note that we only consider the candidate. If this is not
7616 based on an object, the base of the reference is in some subexpression
7617 of the use -- but these will use pointer types, so they are recognized
7618 by the create_mem_ref heuristics anyway. */
7619 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7620 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7621 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7622 tree type = use->mem_type;
7623 tree alias_ptr_type;
7624 if (use->type == USE_PTR_ADDRESS)
7625 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7626 else
7628 gcc_assert (type == TREE_TYPE (*use->op_p));
7629 unsigned int align = get_object_alignment (*use->op_p);
7630 if (align != TYPE_ALIGN (type))
7631 type = build_aligned_type (type, align);
7632 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7634 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7635 iv, base_hint, data->speed);
7637 if (use->type == USE_PTR_ADDRESS)
7639 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7640 ref = fold_convert (get_use_type (use), ref);
7641 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7642 true, GSI_SAME_STMT);
7644 else
7646 /* When we end up confused enough to have no suitable base and have
7647 stuffed everything into index2, use a LEA for the address and
7648 create a plain MEM_REF, to avoid basing a memory reference
7649 on address zero, which create_mem_ref_raw does as a fallback. */
7650 if (TREE_CODE (ref) == TARGET_MEM_REF
7651 && TMR_INDEX2 (ref) != NULL_TREE
7652 && integer_zerop (TREE_OPERAND (ref, 0)))
7654 ref = fold_build1 (ADDR_EXPR, TREE_TYPE (TREE_OPERAND (ref, 0)), ref);
7655 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7656 true, GSI_SAME_STMT);
7657 ref = build2 (MEM_REF, type, ref, build_zero_cst (alias_ptr_type));
7659 copy_ref_info (ref, *use->op_p);
7662 *use->op_p = ref;
7665 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7666 candidate CAND. */
7668 static void
7669 rewrite_use_compare (struct ivopts_data *data,
7670 struct iv_use *use, struct iv_cand *cand)
7672 tree comp, op, bound;
7673 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7674 enum tree_code compare;
7675 struct iv_group *group = data->vgroups[use->group_id];
7676 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7678 bound = cp->value;
7679 if (bound)
7681 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7682 tree var_type = TREE_TYPE (var);
7683 gimple_seq stmts;
7685 if (dump_file && (dump_flags & TDF_DETAILS))
7687 fprintf (dump_file, "Replacing exit test: ");
7688 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7690 compare = cp->comp;
7691 bound = unshare_expr (fold_convert (var_type, bound));
7692 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7693 if (stmts)
7694 gsi_insert_seq_on_edge_immediate (
7695 loop_preheader_edge (data->current_loop),
7696 stmts);
7698 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7699 gimple_cond_set_lhs (cond_stmt, var);
7700 gimple_cond_set_code (cond_stmt, compare);
7701 gimple_cond_set_rhs (cond_stmt, op);
7702 return;
7705 /* The induction variable elimination failed; just express the original
7706 giv. */
7707 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7708 gcc_assert (comp != NULL_TREE);
7709 gcc_assert (use->op_p != NULL);
7710 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7711 SSA_NAME_VAR (*use->op_p),
7712 true, GSI_SAME_STMT);
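/* Illustrative effect of the BOUND path above: an exit test like
   "if (i < n)" can become "if (p != base + n * step)" when the selected
   candidate is a pointer iv, removing the last use of the original counter
   so that remove_unused_ivs can then delete it.  */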
7715 /* Rewrite the groups using the selected induction variables. */
7717 static void
7718 rewrite_groups (struct ivopts_data *data)
7720 unsigned i, j;
7722 for (i = 0; i < data->vgroups.length (); i++)
7724 struct iv_group *group = data->vgroups[i];
7725 struct iv_cand *cand = group->selected;
7727 gcc_assert (cand);
7729 if (group->type == USE_NONLINEAR_EXPR)
7731 for (j = 0; j < group->vuses.length (); j++)
7733 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7734 update_stmt (group->vuses[j]->stmt);
7737 else if (address_p (group->type))
7739 for (j = 0; j < group->vuses.length (); j++)
7741 rewrite_use_address (data, group->vuses[j], cand);
7742 update_stmt (group->vuses[j]->stmt);
7745 else
7747 gcc_assert (group->type == USE_COMPARE);
7749 for (j = 0; j < group->vuses.length (); j++)
7751 rewrite_use_compare (data, group->vuses[j], cand);
7752 update_stmt (group->vuses[j]->stmt);
7758 /* Removes the ivs that are not used after rewriting. */
7760 static void
7761 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7763 unsigned j;
7764 bitmap_iterator bi;
7766 /* Figure out an order in which to release SSA DEFs so that we don't
7767 release something that we'd have to propagate into a debug stmt
7768 afterwards. */
7769 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7771 struct version_info *info;
7773 info = ver_info (data, j);
7774 if (info->iv
7775 && !integer_zerop (info->iv->step)
7776 && !info->inv_id
7777 && !info->iv->nonlin_use
7778 && !info->preserve_biv)
7780 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7782 tree def = info->iv->ssa_name;
7784 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7786 imm_use_iterator imm_iter;
7787 use_operand_p use_p;
7788 gimple *stmt;
7789 int count = 0;
7791 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7793 if (!gimple_debug_bind_p (stmt))
7794 continue;
7796 /* We just want to determine whether to do nothing
7797 (count == 0), to substitute the computed
7798 expression into a single use of the SSA DEF by
7799 itself (count == 1), or to use a debug temp
7800 because the SSA DEF is used multiple times or as
7801 part of a larger expression (count > 1). */
7802 count++;
7803 if (gimple_debug_bind_get_value (stmt) != def)
7804 count++;
7806 if (count > 1)
7807 break;
7810 if (!count)
7811 continue;
7813 struct iv_use dummy_use;
7814 struct iv_cand *best_cand = NULL, *cand;
7815 unsigned i, best_pref = 0, cand_pref;
7816 tree comp = NULL_TREE;
7818 memset (&dummy_use, 0, sizeof (dummy_use));
7819 dummy_use.iv = info->iv;
7820 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7822 cand = data->vgroups[i]->selected;
7823 if (cand == best_cand)
7824 continue;
7825 cand_pref = operand_equal_p (cand->iv->step,
7826 info->iv->step, 0)
7827 ? 4 : 0;
7828 cand_pref
7829 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7830 == TYPE_MODE (TREE_TYPE (info->iv->base))
7831 ? 2 : 0;
7832 cand_pref
7833 += TREE_CODE (cand->iv->base) == INTEGER_CST
7834 ? 1 : 0;
7835 if (best_cand == NULL || best_pref < cand_pref)
7837 tree this_comp
7838 = get_debug_computation_at (data->current_loop,
7839 SSA_NAME_DEF_STMT (def),
7840 &dummy_use, cand);
7841 if (this_comp)
7843 best_cand = cand;
7844 best_pref = cand_pref;
7845 comp = this_comp;
7850 if (!best_cand)
7851 continue;
7853 comp = unshare_expr (comp);
7854 if (count > 1)
7856 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7857 /* FIXME: Is setting the mode really necessary? */
7858 if (SSA_NAME_VAR (def))
7859 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7860 else
7861 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7862 gdebug *def_temp
7863 = gimple_build_debug_bind (vexpr, comp, NULL);
7864 gimple_stmt_iterator gsi;
7866 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7867 gsi = gsi_after_labels (gimple_bb
7868 (SSA_NAME_DEF_STMT (def)));
7869 else
7870 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7872 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7873 comp = vexpr;
7876 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7878 if (!gimple_debug_bind_p (stmt))
7879 continue;
7881 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7882 SET_USE (use_p, comp);
7884 update_stmt (stmt);
7891 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7892 for hash_map::traverse. */
7894 bool
7895 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7897 free (value);
7898 return true;

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
        free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
        {
          if (group->cost_map[j].inv_vars)
            BITMAP_FREE (group->cost_map[j].inv_vars);
          if (group->cost_map[j].inv_exprs)
            BITMAP_FREE (group->cost_map[j].inv_exprs);
        }

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
        BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
        BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);
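
  /* Processing a loop may have created new SSA names, so version_info may
     now be too small; reallocate it (zero-initialized) with some headroom
     for the following loops.  */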
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  if (data->base_object_map)
    delete data->base_object_map;
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES basic blocks,
   includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
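        /* Internal function calls and inexpensive builtins are not counted;
           only a real call matters here, since it makes the call-clobbered
           registers unavailable to the loop body (see
           ivopts_estimate_reg_pressure).  */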
        if (is_gimple_call (stmt)
            && !gimple_call_internal_p (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}

/* Determine cost scaling factor for basic blocks in loop.  */
#define COST_SCALING_FACTOR_BOUND (20)

static void
determine_scaling_factor (struct ivopts_data *data, basic_block *body)
{
  int lfreq = data->current_loop->header->count.to_frequency (cfun);
  if (!data->speed || lfreq <= 0)
    return;

  int max_freq = lfreq;
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    {
      body[i]->aux = (void *)(intptr_t) 1;
      if (max_freq < body[i]->count.to_frequency (cfun))
        max_freq = body[i]->count.to_frequency (cfun);
    }
  if (max_freq > lfreq)
    {
      int divisor, factor;
      /* Check if scaling factor itself needs to be scaled by the bound.  This
         is to avoid overflow when scaling cost according to profile info.  */
      if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
        {
          divisor = max_freq;
          factor = COST_SCALING_FACTOR_BOUND;
        }
      else
        {
          divisor = lfreq;
          factor = 1;
        }
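
      /* Scale each block that is hotter than the loop header by its
         frequency relative to the header, using the DIVISOR/FACTOR pair
         chosen above so the factor never exceeds COST_SCALING_FACTOR_BOUND.
         The per-block factor is stashed in the aux field and applied when
         statement costs in that block are computed.  */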
      for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
        {
          int bfreq = body[i]->count.to_frequency (cfun);
          if (bfreq <= lfreq)
            continue;

          body[i]->aux = (void *)(intptr_t) (factor * bfreq / divisor);
        }
    }
}

/* Find the doloop comparison IV use and set its doloop_p flag if found.  */

static bool
find_doloop_use (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;

  for (unsigned i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      if (group->type == USE_COMPARE)
        {
          gcc_assert (group->vuses.length () == 1);
          struct iv_use *use = group->vuses[0];
          gimple *stmt = use->stmt;
          if (gimple_code (stmt) == GIMPLE_COND)
            {
              basic_block bb = gimple_bb (stmt);
              edge true_edge, false_edge;
              extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
              /* This comparison branches to the loop latch.  Require the
                 latch to be empty for now.  */
              if ((loop->latch == true_edge->dest
                   || loop->latch == false_edge->dest)
                  && empty_block_p (loop->latch))
                {
                  group->doloop_p = true;
                  if (dump_file && (dump_flags & TDF_DETAILS))
                    {
                      fprintf (dump_file, "Doloop cmp iv use: ");
                      print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
                    }
                  return true;
                }
            }
        }
    }

  return false;
}

/* For targets that support doloop, predict whether the later RTL doloop
   transformation will be performed on this loop; if so, detect the doloop
   compare use and set the doloop_use_p flag.  */

void
analyze_and_mark_doloop_use (struct ivopts_data *data)
{
  data->doloop_use_p = false;
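
  /* The RTL doloop transformation is only run when -fbranch-on-count-reg
     is enabled, so there is nothing to predict otherwise.  */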
  if (!flag_branch_on_count_reg)
    return;

  if (data->current_loop->unroll == USHRT_MAX)
    return;

  if (!generic_predict_doloop_p (data))
    return;

  if (find_doloop_use (data))
    {
      data->doloop_use_p = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          struct loop *loop = data->current_loop;
          fprintf (dump_file,
                   "Predict loop %d can perform"
                   " doloop optimization later.\n",
                   loop->num);
          flow_loop_dump (loop, dump_file, NULL, 1);
        }
    }
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
                           bitmap toremove)
{
  bool changed = false;
  class iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop).get_location_t ();
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, *gsi_last_bb (exit->src),
                             0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);

  data->loop_single_exit_p
    = exit != NULL && loop_only_exit_p (loop, body, exit);

  /* For each SSA name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data, body))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data, body);
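  /* Give up if there are too many use groups; MAX_CONSIDERED_GROUPS bounds
     the amount of work the candidate selection is allowed to do.  */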
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Determine cost scaling factor for basic blocks in loop.  */
  determine_scaling_factor (data, body);

  /* Analyze doloop possibility and mark the doloop use if predicted.  */
  analyze_and_mark_doloop_use (data);

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  /* Cleanup basic block aux field.  */
  for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
    body[i]->aux = NULL;
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data, toremove);

finish:
  free (body);
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct ivopts_data data;
  auto_bitmap toremove;
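
  /* TOREMOVE accumulates, across all loops, the SSA names of induction
     variables that became unused; their definitions are released in one
     batch after the loop below.  */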
  tree_ssa_iv_optimize_init (&data);
  mark_ssa_maybe_undefs ();

  /* Optimize the loops starting with the innermost ones.  */
  for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    {
      if (!dbg_cnt (ivopts_loop))
        continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop, toremove);
    }

  /* Remove eliminated IV defs.  */
  release_defs_bitset (toremove);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset_htab ();
  /* Likewise niter and control-IV information.  */
  free_numbers_of_iterations_estimates (cfun);

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"