Small fix for -fdump-ada-spec
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.cc
blob0dd47910f97a8b7aa1a303f87dbedaa753b861d0
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible, it might give a better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated.
69 For the targets supporting low-overhead loops, IVOPTs has to take care of
70 the loops which will probably be transformed in RTL doloop optimization,
71 to try to make selected IV candidate set optimal. The process of doloop
72 support includes:
74 1) Analyze whether the current loop will be transformed into a doloop, find
75 and mark its compare type IV use as doloop use (iv_group field doloop_p), and
76 set flag doloop_use_p of ivopts_data to notify subsequent processing about
77 the doloop. See analyze_and_mark_doloop_use and its callees for the details.
78 The target hook predict_doloop_p can be used for target specific checks.
80 2) Add one doloop dedicated IV cand {(may_be_zero ? 1 : (niter + 1)), +, -1},
81 set flag doloop_p of iv_cand, step cost is set as zero and no extra cost
82 like biv. For cost determination between doloop IV cand and IV use, the
83 target hooks doloop_cost_for_generic and doloop_cost_for_address are
84 provided to add on extra costs for generic type and address type IV use.
85 Zero cost is assigned to the pair between doloop IV cand and doloop IV
86 use, and bound zero is set for IV elimination.
88 3) With the cost setting in step 2), the current cost model based IV
89 selection algorithm will process as usual, pick up doloop dedicated IV if
90 profitable. */
92 #include "config.h"
93 #include "system.h"
94 #include "coretypes.h"
95 #include "backend.h"
96 #include "rtl.h"
97 #include "tree.h"
98 #include "gimple.h"
99 #include "cfghooks.h"
100 #include "tree-pass.h"
101 #include "memmodel.h"
102 #include "tm_p.h"
103 #include "ssa.h"
104 #include "expmed.h"
105 #include "insn-config.h"
106 #include "emit-rtl.h"
107 #include "recog.h"
108 #include "cgraph.h"
109 #include "gimple-pretty-print.h"
110 #include "alias.h"
111 #include "fold-const.h"
112 #include "stor-layout.h"
113 #include "tree-eh.h"
114 #include "gimplify.h"
115 #include "gimple-iterator.h"
116 #include "gimplify-me.h"
117 #include "tree-cfg.h"
118 #include "tree-ssa-loop-ivopts.h"
119 #include "tree-ssa-loop-manip.h"
120 #include "tree-ssa-loop-niter.h"
121 #include "tree-ssa-loop.h"
122 #include "explow.h"
123 #include "expr.h"
124 #include "tree-dfa.h"
125 #include "tree-ssa.h"
126 #include "cfgloop.h"
127 #include "tree-scalar-evolution.h"
128 #include "tree-affine.h"
129 #include "tree-ssa-propagate.h"
130 #include "tree-ssa-address.h"
131 #include "builtins.h"
132 #include "tree-vectorizer.h"
133 #include "dbgcnt.h"
134 #include "cfganal.h"
136 /* For lang_hooks.types.type_for_mode. */
137 #include "langhooks.h"
139 /* FIXME: Expressions are expanded to RTL in this pass to determine the
140 cost of different addressing modes. This should be moved to a TBD
141 interface between the GIMPLE and RTL worlds. */
/* Sentinel value used to represent an infinite cost.  */
#define INFTY 1000000000
146 /* Returns the expected number of loop iterations for LOOP.
147 The average trip count is computed from profile data if it
148 exists. */
150 static inline HOST_WIDE_INT
151 avg_loop_niter (class loop *loop)
153 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
154 if (niter == -1)
156 niter = likely_max_stmt_executions_int (loop);
158 if (niter == -1 || niter > param_avg_loop_niter)
159 return param_avg_loop_niter;
162 return niter;
165 struct iv_use;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to that the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
175 bool biv_p; /* Is it a biv? */
176 bool no_overflow; /* True if the iv doesn't overflow. */
177 bool have_address_use;/* For biv, indicate if it's used in any address
178 type use. */
181 /* Per-ssa version information (induction variable descriptions, etc.). */
182 struct version_info
184 tree name; /* The ssa name. */
185 struct iv *iv; /* Induction variable description. */
186 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
187 an expression that is not an induction variable. */
188 bool preserve_biv; /* For the original biv, whether to preserve it. */
189 unsigned inv_id; /* Id of an invariant. */
/* Types of uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,

  /* Use is an address for an explicit memory reference.  */
  USE_REF_ADDRESS,

  /* Use is a pointer argument to a function in cases where the expansion
     of the function will turn the argument into a normal address.  */
  USE_PTR_ADDRESS,

  /* Use is a compare.  */
  USE_COMPARE
};
204 /* Cost of a computation. */
205 class comp_cost
207 public:
208 comp_cost (): cost (0), complexity (0), scratch (0)
211 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
212 : cost (cost), complexity (complexity), scratch (scratch)
215 /* Returns true if COST is infinite. */
216 bool infinite_cost_p ();
218 /* Adds costs COST1 and COST2. */
219 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
221 /* Adds COST to the comp_cost. */
222 comp_cost operator+= (comp_cost cost);
224 /* Adds constant C to this comp_cost. */
225 comp_cost operator+= (HOST_WIDE_INT c);
227 /* Subtracts constant C to this comp_cost. */
228 comp_cost operator-= (HOST_WIDE_INT c);
230 /* Divide the comp_cost by constant C. */
231 comp_cost operator/= (HOST_WIDE_INT c);
233 /* Multiply the comp_cost by constant C. */
234 comp_cost operator*= (HOST_WIDE_INT c);
236 /* Subtracts costs COST1 and COST2. */
237 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
239 /* Subtracts COST from this comp_cost. */
240 comp_cost operator-= (comp_cost cost);
242 /* Returns true if COST1 is smaller than COST2. */
243 friend bool operator< (comp_cost cost1, comp_cost cost2);
245 /* Returns true if COST1 and COST2 are equal. */
246 friend bool operator== (comp_cost cost1, comp_cost cost2);
248 /* Returns true if COST1 is smaller or equal than COST2. */
249 friend bool operator<= (comp_cost cost1, comp_cost cost2);
251 int64_t cost; /* The runtime cost. */
252 unsigned complexity; /* The estimate of the complexity of the code for
253 the computation (in no concrete units --
254 complexity field should be larger for more
255 complex expressions and addressing modes). */
256 int64_t scratch; /* Scratch used during cost computation. */
259 static const comp_cost no_cost;
260 static const comp_cost infinite_cost (INFTY, 0, INFTY);
262 bool
263 comp_cost::infinite_cost_p ()
265 return cost == INFTY;
268 comp_cost
269 operator+ (comp_cost cost1, comp_cost cost2)
271 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
272 return infinite_cost;
274 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
275 cost1.cost += cost2.cost;
276 cost1.complexity += cost2.complexity;
278 return cost1;
281 comp_cost
282 operator- (comp_cost cost1, comp_cost cost2)
284 if (cost1.infinite_cost_p ())
285 return infinite_cost;
287 gcc_assert (!cost2.infinite_cost_p ());
288 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
290 cost1.cost -= cost2.cost;
291 cost1.complexity -= cost2.complexity;
293 return cost1;
296 comp_cost
297 comp_cost::operator+= (comp_cost cost)
299 *this = *this + cost;
300 return *this;
303 comp_cost
304 comp_cost::operator+= (HOST_WIDE_INT c)
306 if (c >= INFTY)
307 this->cost = INFTY;
309 if (infinite_cost_p ())
310 return *this;
312 gcc_assert (this->cost + c < infinite_cost.cost);
313 this->cost += c;
315 return *this;
318 comp_cost
319 comp_cost::operator-= (HOST_WIDE_INT c)
321 if (infinite_cost_p ())
322 return *this;
324 gcc_assert (this->cost - c < infinite_cost.cost);
325 this->cost -= c;
327 return *this;
330 comp_cost
331 comp_cost::operator/= (HOST_WIDE_INT c)
333 gcc_assert (c != 0);
334 if (infinite_cost_p ())
335 return *this;
337 this->cost /= c;
339 return *this;
342 comp_cost
343 comp_cost::operator*= (HOST_WIDE_INT c)
345 if (infinite_cost_p ())
346 return *this;
348 gcc_assert (this->cost * c < infinite_cost.cost);
349 this->cost *= c;
351 return *this;
354 comp_cost
355 comp_cost::operator-= (comp_cost cost)
357 *this = *this - cost;
358 return *this;
361 bool
362 operator< (comp_cost cost1, comp_cost cost2)
364 if (cost1.cost == cost2.cost)
365 return cost1.complexity < cost2.complexity;
367 return cost1.cost < cost2.cost;
370 bool
371 operator== (comp_cost cost1, comp_cost cost2)
373 return cost1.cost == cost2.cost
374 && cost1.complexity == cost2.complexity;
377 bool
378 operator<= (comp_cost cost1, comp_cost cost2)
380 return cost1 < cost2 || cost1 == cost2;
383 struct iv_inv_expr_ent;
385 /* The candidate - cost pair. */
386 class cost_pair
388 public:
389 struct iv_cand *cand; /* The candidate. */
390 comp_cost cost; /* The cost. */
391 enum tree_code comp; /* For iv elimination, the comparison. */
392 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
393 preserved when representing iv_use with iv_cand. */
394 bitmap inv_exprs; /* The list of newly created invariant expressions
395 when representing iv_use with iv_cand. */
396 tree value; /* For final value elimination, the expression for
397 the final value of the iv. For iv elimination,
398 the new bound to compare with. */
401 /* Use. */
402 struct iv_use
404 unsigned id; /* The id of the use. */
405 unsigned group_id; /* The group id the use belongs to. */
406 enum use_type type; /* Type of the use. */
407 tree mem_type; /* The memory type to use when testing whether an
408 address is legitimate, and what the address's
409 cost is. */
410 struct iv *iv; /* The induction variable it is based on. */
411 gimple *stmt; /* Statement in that it occurs. */
412 tree *op_p; /* The place where it occurs. */
414 tree addr_base; /* Base address with const offset stripped. */
415 poly_uint64_pod addr_offset;
416 /* Const offset stripped from base address. */
419 /* Group of uses. */
420 struct iv_group
422 /* The id of the group. */
423 unsigned id;
424 /* Uses of the group are of the same type. */
425 enum use_type type;
426 /* The set of "related" IV candidates, plus the important ones. */
427 bitmap related_cands;
428 /* Number of IV candidates in the cost_map. */
429 unsigned n_map_members;
430 /* The costs wrto the iv candidates. */
431 class cost_pair *cost_map;
432 /* The selected candidate for the group. */
433 struct iv_cand *selected;
434 /* To indicate this is a doloop use group. */
435 bool doloop_p;
436 /* Uses in the group. */
437 vec<struct iv_use *> vuses;
/* The position where the iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,

  /* At the end of the latch block.  */
  IP_END,

  /* Immediately before a specific use.  */
  IP_BEFORE_USE,

  /* Immediately after a specific use.  */
  IP_AFTER_USE,

  /* The original biv.  */
  IP_ORIGINAL
};
450 /* The induction variable candidate. */
451 struct iv_cand
453 unsigned id; /* The number of the candidate. */
454 bool important; /* Whether this is an "important" candidate, i.e. such
455 that it should be considered by all uses. */
456 bool involves_undefs; /* Whether the IV involves undefined values. */
457 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
458 gimple *incremented_at;/* For original biv, the statement where it is
459 incremented. */
460 tree var_before; /* The variable used for it before increment. */
461 tree var_after; /* The variable used for it after increment. */
462 struct iv *iv; /* The value of the candidate. NULL for
463 "pseudocandidate" used to indicate the possibility
464 to replace the final value of an iv by direct
465 computation of the value. */
466 unsigned cost; /* Cost of the candidate. */
467 unsigned cost_step; /* Cost of the candidate's increment operation. */
468 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
469 where it is incremented. */
470 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
471 iv_cand. */
472 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
473 handle it as a new invariant expression which will
474 be hoisted out of loop. */
475 struct iv *orig_iv; /* The original iv if this cand is added from biv with
476 smaller type. */
477 bool doloop_p; /* Whether this is a doloop candidate. */
480 /* Hashtable entry for common candidate derived from iv uses. */
481 class iv_common_cand
483 public:
484 tree base;
485 tree step;
486 /* IV uses from which this common candidate is derived. */
487 auto_vec<struct iv_use *> uses;
488 hashval_t hash;
491 /* Hashtable helpers. */
493 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
495 static inline hashval_t hash (const iv_common_cand *);
496 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
499 /* Hash function for possible common candidates. */
501 inline hashval_t
502 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
504 return ccand->hash;
507 /* Hash table equality function for common candidates. */
509 inline bool
510 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
511 const iv_common_cand *ccand2)
513 return (ccand1->hash == ccand2->hash
514 && operand_equal_p (ccand1->base, ccand2->base, 0)
515 && operand_equal_p (ccand1->step, ccand2->step, 0)
516 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
517 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
520 /* Loop invariant expression hashtable entry. */
522 struct iv_inv_expr_ent
524 /* Tree expression of the entry. */
525 tree expr;
526 /* Unique indentifier. */
527 int id;
528 /* Hash value. */
529 hashval_t hash;
532 /* Sort iv_inv_expr_ent pair A and B by id field. */
534 static int
535 sort_iv_inv_expr_ent (const void *a, const void *b)
537 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
538 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
540 unsigned id1 = (*e1)->id;
541 unsigned id2 = (*e2)->id;
543 if (id1 < id2)
544 return -1;
545 else if (id1 > id2)
546 return 1;
547 else
548 return 0;
551 /* Hashtable helpers. */
553 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
555 static inline hashval_t hash (const iv_inv_expr_ent *);
556 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
559 /* Return true if uses of type TYPE represent some form of address. */
561 inline bool
562 address_p (use_type type)
564 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
567 /* Hash function for loop invariant expressions. */
569 inline hashval_t
570 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
572 return expr->hash;
575 /* Hash table equality function for expressions. */
577 inline bool
578 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
579 const iv_inv_expr_ent *expr2)
581 return expr1->hash == expr2->hash
582 && operand_equal_p (expr1->expr, expr2->expr, 0);
585 struct ivopts_data
587 /* The currently optimized loop. */
588 class loop *current_loop;
589 location_t loop_loc;
591 /* Numbers of iterations for all exits of the current loop. */
592 hash_map<edge, tree_niter_desc *> *niters;
594 /* Number of registers used in it. */
595 unsigned regs_used;
597 /* The size of version_info array allocated. */
598 unsigned version_info_size;
600 /* The array of information for the ssa names. */
601 struct version_info *version_info;
603 /* The hashtable of loop invariant expressions created
604 by ivopt. */
605 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
607 /* The bitmap of indices in version_info whose value was changed. */
608 bitmap relevant;
610 /* The uses of induction variables. */
611 vec<iv_group *> vgroups;
613 /* The candidates. */
614 vec<iv_cand *> vcands;
616 /* A bitmap of important candidates. */
617 bitmap important_candidates;
619 /* Cache used by tree_to_aff_combination_expand. */
620 hash_map<tree, name_expansion *> *name_expansion_cache;
622 /* The hashtable of common candidates derived from iv uses. */
623 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
625 /* The common candidates. */
626 vec<iv_common_cand *> iv_common_cands;
628 /* Hash map recording base object information of tree exp. */
629 hash_map<tree, tree> *base_object_map;
631 /* The maximum invariant variable id. */
632 unsigned max_inv_var_id;
634 /* The maximum invariant expression id. */
635 unsigned max_inv_expr_id;
637 /* Number of no_overflow BIVs which are not used in memory address. */
638 unsigned bivs_not_used_in_addr;
640 /* Obstack for iv structure. */
641 struct obstack iv_obstack;
643 /* Whether to consider just related and important candidates when replacing a
644 use. */
645 bool consider_all_candidates;
647 /* Are we optimizing for speed? */
648 bool speed;
650 /* Whether the loop body includes any function calls. */
651 bool body_includes_call;
653 /* Whether the loop body can only be exited via single exit. */
654 bool loop_single_exit_p;
656 /* Whether the loop has doloop comparison use. */
657 bool doloop_use_p;
660 /* An assignment of iv candidates to uses. */
662 class iv_ca
664 public:
665 /* The number of uses covered by the assignment. */
666 unsigned upto;
668 /* Number of uses that cannot be expressed by the candidates in the set. */
669 unsigned bad_groups;
671 /* Candidate assigned to a use, together with the related costs. */
672 class cost_pair **cand_for_group;
674 /* Number of times each candidate is used. */
675 unsigned *n_cand_uses;
677 /* The candidates used. */
678 bitmap cands;
680 /* The number of candidates in the set. */
681 unsigned n_cands;
683 /* The number of invariants needed, including both invariant variants and
684 invariant expressions. */
685 unsigned n_invs;
687 /* Total cost of expressing uses. */
688 comp_cost cand_use_cost;
690 /* Total cost of candidates. */
691 int64_t cand_cost;
693 /* Number of times each invariant variable is used. */
694 unsigned *n_inv_var_uses;
696 /* Number of times each invariant expression is used. */
697 unsigned *n_inv_expr_uses;
699 /* Total cost of the assignment. */
700 comp_cost cost;
/* Difference of two iv candidate assignments, kept as a linked list of
   per-group changes.  */

struct iv_ca_delta
{
  struct iv_group *group;      /* Changed group.  */
  class cost_pair *old_cp;     /* An old assignment (for rollback
                                  purposes).  */
  class cost_pair *new_cp;     /* A new assignment.  */
  struct iv_ca_delta *next;    /* Next change in the list.  */
};
/* When the number of candidates is below this bound, all candidates are
   considered.  The value comes from param_iv_consider_all_candidates_bound,
   converted to unsigned.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) param_iv_consider_all_candidates_bound)
/* If there are more iv occurrences than this, we just give up (it is quite
   unlikely that optimizing such a loop would help, and it would take ages).
   The value comes from param_iv_max_considered_uses, converted to
   unsigned.  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) param_iv_max_considered_uses)
/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  The value comes from
   param_iv_always_prune_cand_set_bound, converted to unsigned.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) param_iv_always_prune_cand_set_bound)
737 /* The list of trees for that the decl_rtl field must be reset is stored
738 here. */
740 static vec<tree> decl_rtl_to_reset;
742 static comp_cost force_expr_to_var_cost (tree, bool);
744 /* The single loop exit if it dominates the latch, NULL otherwise. */
746 edge
747 single_dom_exit (class loop *loop)
749 edge exit = single_exit (loop);
751 if (!exit)
752 return NULL;
754 if (!just_once_each_iteration_p (loop, exit->src))
755 return NULL;
757 return exit;
760 /* Dumps information about the induction variable IV to FILE. Don't dump
761 variable's name if DUMP_NAME is FALSE. The information is dumped with
762 preceding spaces indicated by INDENT_LEVEL. */
764 void
765 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
767 const char *p;
768 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
770 if (indent_level > 4)
771 indent_level = 4;
772 p = spaces + 8 - (indent_level << 1);
774 fprintf (file, "%sIV struct:\n", p);
775 if (iv->ssa_name && dump_name)
777 fprintf (file, "%s SSA_NAME:\t", p);
778 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
779 fprintf (file, "\n");
782 fprintf (file, "%s Type:\t", p);
783 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
784 fprintf (file, "\n");
786 fprintf (file, "%s Base:\t", p);
787 print_generic_expr (file, iv->base, TDF_SLIM);
788 fprintf (file, "\n");
790 fprintf (file, "%s Step:\t", p);
791 print_generic_expr (file, iv->step, TDF_SLIM);
792 fprintf (file, "\n");
794 if (iv->base_object)
796 fprintf (file, "%s Object:\t", p);
797 print_generic_expr (file, iv->base_object, TDF_SLIM);
798 fprintf (file, "\n");
801 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
803 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
804 p, iv->no_overflow ? "No-overflow" : "Overflow");
807 /* Dumps information about the USE to FILE. */
809 void
810 dump_use (FILE *file, struct iv_use *use)
812 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
813 fprintf (file, " At stmt:\t");
814 print_gimple_stmt (file, use->stmt, 0);
815 fprintf (file, " At pos:\t");
816 if (use->op_p)
817 print_generic_expr (file, *use->op_p, TDF_SLIM);
818 fprintf (file, "\n");
819 dump_iv (file, use->iv, false, 2);
822 /* Dumps information about the uses to FILE. */
824 void
825 dump_groups (FILE *file, struct ivopts_data *data)
827 unsigned i, j;
828 struct iv_group *group;
830 for (i = 0; i < data->vgroups.length (); i++)
832 group = data->vgroups[i];
833 fprintf (file, "Group %d:\n", group->id);
834 if (group->type == USE_NONLINEAR_EXPR)
835 fprintf (file, " Type:\tGENERIC\n");
836 else if (group->type == USE_REF_ADDRESS)
837 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
838 else if (group->type == USE_PTR_ADDRESS)
839 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
840 else
842 gcc_assert (group->type == USE_COMPARE);
843 fprintf (file, " Type:\tCOMPARE\n");
845 for (j = 0; j < group->vuses.length (); j++)
846 dump_use (file, group->vuses[j]);
850 /* Dumps information about induction variable candidate CAND to FILE. */
852 void
853 dump_cand (FILE *file, struct iv_cand *cand)
855 struct iv *iv = cand->iv;
857 fprintf (file, "Candidate %d:\n", cand->id);
858 if (cand->inv_vars)
860 fprintf (file, " Depend on inv.vars: ");
861 dump_bitmap (file, cand->inv_vars);
863 if (cand->inv_exprs)
865 fprintf (file, " Depend on inv.exprs: ");
866 dump_bitmap (file, cand->inv_exprs);
869 if (cand->var_before)
871 fprintf (file, " Var befor: ");
872 print_generic_expr (file, cand->var_before, TDF_SLIM);
873 fprintf (file, "\n");
875 if (cand->var_after)
877 fprintf (file, " Var after: ");
878 print_generic_expr (file, cand->var_after, TDF_SLIM);
879 fprintf (file, "\n");
882 switch (cand->pos)
884 case IP_NORMAL:
885 fprintf (file, " Incr POS: before exit test\n");
886 break;
888 case IP_BEFORE_USE:
889 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
890 break;
892 case IP_AFTER_USE:
893 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
894 break;
896 case IP_END:
897 fprintf (file, " Incr POS: at end\n");
898 break;
900 case IP_ORIGINAL:
901 fprintf (file, " Incr POS: orig biv\n");
902 break;
905 dump_iv (file, iv, false, 1);
908 /* Returns the info for ssa version VER. */
910 static inline struct version_info *
911 ver_info (struct ivopts_data *data, unsigned ver)
913 return data->version_info + ver;
916 /* Returns the info for ssa name NAME. */
918 static inline struct version_info *
919 name_info (struct ivopts_data *data, tree name)
921 return ver_info (data, SSA_NAME_VERSION (name));
924 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
925 emitted in LOOP. */
927 static bool
928 stmt_after_ip_normal_pos (class loop *loop, gimple *stmt)
930 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
932 gcc_assert (bb);
934 if (sbb == loop->latch)
935 return true;
937 if (sbb != bb)
938 return false;
940 return stmt == last_stmt (bb);
943 /* Returns true if STMT if after the place where the original induction
944 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
945 if the positions are identical. */
947 static bool
948 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
950 basic_block cand_bb = gimple_bb (cand->incremented_at);
951 basic_block stmt_bb = gimple_bb (stmt);
953 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
954 return false;
956 if (stmt_bb != cand_bb)
957 return true;
959 if (true_if_equal
960 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
961 return true;
962 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
965 /* Returns true if STMT if after the place where the induction variable
966 CAND is incremented in LOOP. */
968 static bool
969 stmt_after_increment (class loop *loop, struct iv_cand *cand, gimple *stmt)
971 switch (cand->pos)
973 case IP_END:
974 return false;
976 case IP_NORMAL:
977 return stmt_after_ip_normal_pos (loop, stmt);
979 case IP_ORIGINAL:
980 case IP_AFTER_USE:
981 return stmt_after_inc_pos (cand, stmt, false);
983 case IP_BEFORE_USE:
984 return stmt_after_inc_pos (cand, stmt, true);
986 default:
987 gcc_unreachable ();
991 /* walk_tree callback for contains_abnormal_ssa_name_p. */
993 static tree
994 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
996 if (TREE_CODE (*tp) == SSA_NAME
997 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
998 return *tp;
1000 if (!EXPR_P (*tp))
1001 *walk_subtrees = 0;
1003 return NULL_TREE;
1006 /* Returns true if EXPR contains a ssa name that occurs in an
1007 abnormal phi node. */
1009 bool
1010 contains_abnormal_ssa_name_p (tree expr)
1012 return walk_tree_without_duplicates
1013 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
1016 /* Returns the structure describing number of iterations determined from
1017 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1019 static class tree_niter_desc *
1020 niter_for_exit (struct ivopts_data *data, edge exit)
1022 class tree_niter_desc *desc;
1023 tree_niter_desc **slot;
1025 if (!data->niters)
1027 data->niters = new hash_map<edge, tree_niter_desc *>;
1028 slot = NULL;
1030 else
1031 slot = data->niters->get (exit);
1033 if (!slot)
1035 /* Try to determine number of iterations. We cannot safely work with ssa
1036 names that appear in phi nodes on abnormal edges, so that we do not
1037 create overlapping life ranges for them (PR 27283). */
1038 desc = XNEW (class tree_niter_desc);
1039 if (!number_of_iterations_exit (data->current_loop,
1040 exit, desc, true)
1041 || contains_abnormal_ssa_name_p (desc->niter))
1043 XDELETE (desc);
1044 desc = NULL;
1046 data->niters->put (exit, desc);
1048 else
1049 desc = *slot;
1051 return desc;
1054 /* Returns the structure describing number of iterations determined from
1055 single dominating exit of DATA->current_loop, or NULL if something
1056 goes wrong. */
1058 static class tree_niter_desc *
1059 niter_for_single_dom_exit (struct ivopts_data *data)
1061 edge exit = single_dom_exit (data->current_loop);
1063 if (!exit)
1064 return NULL;
1066 return niter_for_exit (data, exit);
1069 /* Initializes data structures used by the iv optimization pass, stored
1070 in DATA. */
1072 static void
1073 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1075 data->version_info_size = 2 * num_ssa_names;
1076 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1077 data->relevant = BITMAP_ALLOC (NULL);
1078 data->important_candidates = BITMAP_ALLOC (NULL);
1079 data->max_inv_var_id = 0;
1080 data->max_inv_expr_id = 0;
1081 data->niters = NULL;
1082 data->vgroups.create (20);
1083 data->vcands.create (20);
1084 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1085 data->name_expansion_cache = NULL;
1086 data->base_object_map = NULL;
1087 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1088 data->iv_common_cands.create (20);
1089 decl_rtl_to_reset.create (20);
1090 gcc_obstack_init (&data->iv_obstack);
1093 /* walk_tree callback for determine_base_object. */
1095 static tree
1096 determine_base_object_1 (tree *tp, int *walk_subtrees, void *wdata)
1098 tree_code code = TREE_CODE (*tp);
1099 tree obj = NULL_TREE;
1100 if (code == ADDR_EXPR)
1102 tree base = get_base_address (TREE_OPERAND (*tp, 0));
1103 if (!base)
1104 obj = *tp;
1105 else if (TREE_CODE (base) != MEM_REF)
1106 obj = fold_convert (ptr_type_node, build_fold_addr_expr (base));
1108 else if (code == SSA_NAME && POINTER_TYPE_P (TREE_TYPE (*tp)))
1109 obj = fold_convert (ptr_type_node, *tp);
1111 if (!obj)
1113 if (!EXPR_P (*tp))
1114 *walk_subtrees = 0;
1116 return NULL_TREE;
1118 /* Record special node for multiple base objects and stop. */
1119 if (*static_cast<tree *> (wdata))
1121 *static_cast<tree *> (wdata) = integer_zero_node;
1122 return integer_zero_node;
1124 /* Record the base object and continue looking. */
1125 *static_cast<tree *> (wdata) = obj;
1126 return NULL_TREE;
1129 /* Returns a memory object to that EXPR points with caching. Return NULL if we
1130 are able to determine that it does not point to any such object; specially
1131 return integer_zero_node if EXPR contains multiple base objects. */
1133 static tree
1134 determine_base_object (struct ivopts_data *data, tree expr)
1136 tree *slot, obj = NULL_TREE;
1137 if (data->base_object_map)
1139 if ((slot = data->base_object_map->get(expr)) != NULL)
1140 return *slot;
1142 else
1143 data->base_object_map = new hash_map<tree, tree>;
1145 (void) walk_tree_without_duplicates (&expr, determine_base_object_1, &obj);
1146 data->base_object_map->put (expr, obj);
1147 return obj;
1150 /* Return true if address expression with non-DECL_P operand appears
1151 in EXPR. */
1153 static bool
1154 contain_complex_addr_expr (tree expr)
1156 bool res = false;
1158 STRIP_NOPS (expr);
1159 switch (TREE_CODE (expr))
1161 case POINTER_PLUS_EXPR:
1162 case PLUS_EXPR:
1163 case MINUS_EXPR:
1164 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1165 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1166 break;
1168 case ADDR_EXPR:
1169 return (!DECL_P (TREE_OPERAND (expr, 0)));
1171 default:
1172 return false;
1175 return res;
1178 /* Allocates an induction variable with given initial value BASE and step STEP
1179 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1181 static struct iv *
1182 alloc_iv (struct ivopts_data *data, tree base, tree step,
1183 bool no_overflow = false)
1185 tree expr = base;
1186 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1187 sizeof (struct iv));
1188 gcc_assert (step != NULL_TREE);
1190 /* Lower address expression in base except ones with DECL_P as operand.
1191 By doing this:
1192 1) More accurate cost can be computed for address expressions;
1193 2) Duplicate candidates won't be created for bases in different
1194 forms, like &a[0] and &a. */
1195 STRIP_NOPS (expr);
1196 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1197 || contain_complex_addr_expr (expr))
1199 aff_tree comb;
1200 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1201 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1204 iv->base = base;
1205 iv->base_object = determine_base_object (data, base);
1206 iv->step = step;
1207 iv->biv_p = false;
1208 iv->nonlin_use = NULL;
1209 iv->ssa_name = NULL_TREE;
1210 if (!no_overflow
1211 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1212 base, step))
1213 no_overflow = true;
1214 iv->no_overflow = no_overflow;
1215 iv->have_address_use = false;
1217 return iv;
1220 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1221 doesn't overflow. */
1223 static void
1224 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1225 bool no_overflow)
1227 struct version_info *info = name_info (data, iv);
1229 gcc_assert (!info->iv);
1231 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1232 info->iv = alloc_iv (data, base, step, no_overflow);
1233 info->iv->ssa_name = iv;
1236 /* Finds induction variable declaration for VAR. */
1238 static struct iv *
1239 get_iv (struct ivopts_data *data, tree var)
1241 basic_block bb;
1242 tree type = TREE_TYPE (var);
1244 if (!POINTER_TYPE_P (type)
1245 && !INTEGRAL_TYPE_P (type))
1246 return NULL;
1248 if (!name_info (data, var)->iv)
1250 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1252 if (!bb
1253 || !flow_bb_inside_loop_p (data->current_loop, bb))
1255 if (POINTER_TYPE_P (type))
1256 type = sizetype;
1257 set_iv (data, var, var, build_int_cst (type, 0), true);
1261 return name_info (data, var)->iv;
1264 /* Return the first non-invariant ssa var found in EXPR. */
1266 static tree
1267 extract_single_var_from_expr (tree expr)
1269 int i, n;
1270 tree tmp;
1271 enum tree_code code;
1273 if (!expr || is_gimple_min_invariant (expr))
1274 return NULL;
1276 code = TREE_CODE (expr);
1277 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1279 n = TREE_OPERAND_LENGTH (expr);
1280 for (i = 0; i < n; i++)
1282 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1284 if (tmp)
1285 return tmp;
1288 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1291 /* Finds basic ivs. */
1293 static bool
1294 find_bivs (struct ivopts_data *data)
1296 gphi *phi;
1297 affine_iv iv;
1298 tree step, type, base, stop;
1299 bool found = false;
1300 class loop *loop = data->current_loop;
1301 gphi_iterator psi;
1303 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1305 phi = psi.phi ();
1307 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1308 continue;
1310 if (virtual_operand_p (PHI_RESULT (phi)))
1311 continue;
1313 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1314 continue;
1316 if (integer_zerop (iv.step))
1317 continue;
1319 step = iv.step;
1320 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1321 /* Stop expanding iv base at the first ssa var referred by iv step.
1322 Ideally we should stop at any ssa var, because that's expensive
1323 and unusual to happen, we just do it on the first one.
1325 See PR64705 for the rationale. */
1326 stop = extract_single_var_from_expr (step);
1327 base = expand_simple_operations (base, stop);
1328 if (contains_abnormal_ssa_name_p (base)
1329 || contains_abnormal_ssa_name_p (step))
1330 continue;
1332 type = TREE_TYPE (PHI_RESULT (phi));
1333 base = fold_convert (type, base);
1334 if (step)
1336 if (POINTER_TYPE_P (type))
1337 step = convert_to_ptrofftype (step);
1338 else
1339 step = fold_convert (type, step);
1342 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1343 found = true;
1346 return found;
1349 /* Marks basic ivs. */
1351 static void
1352 mark_bivs (struct ivopts_data *data)
1354 gphi *phi;
1355 gimple *def;
1356 tree var;
1357 struct iv *iv, *incr_iv;
1358 class loop *loop = data->current_loop;
1359 basic_block incr_bb;
1360 gphi_iterator psi;
1362 data->bivs_not_used_in_addr = 0;
1363 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1365 phi = psi.phi ();
1367 iv = get_iv (data, PHI_RESULT (phi));
1368 if (!iv)
1369 continue;
1371 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1372 def = SSA_NAME_DEF_STMT (var);
1373 /* Don't mark iv peeled from other one as biv. */
1374 if (def
1375 && gimple_code (def) == GIMPLE_PHI
1376 && gimple_bb (def) == loop->header)
1377 continue;
1379 incr_iv = get_iv (data, var);
1380 if (!incr_iv)
1381 continue;
1383 /* If the increment is in the subloop, ignore it. */
1384 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1385 if (incr_bb->loop_father != data->current_loop
1386 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1387 continue;
1389 iv->biv_p = true;
1390 incr_iv->biv_p = true;
1391 if (iv->no_overflow)
1392 data->bivs_not_used_in_addr++;
1393 if (incr_iv->no_overflow)
1394 data->bivs_not_used_in_addr++;
1398 /* Checks whether STMT defines a linear induction variable and stores its
1399 parameters to IV. */
1401 static bool
1402 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1404 tree lhs, stop;
1405 class loop *loop = data->current_loop;
1407 iv->base = NULL_TREE;
1408 iv->step = NULL_TREE;
1410 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1411 return false;
1413 lhs = gimple_assign_lhs (stmt);
1414 if (TREE_CODE (lhs) != SSA_NAME)
1415 return false;
1417 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1418 return false;
1420 /* Stop expanding iv base at the first ssa var referred by iv step.
1421 Ideally we should stop at any ssa var, because that's expensive
1422 and unusual to happen, we just do it on the first one.
1424 See PR64705 for the rationale. */
1425 stop = extract_single_var_from_expr (iv->step);
1426 iv->base = expand_simple_operations (iv->base, stop);
1427 if (contains_abnormal_ssa_name_p (iv->base)
1428 || contains_abnormal_ssa_name_p (iv->step))
1429 return false;
1431 /* If STMT could throw, then do not consider STMT as defining a GIV.
1432 While this will suppress optimizations, we cannot safely delete this
1433 GIV and associated statements, even if it appears it is not used. */
1434 if (stmt_could_throw_p (cfun, stmt))
1435 return false;
1437 return true;
1440 /* Finds general ivs in statement STMT. */
1442 static void
1443 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1445 affine_iv iv;
1447 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1448 return;
1450 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1453 /* Finds general ivs in basic block BB. */
1455 static void
1456 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1458 gimple_stmt_iterator bsi;
1460 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1461 find_givs_in_stmt (data, gsi_stmt (bsi));
1464 /* Finds general ivs. */
1466 static void
1467 find_givs (struct ivopts_data *data, basic_block *body)
1469 class loop *loop = data->current_loop;
1470 unsigned i;
1472 for (i = 0; i < loop->num_nodes; i++)
1473 find_givs_in_bb (data, body[i]);
1476 /* For each ssa name defined in LOOP determines whether it is an induction
1477 variable and if so, its initial value and step. */
1479 static bool
1480 find_induction_variables (struct ivopts_data *data, basic_block *body)
1482 unsigned i;
1483 bitmap_iterator bi;
1485 if (!find_bivs (data))
1486 return false;
1488 find_givs (data, body);
1489 mark_bivs (data);
1491 if (dump_file && (dump_flags & TDF_DETAILS))
1493 class tree_niter_desc *niter = niter_for_single_dom_exit (data);
1495 if (niter)
1497 fprintf (dump_file, " number of iterations ");
1498 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1499 if (!integer_zerop (niter->may_be_zero))
1501 fprintf (dump_file, "; zero if ");
1502 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1504 fprintf (dump_file, "\n");
1507 fprintf (dump_file, "\n<Induction Vars>:\n");
1508 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1510 struct version_info *info = ver_info (data, i);
1511 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1512 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1516 return true;
1519 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1520 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1521 is the const offset stripped from IV base and MEM_TYPE is the type
1522 of the memory being addressed. For uses of other types, ADDR_BASE
1523 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1525 static struct iv_use *
1526 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1527 gimple *stmt, enum use_type type, tree mem_type,
1528 tree addr_base, poly_uint64 addr_offset)
1530 struct iv_use *use = XCNEW (struct iv_use);
1532 use->id = group->vuses.length ();
1533 use->group_id = group->id;
1534 use->type = type;
1535 use->mem_type = mem_type;
1536 use->iv = iv;
1537 use->stmt = stmt;
1538 use->op_p = use_p;
1539 use->addr_base = addr_base;
1540 use->addr_offset = addr_offset;
1542 group->vuses.safe_push (use);
1543 return use;
1546 /* Checks whether OP is a loop-level invariant and if so, records it.
1547 NONLINEAR_USE is true if the invariant is used in a way we do not
1548 handle specially. */
1550 static void
1551 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1553 basic_block bb;
1554 struct version_info *info;
1556 if (TREE_CODE (op) != SSA_NAME
1557 || virtual_operand_p (op))
1558 return;
1560 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1561 if (bb
1562 && flow_bb_inside_loop_p (data->current_loop, bb))
1563 return;
1565 info = name_info (data, op);
1566 info->name = op;
1567 info->has_nonlin_use |= nonlinear_use;
1568 if (!info->inv_id)
1569 info->inv_id = ++data->max_inv_var_id;
1570 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1573 /* Record a group of TYPE. */
1575 static struct iv_group *
1576 record_group (struct ivopts_data *data, enum use_type type)
1578 struct iv_group *group = XCNEW (struct iv_group);
1580 group->id = data->vgroups.length ();
1581 group->type = type;
1582 group->related_cands = BITMAP_ALLOC (NULL);
1583 group->vuses.create (1);
1584 group->doloop_p = false;
1586 data->vgroups.safe_push (group);
1587 return group;
1590 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1591 New group will be created if there is no existing group for the use.
1592 MEM_TYPE is the type of memory being addressed, or NULL if this
1593 isn't an address reference. */
1595 static struct iv_use *
1596 record_group_use (struct ivopts_data *data, tree *use_p,
1597 struct iv *iv, gimple *stmt, enum use_type type,
1598 tree mem_type)
1600 tree addr_base = NULL;
1601 struct iv_group *group = NULL;
1602 poly_uint64 addr_offset = 0;
1604 /* Record non address type use in a new group. */
1605 if (address_p (type))
1607 unsigned int i;
1609 addr_base = strip_offset (iv->base, &addr_offset);
1610 for (i = 0; i < data->vgroups.length (); i++)
1612 struct iv_use *use;
1614 group = data->vgroups[i];
1615 use = group->vuses[0];
1616 if (!address_p (use->type))
1617 continue;
1619 /* Check if it has the same stripped base and step. */
1620 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1621 && operand_equal_p (iv->step, use->iv->step, 0)
1622 && operand_equal_p (addr_base, use->addr_base, 0))
1623 break;
1625 if (i == data->vgroups.length ())
1626 group = NULL;
1629 if (!group)
1630 group = record_group (data, type);
1632 return record_use (group, use_p, iv, stmt, type, mem_type,
1633 addr_base, addr_offset);
1636 /* Checks whether the use OP is interesting and if so, records it. */
1638 static struct iv_use *
1639 find_interesting_uses_op (struct ivopts_data *data, tree op)
1641 struct iv *iv;
1642 gimple *stmt;
1643 struct iv_use *use;
1645 if (TREE_CODE (op) != SSA_NAME)
1646 return NULL;
1648 iv = get_iv (data, op);
1649 if (!iv)
1650 return NULL;
1652 if (iv->nonlin_use)
1654 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1655 return iv->nonlin_use;
1658 if (integer_zerop (iv->step))
1660 record_invariant (data, op, true);
1661 return NULL;
1664 stmt = SSA_NAME_DEF_STMT (op);
1665 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1667 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1668 iv->nonlin_use = use;
1669 return use;
/* The ways in which a compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  /* No rewrite of the compare applies.  */
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the
     iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
1686 /* Given a condition in statement STMT, checks whether it is a compare
1687 of an induction variable and an invariant. If this is the case,
1688 CONTROL_VAR is set to location of the iv, BOUND to the location of
1689 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1690 induction variable descriptions, and true is returned. If this is not
1691 the case, CONTROL_VAR and BOUND are set to the arguments of the
1692 condition and false is returned. */
1694 static enum comp_iv_rewrite
1695 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1696 tree **control_var, tree **bound,
1697 struct iv **iv_var, struct iv **iv_bound)
1699 /* The objects returned when COND has constant operands. */
1700 static struct iv const_iv;
1701 static tree zero;
1702 tree *op0 = &zero, *op1 = &zero;
1703 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1704 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1706 if (gimple_code (stmt) == GIMPLE_COND)
1708 gcond *cond_stmt = as_a <gcond *> (stmt);
1709 op0 = gimple_cond_lhs_ptr (cond_stmt);
1710 op1 = gimple_cond_rhs_ptr (cond_stmt);
1712 else
1714 op0 = gimple_assign_rhs1_ptr (stmt);
1715 op1 = gimple_assign_rhs2_ptr (stmt);
1718 zero = integer_zero_node;
1719 const_iv.step = integer_zero_node;
1721 if (TREE_CODE (*op0) == SSA_NAME)
1722 iv0 = get_iv (data, *op0);
1723 if (TREE_CODE (*op1) == SSA_NAME)
1724 iv1 = get_iv (data, *op1);
1726 /* If both sides of comparison are IVs. We can express ivs on both end. */
1727 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1729 rewrite_type = COMP_IV_EXPR_2;
1730 goto end;
1733 /* If none side of comparison is IV. */
1734 if ((!iv0 || integer_zerop (iv0->step))
1735 && (!iv1 || integer_zerop (iv1->step)))
1736 goto end;
1738 /* Control variable may be on the other side. */
1739 if (!iv0 || integer_zerop (iv0->step))
1741 std::swap (op0, op1);
1742 std::swap (iv0, iv1);
1744 /* If one side is IV and the other side isn't loop invariant. */
1745 if (!iv1)
1746 rewrite_type = COMP_IV_EXPR;
1747 /* If one side is IV and the other side is loop invariant. */
1748 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1749 rewrite_type = COMP_IV_ELIM;
1751 end:
1752 if (control_var)
1753 *control_var = op0;
1754 if (iv_var)
1755 *iv_var = iv0;
1756 if (bound)
1757 *bound = op1;
1758 if (iv_bound)
1759 *iv_bound = iv1;
1761 return rewrite_type;
1764 /* Checks whether the condition in STMT is interesting and if so,
1765 records it. */
1767 static void
1768 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1770 tree *var_p, *bound_p;
1771 struct iv *var_iv, *bound_iv;
1772 enum comp_iv_rewrite ret;
1774 ret = extract_cond_operands (data, stmt,
1775 &var_p, &bound_p, &var_iv, &bound_iv);
1776 if (ret == COMP_IV_NA)
1778 find_interesting_uses_op (data, *var_p);
1779 find_interesting_uses_op (data, *bound_p);
1780 return;
1783 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1784 /* Record compare type iv_use for iv on the other side of comparison. */
1785 if (ret == COMP_IV_EXPR_2)
1786 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1789 /* Returns the outermost loop EXPR is obviously invariant in
1790 relative to the loop LOOP, i.e. if all its operands are defined
1791 outside of the returned loop. Returns NULL if EXPR is not
1792 even obviously invariant in LOOP. */
1794 class loop *
1795 outermost_invariant_loop_for_expr (class loop *loop, tree expr)
1797 basic_block def_bb;
1798 unsigned i, len;
1800 if (is_gimple_min_invariant (expr))
1801 return current_loops->tree_root;
1803 if (TREE_CODE (expr) == SSA_NAME)
1805 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1806 if (def_bb)
1808 if (flow_bb_inside_loop_p (loop, def_bb))
1809 return NULL;
1810 return superloop_at_depth (loop,
1811 loop_depth (def_bb->loop_father) + 1);
1814 return current_loops->tree_root;
1817 if (!EXPR_P (expr))
1818 return NULL;
1820 unsigned maxdepth = 0;
1821 len = TREE_OPERAND_LENGTH (expr);
1822 for (i = 0; i < len; i++)
1824 class loop *ivloop;
1825 if (!TREE_OPERAND (expr, i))
1826 continue;
1828 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1829 if (!ivloop)
1830 return NULL;
1831 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1834 return superloop_at_depth (loop, maxdepth);
1837 /* Returns true if expression EXPR is obviously invariant in LOOP,
1838 i.e. if all its operands are defined outside of the LOOP. LOOP
1839 should not be the function body. */
1841 bool
1842 expr_invariant_in_loop_p (class loop *loop, tree expr)
1844 basic_block def_bb;
1845 unsigned i, len;
1847 gcc_assert (loop_depth (loop) > 0);
1849 if (is_gimple_min_invariant (expr))
1850 return true;
1852 if (TREE_CODE (expr) == SSA_NAME)
1854 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1855 if (def_bb
1856 && flow_bb_inside_loop_p (loop, def_bb))
1857 return false;
1859 return true;
1862 if (!EXPR_P (expr))
1863 return false;
1865 len = TREE_OPERAND_LENGTH (expr);
1866 for (i = 0; i < len; i++)
1867 if (TREE_OPERAND (expr, i)
1868 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1869 return false;
1871 return true;
1874 /* Given expression EXPR which computes inductive values with respect
1875 to loop recorded in DATA, this function returns biv from which EXPR
1876 is derived by tracing definition chains of ssa variables in EXPR. */
1878 static struct iv*
1879 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1881 struct iv *iv;
1882 unsigned i, n;
1883 tree e2, e1;
1884 enum tree_code code;
1885 gimple *stmt;
1887 if (expr == NULL_TREE)
1888 return NULL;
1890 if (is_gimple_min_invariant (expr))
1891 return NULL;
1893 code = TREE_CODE (expr);
1894 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1896 n = TREE_OPERAND_LENGTH (expr);
1897 for (i = 0; i < n; i++)
1899 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1900 if (iv)
1901 return iv;
1905 /* Stop if it's not ssa name. */
1906 if (code != SSA_NAME)
1907 return NULL;
1909 iv = get_iv (data, expr);
1910 if (!iv || integer_zerop (iv->step))
1911 return NULL;
1912 else if (iv->biv_p)
1913 return iv;
1915 stmt = SSA_NAME_DEF_STMT (expr);
1916 if (gphi *phi = dyn_cast <gphi *> (stmt))
1918 ssa_op_iter iter;
1919 use_operand_p use_p;
1920 basic_block phi_bb = gimple_bb (phi);
1922 /* Skip loop header PHI that doesn't define biv. */
1923 if (phi_bb->loop_father == data->current_loop)
1924 return NULL;
1926 if (virtual_operand_p (gimple_phi_result (phi)))
1927 return NULL;
1929 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1931 tree use = USE_FROM_PTR (use_p);
1932 iv = find_deriving_biv_for_expr (data, use);
1933 if (iv)
1934 return iv;
1936 return NULL;
1938 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1939 return NULL;
1941 e1 = gimple_assign_rhs1 (stmt);
1942 code = gimple_assign_rhs_code (stmt);
1943 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1944 return find_deriving_biv_for_expr (data, e1);
1946 switch (code)
1948 case MULT_EXPR:
1949 case PLUS_EXPR:
1950 case MINUS_EXPR:
1951 case POINTER_PLUS_EXPR:
1952 /* Increments, decrements and multiplications by a constant
1953 are simple. */
1954 e2 = gimple_assign_rhs2 (stmt);
1955 iv = find_deriving_biv_for_expr (data, e2);
1956 if (iv)
1957 return iv;
1958 gcc_fallthrough ();
1960 CASE_CONVERT:
1961 /* Casts are simple. */
1962 return find_deriving_biv_for_expr (data, e1);
1964 default:
1965 break;
1968 return NULL;
1971 /* Record BIV, its predecessor and successor that they are used in
1972 address type uses. */
1974 static void
1975 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1977 unsigned i;
1978 tree type, base_1, base_2;
1979 bitmap_iterator bi;
1981 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1982 || biv->have_address_use || !biv->no_overflow)
1983 return;
1985 type = TREE_TYPE (biv->base);
1986 if (!INTEGRAL_TYPE_P (type))
1987 return;
1989 biv->have_address_use = true;
1990 data->bivs_not_used_in_addr--;
1991 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1992 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1994 struct iv *iv = ver_info (data, i)->iv;
1996 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1997 || iv->have_address_use || !iv->no_overflow)
1998 continue;
2000 if (type != TREE_TYPE (iv->base)
2001 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2002 continue;
2004 if (!operand_equal_p (biv->step, iv->step, 0))
2005 continue;
2007 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2008 if (operand_equal_p (base_1, iv->base, 0)
2009 || operand_equal_p (base_2, biv->base, 0))
2011 iv->have_address_use = true;
2012 data->bivs_not_used_in_addr--;
2017 /* Cumulates the steps of indices into DATA and replaces their values with the
2018 initial ones. Returns false when the value of the index cannot be determined.
2019 Callback for for_each_index. */
2021 struct ifs_ivopts_data
2023 struct ivopts_data *ivopts_data;
2024 gimple *stmt;
2025 tree step;
2028 static bool
2029 idx_find_step (tree base, tree *idx, void *data)
2031 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2032 struct iv *iv;
2033 bool use_overflow_semantics = false;
2034 tree step, iv_base, iv_step, lbound, off;
2035 class loop *loop = dta->ivopts_data->current_loop;
2037 /* If base is a component ref, require that the offset of the reference
2038 be invariant. */
2039 if (TREE_CODE (base) == COMPONENT_REF)
2041 off = component_ref_field_offset (base);
2042 return expr_invariant_in_loop_p (loop, off);
2045 /* If base is array, first check whether we will be able to move the
2046 reference out of the loop (in order to take its address in strength
2047 reduction). In order for this to work we need both lower bound
2048 and step to be loop invariants. */
2049 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2051 /* Moreover, for a range, the size needs to be invariant as well. */
2052 if (TREE_CODE (base) == ARRAY_RANGE_REF
2053 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2054 return false;
2056 step = array_ref_element_size (base);
2057 lbound = array_ref_low_bound (base);
2059 if (!expr_invariant_in_loop_p (loop, step)
2060 || !expr_invariant_in_loop_p (loop, lbound))
2061 return false;
2064 if (TREE_CODE (*idx) != SSA_NAME)
2065 return true;
2067 iv = get_iv (dta->ivopts_data, *idx);
2068 if (!iv)
2069 return false;
2071 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2072 *&x[0], which is not folded and does not trigger the
2073 ARRAY_REF path below. */
2074 *idx = iv->base;
2076 if (integer_zerop (iv->step))
2077 return true;
2079 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2081 step = array_ref_element_size (base);
2083 /* We only handle addresses whose step is an integer constant. */
2084 if (TREE_CODE (step) != INTEGER_CST)
2085 return false;
2087 else
2088 /* The step for pointer arithmetics already is 1 byte. */
2089 step = size_one_node;
2091 iv_base = iv->base;
2092 iv_step = iv->step;
2093 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2094 use_overflow_semantics = true;
2096 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2097 sizetype, &iv_base, &iv_step, dta->stmt,
2098 use_overflow_semantics))
2100 /* The index might wrap. */
2101 return false;
2104 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2105 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2107 if (dta->ivopts_data->bivs_not_used_in_addr)
2109 if (!iv->biv_p)
2110 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2112 record_biv_for_address_use (dta->ivopts_data, iv);
2114 return true;
2117 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2118 object is passed to it in DATA. */
2120 static bool
2121 idx_record_use (tree base, tree *idx,
2122 void *vdata)
2124 struct ivopts_data *data = (struct ivopts_data *) vdata;
2125 find_interesting_uses_op (data, *idx);
2126 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2128 if (TREE_OPERAND (base, 2))
2129 find_interesting_uses_op (data, TREE_OPERAND (base, 2));
2130 if (TREE_OPERAND (base, 3))
2131 find_interesting_uses_op (data, TREE_OPERAND (base, 3));
2133 return true;
2136 /* If we can prove that TOP = cst * BOT for some constant cst,
2137 store cst to MUL and return true. Otherwise return false.
2138 The returned value is always sign-extended, regardless of the
2139 signedness of TOP and BOT. */
2141 static bool
2142 constant_multiple_of (tree top, tree bot, widest_int *mul)
2144 tree mby;
2145 enum tree_code code;
2146 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2147 widest_int res, p0, p1;
2149 STRIP_NOPS (top);
2150 STRIP_NOPS (bot);
2152 if (operand_equal_p (top, bot, 0))
2154 *mul = 1;
2155 return true;
2158 code = TREE_CODE (top);
2159 switch (code)
2161 case MULT_EXPR:
2162 mby = TREE_OPERAND (top, 1);
2163 if (TREE_CODE (mby) != INTEGER_CST)
2164 return false;
2166 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2167 return false;
2169 *mul = wi::sext (res * wi::to_widest (mby), precision);
2170 return true;
2172 case PLUS_EXPR:
2173 case MINUS_EXPR:
2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2175 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2176 return false;
2178 if (code == MINUS_EXPR)
2179 p1 = -p1;
2180 *mul = wi::sext (p0 + p1, precision);
2181 return true;
2183 case INTEGER_CST:
2184 if (TREE_CODE (bot) != INTEGER_CST)
2185 return false;
2187 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2188 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2189 if (p1 == 0)
2190 return false;
2191 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2192 return res == 0;
2194 default:
2195 if (POLY_INT_CST_P (top)
2196 && POLY_INT_CST_P (bot)
2197 && constant_multiple_p (wi::to_poly_widest (top),
2198 wi::to_poly_widest (bot), mul))
2199 return true;
2201 return false;
2205 /* Return true if memory reference REF with step STEP may be unaligned. */
2207 static bool
2208 may_be_unaligned_p (tree ref, tree step)
2210 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2211 thus they are not misaligned. */
2212 if (TREE_CODE (ref) == TARGET_MEM_REF)
2213 return false;
2215 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2216 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2217 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2219 unsigned HOST_WIDE_INT bitpos;
2220 unsigned int ref_align;
2221 get_object_alignment_1 (ref, &ref_align, &bitpos);
2222 if (ref_align < align
2223 || (bitpos % align) != 0
2224 || (bitpos % BITS_PER_UNIT) != 0)
2225 return true;
2227 unsigned int trailing_zeros = tree_ctz (step);
2228 if (trailing_zeros < HOST_BITS_PER_INT
2229 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2230 return true;
2232 return false;
2235 /* Return true if EXPR may be non-addressable. */
2237 bool
2238 may_be_nonaddressable_p (tree expr)
2240 switch (TREE_CODE (expr))
2242 case VAR_DECL:
2243 /* Check if it's a register variable. */
2244 return DECL_HARD_REGISTER (expr);
2246 case TARGET_MEM_REF:
2247 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2248 target, thus they are always addressable. */
2249 return false;
2251 case MEM_REF:
2252 /* Likewise for MEM_REFs, modulo the storage order. */
2253 return REF_REVERSE_STORAGE_ORDER (expr);
2255 case BIT_FIELD_REF:
2256 if (REF_REVERSE_STORAGE_ORDER (expr))
2257 return true;
2258 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2260 case COMPONENT_REF:
2261 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2262 return true;
2263 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2264 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2266 case ARRAY_REF:
2267 case ARRAY_RANGE_REF:
2268 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2269 return true;
2270 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2272 case VIEW_CONVERT_EXPR:
2273 /* This kind of view-conversions may wrap non-addressable objects
2274 and make them look addressable. After some processing the
2275 non-addressability may be uncovered again, causing ADDR_EXPRs
2276 of inappropriate objects to be built. */
2277 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2278 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2279 return true;
2280 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2282 CASE_CONVERT:
2283 return true;
2285 default:
2286 break;
2289 return false;
2292 /* Finds addresses in *OP_P inside STMT. */
2294 static void
2295 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2296 tree *op_p)
2298 tree base = *op_p, step = size_zero_node;
2299 struct iv *civ;
2300 struct ifs_ivopts_data ifs_ivopts_data;
2302 /* Do not play with volatile memory references. A bit too conservative,
2303 perhaps, but safe. */
2304 if (gimple_has_volatile_ops (stmt))
2305 goto fail;
2307 /* Ignore bitfields for now. Not really something terribly complicated
2308 to handle. TODO. */
2309 if (TREE_CODE (base) == BIT_FIELD_REF)
2310 goto fail;
2312 base = unshare_expr (base);
2314 if (TREE_CODE (base) == TARGET_MEM_REF)
2316 tree type = build_pointer_type (TREE_TYPE (base));
2317 tree astep;
2319 if (TMR_BASE (base)
2320 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2322 civ = get_iv (data, TMR_BASE (base));
2323 if (!civ)
2324 goto fail;
2326 TMR_BASE (base) = civ->base;
2327 step = civ->step;
2329 if (TMR_INDEX2 (base)
2330 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2332 civ = get_iv (data, TMR_INDEX2 (base));
2333 if (!civ)
2334 goto fail;
2336 TMR_INDEX2 (base) = civ->base;
2337 step = civ->step;
2339 if (TMR_INDEX (base)
2340 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2342 civ = get_iv (data, TMR_INDEX (base));
2343 if (!civ)
2344 goto fail;
2346 TMR_INDEX (base) = civ->base;
2347 astep = civ->step;
2349 if (astep)
2351 if (TMR_STEP (base))
2352 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2354 step = fold_build2 (PLUS_EXPR, type, step, astep);
2358 if (integer_zerop (step))
2359 goto fail;
2360 base = tree_mem_ref_addr (type, base);
2362 else
2364 ifs_ivopts_data.ivopts_data = data;
2365 ifs_ivopts_data.stmt = stmt;
2366 ifs_ivopts_data.step = size_zero_node;
2367 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2368 || integer_zerop (ifs_ivopts_data.step))
2369 goto fail;
2370 step = ifs_ivopts_data.step;
2372 /* Check that the base expression is addressable. This needs
2373 to be done after substituting bases of IVs into it. */
2374 if (may_be_nonaddressable_p (base))
2375 goto fail;
2377 /* Moreover, on strict alignment platforms, check that it is
2378 sufficiently aligned. */
2379 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2380 goto fail;
2382 base = build_fold_addr_expr (base);
2384 /* Substituting bases of IVs into the base expression might
2385 have caused folding opportunities. */
2386 if (TREE_CODE (base) == ADDR_EXPR)
2388 tree *ref = &TREE_OPERAND (base, 0);
2389 while (handled_component_p (*ref))
2390 ref = &TREE_OPERAND (*ref, 0);
2391 if (TREE_CODE (*ref) == MEM_REF)
2393 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2394 TREE_OPERAND (*ref, 0),
2395 TREE_OPERAND (*ref, 1));
2396 if (tem)
2397 *ref = tem;
2402 civ = alloc_iv (data, base, step);
2403 /* Fail if base object of this memory reference is unknown. */
2404 if (civ->base_object == NULL_TREE)
2405 goto fail;
2407 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2408 return;
2410 fail:
2411 for_each_index (op_p, idx_record_use, data);
2414 /* Finds and records invariants used in STMT. */
2416 static void
2417 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2419 ssa_op_iter iter;
2420 use_operand_p use_p;
2421 tree op;
2423 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2425 op = USE_FROM_PTR (use_p);
2426 record_invariant (data, op, false);
2430 /* CALL calls an internal function. If operand *OP_P will become an
2431 address when the call is expanded, return the type of the memory
2432 being addressed, otherwise return null. */
2434 static tree
2435 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2437 switch (gimple_call_internal_fn (call))
2439 case IFN_MASK_LOAD:
2440 case IFN_MASK_LOAD_LANES:
2441 case IFN_LEN_LOAD:
2442 if (op_p == gimple_call_arg_ptr (call, 0))
2443 return TREE_TYPE (gimple_call_lhs (call));
2444 return NULL_TREE;
2446 case IFN_MASK_STORE:
2447 case IFN_MASK_STORE_LANES:
2448 case IFN_LEN_STORE:
2449 if (op_p == gimple_call_arg_ptr (call, 0))
2450 return TREE_TYPE (gimple_call_arg (call, 3));
2451 return NULL_TREE;
2453 default:
2454 return NULL_TREE;
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459 Return true if the operand will become an address when STMT
2460 is expanded and record the associated address use if so. */
2462 static bool
2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2464 struct iv *iv)
2466 /* Fail if base object of this memory reference is unknown. */
2467 if (iv->base_object == NULL_TREE)
2468 return false;
2470 tree mem_type = NULL_TREE;
2471 if (gcall *call = dyn_cast <gcall *> (stmt))
2472 if (gimple_call_internal_p (call))
2473 mem_type = get_mem_type_for_internal_fn (call, op_p);
2474 if (mem_type)
2476 iv = alloc_iv (data, iv->base, iv->step);
2477 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2478 return true;
2480 return false;
2483 /* Finds interesting uses of induction variables in the statement STMT. */
2485 static void
2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2488 struct iv *iv;
2489 tree op, *lhs, *rhs;
2490 ssa_op_iter iter;
2491 use_operand_p use_p;
2492 enum tree_code code;
2494 find_invariants_stmt (data, stmt);
2496 if (gimple_code (stmt) == GIMPLE_COND)
2498 find_interesting_uses_cond (data, stmt);
2499 return;
2502 if (is_gimple_assign (stmt))
2504 lhs = gimple_assign_lhs_ptr (stmt);
2505 rhs = gimple_assign_rhs1_ptr (stmt);
2507 if (TREE_CODE (*lhs) == SSA_NAME)
2509 /* If the statement defines an induction variable, the uses are not
2510 interesting by themselves. */
2512 iv = get_iv (data, *lhs);
2514 if (iv && !integer_zerop (iv->step))
2515 return;
2518 code = gimple_assign_rhs_code (stmt);
2519 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2520 && (REFERENCE_CLASS_P (*rhs)
2521 || is_gimple_val (*rhs)))
2523 if (REFERENCE_CLASS_P (*rhs))
2524 find_interesting_uses_address (data, stmt, rhs);
2525 else
2526 find_interesting_uses_op (data, *rhs);
2528 if (REFERENCE_CLASS_P (*lhs))
2529 find_interesting_uses_address (data, stmt, lhs);
2530 return;
2532 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2534 find_interesting_uses_cond (data, stmt);
2535 return;
2538 /* TODO -- we should also handle address uses of type
2540 memory = call (whatever);
2544 call (memory). */
2547 if (gimple_code (stmt) == GIMPLE_PHI
2548 && gimple_bb (stmt) == data->current_loop->header)
2550 iv = get_iv (data, PHI_RESULT (stmt));
2552 if (iv && !integer_zerop (iv->step))
2553 return;
2556 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2558 op = USE_FROM_PTR (use_p);
2560 if (TREE_CODE (op) != SSA_NAME)
2561 continue;
2563 iv = get_iv (data, op);
2564 if (!iv)
2565 continue;
2567 if (!find_address_like_use (data, stmt, use_p->use, iv))
2568 find_interesting_uses_op (data, op);
2572 /* Finds interesting uses of induction variables outside of loops
2573 on loop exit edge EXIT. */
2575 static void
2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2578 gphi *phi;
2579 gphi_iterator psi;
2580 tree def;
2582 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2584 phi = psi.phi ();
2585 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2586 if (!virtual_operand_p (def))
2587 find_interesting_uses_op (data, def);
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592 mode for memory reference represented by USE. */
2594 static GTY (()) vec<rtx, va_gc> *addr_list;
2596 static bool
2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2599 rtx reg, addr;
2600 unsigned list_index;
2601 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2602 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2604 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2605 if (list_index >= vec_safe_length (addr_list))
2606 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE, true);
2608 addr = (*addr_list)[list_index];
2609 if (!addr)
2611 addr_mode = targetm.addr_space.address_mode (as);
2612 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2613 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2614 (*addr_list)[list_index] = addr;
2616 else
2617 addr_mode = GET_MODE (addr);
2619 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2620 return (memory_address_addr_space_p (mem_mode, addr, as));
2623 /* Comparison function to sort group in ascending order of addr_offset. */
2625 static int
2626 group_compare_offset (const void *a, const void *b)
2628 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2629 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2631 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2634 /* Check if small groups should be split. Return true if no group
2635 contains more than two uses with distinct addr_offsets. Return
2636 false otherwise. We want to split such groups because:
2638 1) Small groups don't have much benefit and may interfer with
2639 general candidate selection.
2640 2) Size for problem with only small groups is usually small and
2641 general algorithm can handle it well.
2643 TODO -- Above claim may not hold when we want to merge memory
2644 accesses with conseuctive addresses. */
2646 static bool
2647 split_small_address_groups_p (struct ivopts_data *data)
2649 unsigned int i, j, distinct = 1;
2650 struct iv_use *pre;
2651 struct iv_group *group;
2653 for (i = 0; i < data->vgroups.length (); i++)
2655 group = data->vgroups[i];
2656 if (group->vuses.length () == 1)
2657 continue;
2659 gcc_assert (address_p (group->type));
2660 if (group->vuses.length () == 2)
2662 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2663 group->vuses[1]->addr_offset) > 0)
2664 std::swap (group->vuses[0], group->vuses[1]);
2666 else
2667 group->vuses.qsort (group_compare_offset);
2669 if (distinct > 2)
2670 continue;
2672 distinct = 1;
2673 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2675 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2677 pre = group->vuses[j];
2678 distinct++;
2681 if (distinct > 2)
2682 break;
2686 return (distinct <= 2);
2689 /* For each group of address type uses, this function further groups
2690 these uses according to the maximum offset supported by target's
2691 [base + offset] addressing mode. */
2693 static void
2694 split_address_groups (struct ivopts_data *data)
2696 unsigned int i, j;
2697 /* Always split group. */
2698 bool split_p = split_small_address_groups_p (data);
2700 for (i = 0; i < data->vgroups.length (); i++)
2702 struct iv_group *new_group = NULL;
2703 struct iv_group *group = data->vgroups[i];
2704 struct iv_use *use = group->vuses[0];
2706 use->id = 0;
2707 use->group_id = group->id;
2708 if (group->vuses.length () == 1)
2709 continue;
2711 gcc_assert (address_p (use->type));
2713 for (j = 1; j < group->vuses.length ();)
2715 struct iv_use *next = group->vuses[j];
2716 poly_int64 offset = next->addr_offset - use->addr_offset;
2718 /* Split group if aksed to, or the offset against the first
2719 use can't fit in offset part of addressing mode. IV uses
2720 having the same offset are still kept in one group. */
2721 if (maybe_ne (offset, 0)
2722 && (split_p || !addr_offset_valid_p (use, offset)))
2724 if (!new_group)
2725 new_group = record_group (data, group->type);
2726 group->vuses.ordered_remove (j);
2727 new_group->vuses.safe_push (next);
2728 continue;
2731 next->id = j;
2732 next->group_id = group->id;
2733 j++;
2738 /* Finds uses of the induction variables that are interesting. */
2740 static void
2741 find_interesting_uses (struct ivopts_data *data, basic_block *body)
2743 basic_block bb;
2744 gimple_stmt_iterator bsi;
2745 unsigned i;
2746 edge e;
2748 for (i = 0; i < data->current_loop->num_nodes; i++)
2750 edge_iterator ei;
2751 bb = body[i];
2753 FOR_EACH_EDGE (e, ei, bb->succs)
2754 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2755 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2756 find_interesting_uses_outside (data, e);
2758 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2759 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2760 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2761 if (!is_gimple_debug (gsi_stmt (bsi)))
2762 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765 split_address_groups (data);
2767 if (dump_file && (dump_flags & TDF_DETAILS))
2769 fprintf (dump_file, "\n<IV Groups>:\n");
2770 dump_groups (dump_file, data);
2771 fprintf (dump_file, "\n");
2775 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2776 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2777 we are at the top-level of the processed address. */
2779 static tree
2780 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2781 poly_int64 *offset)
2783 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2784 enum tree_code code;
2785 tree type, orig_type = TREE_TYPE (expr);
2786 poly_int64 off0, off1;
2787 HOST_WIDE_INT st;
2788 tree orig_expr = expr;
2790 STRIP_NOPS (expr);
2792 type = TREE_TYPE (expr);
2793 code = TREE_CODE (expr);
2794 *offset = 0;
2796 switch (code)
2798 case POINTER_PLUS_EXPR:
2799 case PLUS_EXPR:
2800 case MINUS_EXPR:
2801 op0 = TREE_OPERAND (expr, 0);
2802 op1 = TREE_OPERAND (expr, 1);
2804 op0 = strip_offset_1 (op0, false, false, &off0);
2805 op1 = strip_offset_1 (op1, false, false, &off1);
2807 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2808 if (op0 == TREE_OPERAND (expr, 0)
2809 && op1 == TREE_OPERAND (expr, 1))
2810 return orig_expr;
2812 if (integer_zerop (op1))
2813 expr = op0;
2814 else if (integer_zerop (op0))
2816 if (code == MINUS_EXPR)
2817 expr = fold_build1 (NEGATE_EXPR, type, op1);
2818 else
2819 expr = op1;
2821 else
2822 expr = fold_build2 (code, type, op0, op1);
2824 return fold_convert (orig_type, expr);
2826 case MULT_EXPR:
2827 op1 = TREE_OPERAND (expr, 1);
2828 if (!cst_and_fits_in_hwi (op1))
2829 return orig_expr;
2831 op0 = TREE_OPERAND (expr, 0);
2832 op0 = strip_offset_1 (op0, false, false, &off0);
2833 if (op0 == TREE_OPERAND (expr, 0))
2834 return orig_expr;
2836 *offset = off0 * int_cst_value (op1);
2837 if (integer_zerop (op0))
2838 expr = op0;
2839 else
2840 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2842 return fold_convert (orig_type, expr);
2844 case ARRAY_REF:
2845 case ARRAY_RANGE_REF:
2846 if (!inside_addr)
2847 return orig_expr;
2849 step = array_ref_element_size (expr);
2850 if (!cst_and_fits_in_hwi (step))
2851 break;
2853 st = int_cst_value (step);
2854 op1 = TREE_OPERAND (expr, 1);
2855 op1 = strip_offset_1 (op1, false, false, &off1);
2856 *offset = off1 * st;
2858 if (top_compref
2859 && integer_zerop (op1))
2861 /* Strip the component reference completely. */
2862 op0 = TREE_OPERAND (expr, 0);
2863 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2864 *offset += off0;
2865 return op0;
2867 break;
2869 case COMPONENT_REF:
2871 tree field;
2873 if (!inside_addr)
2874 return orig_expr;
2876 tmp = component_ref_field_offset (expr);
2877 field = TREE_OPERAND (expr, 1);
2878 if (top_compref
2879 && cst_and_fits_in_hwi (tmp)
2880 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2882 HOST_WIDE_INT boffset, abs_off;
2884 /* Strip the component reference completely. */
2885 op0 = TREE_OPERAND (expr, 0);
2886 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2887 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2888 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2889 if (boffset < 0)
2890 abs_off = -abs_off;
2892 *offset = off0 + int_cst_value (tmp) + abs_off;
2893 return op0;
2896 break;
2898 case ADDR_EXPR:
2899 op0 = TREE_OPERAND (expr, 0);
2900 op0 = strip_offset_1 (op0, true, true, &off0);
2901 *offset += off0;
2903 if (op0 == TREE_OPERAND (expr, 0))
2904 return orig_expr;
2906 expr = build_fold_addr_expr (op0);
2907 return fold_convert (orig_type, expr);
2909 case MEM_REF:
2910 /* ??? Offset operand? */
2911 inside_addr = false;
2912 break;
2914 default:
2915 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2916 return build_int_cst (orig_type, 0);
2917 return orig_expr;
2920 /* Default handling of expressions for that we want to recurse into
2921 the first operand. */
2922 op0 = TREE_OPERAND (expr, 0);
2923 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2924 *offset += off0;
2926 if (op0 == TREE_OPERAND (expr, 0)
2927 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2928 return orig_expr;
2930 expr = copy_node (expr);
2931 TREE_OPERAND (expr, 0) = op0;
2932 if (op1)
2933 TREE_OPERAND (expr, 1) = op1;
2935 /* Inside address, we might strip the top level component references,
2936 thus changing type of the expression. Handling of ADDR_EXPR
2937 will fix that. */
2938 expr = fold_convert (orig_type, expr);
2940 return expr;
2943 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2945 tree
2946 strip_offset (tree expr, poly_uint64_pod *offset)
2948 poly_int64 off;
2949 tree core = strip_offset_1 (expr, false, false, &off);
2950 *offset = off;
2951 return core;
2954 /* Returns variant of TYPE that can be used as base for different uses.
2955 We return unsigned type with the same precision, which avoids problems
2956 with overflows. */
2958 static tree
2959 generic_type_for (tree type)
2961 if (POINTER_TYPE_P (type))
2962 return unsigned_type_for (type);
2964 if (TYPE_UNSIGNED (type))
2965 return type;
2967 return unsigned_type_for (type);
2970 /* Private data for walk_tree. */
2972 struct walk_tree_data
2974 bitmap *inv_vars;
2975 struct ivopts_data *idata;
2978 /* Callback function for walk_tree, it records invariants and symbol
2979 reference in *EXPR_P. DATA is the structure storing result info. */
2981 static tree
2982 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2984 tree op = *expr_p;
2985 struct version_info *info;
2986 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2988 if (TREE_CODE (op) != SSA_NAME)
2989 return NULL_TREE;
2991 info = name_info (wdata->idata, op);
2992 /* Because we expand simple operations when finding IVs, loop invariant
2993 variable that isn't referred by the original loop could be used now.
2994 Record such invariant variables here. */
2995 if (!info->iv)
2997 struct ivopts_data *idata = wdata->idata;
2998 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3000 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3002 tree steptype = TREE_TYPE (op);
3003 if (POINTER_TYPE_P (steptype))
3004 steptype = sizetype;
3005 set_iv (idata, op, op, build_int_cst (steptype, 0), true);
3006 record_invariant (idata, op, false);
3009 if (!info->inv_id || info->has_nonlin_use)
3010 return NULL_TREE;
3012 if (!*wdata->inv_vars)
3013 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3014 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3016 return NULL_TREE;
3019 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
3020 store it. */
3022 static inline void
3023 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3025 struct walk_tree_data wdata;
3027 if (!inv_vars)
3028 return;
3030 wdata.idata = data;
3031 wdata.inv_vars = inv_vars;
3032 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3035 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3036 will be recorded if it doesn't exist yet. Given below two exprs:
3037 inv_expr + cst1, inv_expr + cst2
3038 It's hard to make decision whether constant part should be stripped
3039 or not. We choose to not strip based on below facts:
3040 1) We need to count ADD cost for constant part if it's stripped,
3041 which isn't always trivial where this functions is called.
3042 2) Stripping constant away may be conflict with following loop
3043 invariant hoisting pass.
3044 3) Not stripping constant away results in more invariant exprs,
3045 which usually leads to decision preferring lower reg pressure. */
3047 static iv_inv_expr_ent *
3048 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3050 STRIP_NOPS (inv_expr);
3052 if (poly_int_tree_p (inv_expr)
3053 || TREE_CODE (inv_expr) == SSA_NAME)
3054 return NULL;
3056 /* Don't strip constant part away as we used to. */
3058 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3059 struct iv_inv_expr_ent ent;
3060 ent.expr = inv_expr;
3061 ent.hash = iterative_hash_expr (inv_expr, 0);
3062 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3064 if (!*slot)
3066 *slot = XNEW (struct iv_inv_expr_ent);
3067 (*slot)->expr = inv_expr;
3068 (*slot)->hash = ent.hash;
3069 (*slot)->id = ++data->max_inv_expr_id;
3072 return *slot;
3076 /* Return *TP if it is an SSA_NAME marked with TREE_VISITED, i.e., as
3077 unsuitable as ivopts candidates for potentially involving undefined
3078 behavior. */
3080 static tree
3081 find_ssa_undef (tree *tp, int *walk_subtrees, void *bb_)
3083 basic_block bb = (basic_block) bb_;
3084 if (TREE_CODE (*tp) == SSA_NAME
3085 && ssa_name_maybe_undef_p (*tp)
3086 && !ssa_name_any_use_dominates_bb_p (*tp, bb))
3087 return *tp;
3088 if (!EXPR_P (*tp))
3089 *walk_subtrees = 0;
3090 return NULL;
3093 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3094 position to POS. If USE is not NULL, the candidate is set as related to
3095 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3096 replacement of the final value of the iv by a direct computation. */
3098 static struct iv_cand *
3099 add_candidate_1 (struct ivopts_data *data, tree base, tree step, bool important,
3100 enum iv_position pos, struct iv_use *use,
3101 gimple *incremented_at, struct iv *orig_iv = NULL,
3102 bool doloop = false)
3104 unsigned i;
3105 struct iv_cand *cand = NULL;
3106 tree type, orig_type;
3108 gcc_assert (base && step);
3110 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3111 live, but the ivopts code may replace a real pointer with one
3112 pointing before or after the memory block that is then adjusted
3113 into the memory block during the loop. FIXME: It would likely be
3114 better to actually force the pointer live and still use ivopts;
3115 for example, it would be enough to write the pointer into memory
3116 and keep it there until after the loop. */
3117 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3118 return NULL;
3120 /* If BASE contains undefined SSA names make sure we only record
3121 the original IV. */
3122 bool involves_undefs = false;
3123 if (walk_tree (&base, find_ssa_undef, data->current_loop->header, NULL))
3125 if (pos != IP_ORIGINAL)
3126 return NULL;
3127 important = false;
3128 involves_undefs = true;
3131 /* For non-original variables, make sure their values are computed in a type
3132 that does not invoke undefined behavior on overflows (since in general,
3133 we cannot prove that these induction variables are non-wrapping). */
3134 if (pos != IP_ORIGINAL)
3136 orig_type = TREE_TYPE (base);
3137 type = generic_type_for (orig_type);
3138 if (type != orig_type)
3140 base = fold_convert (type, base);
3141 step = fold_convert (type, step);
3145 for (i = 0; i < data->vcands.length (); i++)
3147 cand = data->vcands[i];
3149 if (cand->pos != pos)
3150 continue;
3152 if (cand->incremented_at != incremented_at
3153 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3154 && cand->ainc_use != use))
3155 continue;
3157 if (operand_equal_p (base, cand->iv->base, 0)
3158 && operand_equal_p (step, cand->iv->step, 0)
3159 && (TYPE_PRECISION (TREE_TYPE (base))
3160 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3161 break;
3164 if (i == data->vcands.length ())
3166 cand = XCNEW (struct iv_cand);
3167 cand->id = i;
3168 cand->iv = alloc_iv (data, base, step);
3169 cand->pos = pos;
3170 if (pos != IP_ORIGINAL)
3172 if (doloop)
3173 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "doloop");
3174 else
3175 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3176 cand->var_after = cand->var_before;
3178 cand->important = important;
3179 cand->involves_undefs = involves_undefs;
3180 cand->incremented_at = incremented_at;
3181 cand->doloop_p = doloop;
3182 data->vcands.safe_push (cand);
3184 if (!poly_int_tree_p (step))
3186 find_inv_vars (data, &step, &cand->inv_vars);
3188 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3189 /* Share bitmap between inv_vars and inv_exprs for cand. */
3190 if (inv_expr != NULL)
3192 cand->inv_exprs = cand->inv_vars;
3193 cand->inv_vars = NULL;
3194 if (cand->inv_exprs)
3195 bitmap_clear (cand->inv_exprs);
3196 else
3197 cand->inv_exprs = BITMAP_ALLOC (NULL);
3199 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3203 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3204 cand->ainc_use = use;
3205 else
3206 cand->ainc_use = NULL;
3208 cand->orig_iv = orig_iv;
3209 if (dump_file && (dump_flags & TDF_DETAILS))
3210 dump_cand (dump_file, cand);
3213 cand->important |= important;
3214 cand->doloop_p |= doloop;
3216 /* Relate candidate to the group for which it is added. */
3217 if (use)
3218 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3220 return cand;
3223 /* Returns true if incrementing the induction variable at the end of the LOOP
3224 is allowed.
3226 The purpose is to avoid splitting latch edge with a biv increment, thus
3227 creating a jump, possibly confusing other optimization passes and leaving
3228 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3229 available (so we do not have a better alternative), or if the latch edge
3230 is already nonempty. */
3232 static bool
3233 allow_ip_end_pos_p (class loop *loop)
3235 if (!ip_normal_pos (loop))
3236 return true;
3238 if (!empty_block_p (ip_end_pos (loop)))
3239 return true;
3241 return false;
3244 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3245 Important field is set to IMPORTANT. */
3247 static void
3248 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3249 bool important, struct iv_use *use)
3251 basic_block use_bb = gimple_bb (use->stmt);
3252 machine_mode mem_mode;
3253 unsigned HOST_WIDE_INT cstepi;
3255 /* If we insert the increment in any position other than the standard
3256 ones, we must ensure that it is incremented once per iteration.
3257 It must not be in an inner nested loop, or one side of an if
3258 statement. */
3259 if (use_bb->loop_father != data->current_loop
3260 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3261 || stmt_can_throw_internal (cfun, use->stmt)
3262 || !cst_and_fits_in_hwi (step))
3263 return;
3265 cstepi = int_cst_value (step);
3267 mem_mode = TYPE_MODE (use->mem_type);
3268 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3269 || USE_STORE_PRE_INCREMENT (mem_mode))
3270 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3271 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3272 || USE_STORE_PRE_DECREMENT (mem_mode))
3273 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3275 enum tree_code code = MINUS_EXPR;
3276 tree new_base;
3277 tree new_step = step;
3279 if (POINTER_TYPE_P (TREE_TYPE (base)))
3281 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3282 code = POINTER_PLUS_EXPR;
3284 else
3285 new_step = fold_convert (TREE_TYPE (base), new_step);
3286 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3287 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3288 use->stmt);
3290 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3291 || USE_STORE_POST_INCREMENT (mem_mode))
3292 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3293 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3294 || USE_STORE_POST_DECREMENT (mem_mode))
3295 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3297 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3298 use->stmt);
3302 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3303 position to POS. If USE is not NULL, the candidate is set as related to
3304 it. The candidate computation is scheduled before exit condition and at
3305 the end of loop. */
3307 static void
3308 add_candidate (struct ivopts_data *data, tree base, tree step, bool important,
3309 struct iv_use *use, struct iv *orig_iv = NULL,
3310 bool doloop = false)
3312 if (ip_normal_pos (data->current_loop))
3313 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL, orig_iv,
3314 doloop);
3315 /* Exclude doloop candidate here since it requires decrement then comparison
3316 and jump, the IP_END position doesn't match. */
3317 if (!doloop && ip_end_pos (data->current_loop)
3318 && allow_ip_end_pos_p (data->current_loop))
3319 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3322 /* Adds standard iv candidates. */
3324 static void
3325 add_standard_iv_candidates (struct ivopts_data *data)
3327 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3329 /* The same for a double-integer type if it is still fast enough. */
3330 if (TYPE_PRECISION
3331 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3332 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3333 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3334 build_int_cst (long_integer_type_node, 1), true, NULL);
3336 /* The same for a double-integer type if it is still fast enough. */
3337 if (TYPE_PRECISION
3338 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3339 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3340 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3341 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3345 /* Adds candidates bases on the old induction variable IV. */
3347 static void
3348 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3350 gimple *phi;
3351 tree def;
3352 struct iv_cand *cand;
3354 /* Check if this biv is used in address type use. */
3355 if (iv->no_overflow && iv->have_address_use
3356 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3357 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3359 tree base = fold_convert (sizetype, iv->base);
3360 tree step = fold_convert (sizetype, iv->step);
3362 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3363 add_candidate (data, base, step, true, NULL, iv);
3364 /* Add iv cand of the original type only if it has nonlinear use. */
3365 if (iv->nonlin_use)
3366 add_candidate (data, iv->base, iv->step, true, NULL);
3368 else
3369 add_candidate (data, iv->base, iv->step, true, NULL);
3371 /* The same, but with initial value zero. */
3372 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3373 add_candidate (data, size_int (0), iv->step, true, NULL);
3374 else
3375 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3376 iv->step, true, NULL);
3378 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3379 if (gimple_code (phi) == GIMPLE_PHI)
3381 /* Additionally record the possibility of leaving the original iv
3382 untouched. */
3383 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3384 /* Don't add candidate if it's from another PHI node because
3385 it's an affine iv appearing in the form of PEELED_CHREC. */
3386 phi = SSA_NAME_DEF_STMT (def);
3387 if (gimple_code (phi) != GIMPLE_PHI)
3389 cand = add_candidate_1 (data,
3390 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3391 SSA_NAME_DEF_STMT (def));
3392 if (cand)
3394 cand->var_before = iv->ssa_name;
3395 cand->var_after = def;
3398 else
3399 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3403 /* Adds candidates based on the old induction variables. */
3405 static void
3406 add_iv_candidate_for_bivs (struct ivopts_data *data)
3408 unsigned i;
3409 struct iv *iv;
3410 bitmap_iterator bi;
3412 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3414 iv = ver_info (data, i)->iv;
3415 if (iv && iv->biv_p && !integer_zerop (iv->step))
3416 add_iv_candidate_for_biv (data, iv);
3420 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3422 static void
3423 record_common_cand (struct ivopts_data *data, tree base,
3424 tree step, struct iv_use *use)
3426 class iv_common_cand ent;
3427 class iv_common_cand **slot;
3429 ent.base = base;
3430 ent.step = step;
3431 ent.hash = iterative_hash_expr (base, 0);
3432 ent.hash = iterative_hash_expr (step, ent.hash);
3434 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3435 if (*slot == NULL)
3437 *slot = new iv_common_cand ();
3438 (*slot)->base = base;
3439 (*slot)->step = step;
3440 (*slot)->uses.create (8);
3441 (*slot)->hash = ent.hash;
3442 data->iv_common_cands.safe_push ((*slot));
3445 gcc_assert (use != NULL);
3446 (*slot)->uses.safe_push (use);
3447 return;
3450 /* Comparison function used to sort common candidates. */
3452 static int
3453 common_cand_cmp (const void *p1, const void *p2)
3455 unsigned n1, n2;
3456 const class iv_common_cand *const *const ccand1
3457 = (const class iv_common_cand *const *)p1;
3458 const class iv_common_cand *const *const ccand2
3459 = (const class iv_common_cand *const *)p2;
3461 n1 = (*ccand1)->uses.length ();
3462 n2 = (*ccand2)->uses.length ();
3463 return n2 - n1;
3466 /* Adds IV candidates based on common candidated recorded. */
3468 static void
3469 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3471 unsigned i, j;
3472 struct iv_cand *cand_1, *cand_2;
3474 data->iv_common_cands.qsort (common_cand_cmp);
3475 for (i = 0; i < data->iv_common_cands.length (); i++)
3477 class iv_common_cand *ptr = data->iv_common_cands[i];
3479 /* Only add IV candidate if it's derived from multiple uses. */
3480 if (ptr->uses.length () <= 1)
3481 break;
3483 cand_1 = NULL;
3484 cand_2 = NULL;
3485 if (ip_normal_pos (data->current_loop))
3486 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3487 false, IP_NORMAL, NULL, NULL);
3489 if (ip_end_pos (data->current_loop)
3490 && allow_ip_end_pos_p (data->current_loop))
3491 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3492 false, IP_END, NULL, NULL);
3494 /* Bind deriving uses and the new candidates. */
3495 for (j = 0; j < ptr->uses.length (); j++)
3497 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3498 if (cand_1)
3499 bitmap_set_bit (group->related_cands, cand_1->id);
3500 if (cand_2)
3501 bitmap_set_bit (group->related_cands, cand_2->id);
3505 /* Release data since it is useless from this point. */
3506 data->iv_common_cand_tab->empty ();
3507 data->iv_common_cands.truncate (0);
3510 /* Adds candidates based on the value of USE's iv. */
/* In addition to the candidates added directly, several {base, step}
   variants are passed to record_common_cand so that candidates shared by
   more than one use can be created later by
   add_iv_candidate_derived_from_uses.  */
3512 static void
3513 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3515 poly_uint64 offset;
3516 tree base;
3517 struct iv *iv = use->iv;
3518 tree basetype = TREE_TYPE (iv->base);
3520 /* Don't add candidate for iv_use with non integer, pointer or non-mode
3521 precision types, instead, add candidate for the corresponding scev in
3522 unsigned type with the same precision. See PR93674 for more info. */
3523 if ((TREE_CODE (basetype) != INTEGER_TYPE && !POINTER_TYPE_P (basetype))
3524 || !type_has_mode_precision_p (basetype))
3526 basetype = lang_hooks.types.type_for_mode (TYPE_MODE (basetype),
3527 TYPE_UNSIGNED (basetype));
3528 add_candidate (data, fold_convert (basetype, iv->base),
3529 fold_convert (basetype, iv->step), false, NULL);
/* Nothing else is added or recorded for such a use.  */
3530 return;
3533 add_candidate (data, iv->base, iv->step, false, use);
3535 /* Record common candidate for use in case it can be shared by others. */
3536 record_common_cand (data, iv->base, iv->step, use);
3538 /* Record common candidate with initial value zero. */
3539 basetype = TREE_TYPE (iv->base);
/* A pointer base gets a zero of sizetype instead of a null pointer.  */
3540 if (POINTER_TYPE_P (basetype))
3541 basetype = sizetype;
3542 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3544 /* Compare the cost of an address with an unscaled index with the cost of
3545 an address with a scaled index and add candidate if useful. */
/* NOTE(review): USE was already dereferenced when IV was initialized at the
   top of the function, so the null test on USE below looks redundant.  */
3546 poly_int64 step;
3547 if (use != NULL
3548 && poly_int_tree_p (iv->step, &step)
3549 && address_p (use->type))
3551 poly_int64 new_step;
3552 unsigned int fact = preferred_mem_scale_factor
3553 (use->iv->base,
3554 TYPE_MODE (use->mem_type),
3555 optimize_loop_for_speed_p (data->current_loop));
/* Only add the zero-based sizetype candidate when the step divides evenly
   by a factor other than 1.  */
3557 if (fact != 1
3558 && multiple_p (step, fact, &new_step))
3559 add_candidate (data, size_int (0),
3560 wide_int_to_tree (sizetype, new_step),
3561 true, NULL);
3564 /* Record common candidate with constant offset stripped in base.
3565 Like the use itself, we also add candidate directly for it. */
3566 base = strip_offset (iv->base, &offset);
3567 if (maybe_ne (offset, 0U) || base != iv->base)
3569 record_common_cand (data, base, iv->step, use);
3570 add_candidate (data, base, iv->step, false, use);
3573 /* Record common candidate with base_object removed in base. */
3574 base = iv->base;
3575 STRIP_NOPS (base);
3576 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
/* This local STEP is a tree and shadows the poly_int64 STEP declared
   above.  */
3578 tree step = iv->step;
3580 STRIP_NOPS (step);
/* Keep only the offset operand of the POINTER_PLUS_EXPR.  */
3581 base = TREE_OPERAND (base, 1);
3582 step = fold_convert (sizetype, step);
3583 record_common_cand (data, base, step, use);
3584 /* Also record common candidate with offset stripped. */
3585 base = strip_offset (base, &offset);
3586 if (maybe_ne (offset, 0U))
3587 record_common_cand (data, base, step, use);
3590 /* At last, add auto-incremental candidates. Make such variables
3591 important since other iv uses with same base object may be based
3592 on it. */
3593 if (use != NULL && address_p (use->type))
3594 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3597 /* Adds candidates based on the uses. */
3599 static void
3600 add_iv_candidate_for_groups (struct ivopts_data *data)
3602 unsigned i;
3604 /* Only add candidate for the first use in group. */
3605 for (i = 0; i < data->vgroups.length (); i++)
3607 struct iv_group *group = data->vgroups[i];
3609 gcc_assert (group->vuses[0] != NULL);
3610 add_iv_candidate_for_use (data, group->vuses[0]);
3612 add_iv_candidate_derived_from_uses (data);
3615 /* Record important candidates and add them to related_cands bitmaps. */
3617 static void
3618 record_important_candidates (struct ivopts_data *data)
3620 unsigned i;
3621 struct iv_group *group;
3623 for (i = 0; i < data->vcands.length (); i++)
3625 struct iv_cand *cand = data->vcands[i];
3627 if (cand->important)
3628 bitmap_set_bit (data->important_candidates, i);
3631 data->consider_all_candidates = (data->vcands.length ()
3632 <= CONSIDER_ALL_CANDIDATES_BOUND);
3634 /* Add important candidates to groups' related_cands bitmaps. */
3635 for (i = 0; i < data->vgroups.length (); i++)
3637 group = data->vgroups[i];
3638 bitmap_ior_into (group->related_cands, data->important_candidates);
3642 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3643 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3644 we allocate a simple list to every use. */
3646 static void
3647 alloc_use_cost_map (struct ivopts_data *data)
3649 unsigned i, size, s;
3651 for (i = 0; i < data->vgroups.length (); i++)
3653 struct iv_group *group = data->vgroups[i];
3655 if (data->consider_all_candidates)
3656 size = data->vcands.length ();
3657 else
3659 s = bitmap_count_bits (group->related_cands);
3661 /* Round up to the power of two, so that moduling by it is fast. */
3662 size = s ? (1 << ceil_log2 (s)) : 1;
3665 group->n_map_members = size;
3666 group->cost_map = XCNEWVEC (class cost_pair, size);
3670 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3671 on invariants INV_VARS and that the value used in expressing it is
3672 VALUE, and in case of iv elimination the comparison operator is COMP. */
3674 static void
3675 set_group_iv_cost (struct ivopts_data *data,
3676 struct iv_group *group, struct iv_cand *cand,
3677 comp_cost cost, bitmap inv_vars, tree value,
3678 enum tree_code comp, bitmap inv_exprs)
3680 unsigned i, s;
3682 if (cost.infinite_cost_p ())
3684 BITMAP_FREE (inv_vars);
3685 BITMAP_FREE (inv_exprs);
3686 return;
3689 if (data->consider_all_candidates)
3691 group->cost_map[cand->id].cand = cand;
3692 group->cost_map[cand->id].cost = cost;
3693 group->cost_map[cand->id].inv_vars = inv_vars;
3694 group->cost_map[cand->id].inv_exprs = inv_exprs;
3695 group->cost_map[cand->id].value = value;
3696 group->cost_map[cand->id].comp = comp;
3697 return;
3700 /* n_map_members is a power of two, so this computes modulo. */
3701 s = cand->id & (group->n_map_members - 1);
3702 for (i = s; i < group->n_map_members; i++)
3703 if (!group->cost_map[i].cand)
3704 goto found;
3705 for (i = 0; i < s; i++)
3706 if (!group->cost_map[i].cand)
3707 goto found;
3709 gcc_unreachable ();
3711 found:
3712 group->cost_map[i].cand = cand;
3713 group->cost_map[i].cost = cost;
3714 group->cost_map[i].inv_vars = inv_vars;
3715 group->cost_map[i].inv_exprs = inv_exprs;
3716 group->cost_map[i].value = value;
3717 group->cost_map[i].comp = comp;
3720 /* Gets cost of (GROUP, CAND) pair. */
3722 static class cost_pair *
3723 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3724 struct iv_cand *cand)
3726 unsigned i, s;
3727 class cost_pair *ret;
3729 if (!cand)
3730 return NULL;
3732 if (data->consider_all_candidates)
3734 ret = group->cost_map + cand->id;
3735 if (!ret->cand)
3736 return NULL;
3738 return ret;
3741 /* n_map_members is a power of two, so this computes modulo. */
3742 s = cand->id & (group->n_map_members - 1);
3743 for (i = s; i < group->n_map_members; i++)
3744 if (group->cost_map[i].cand == cand)
3745 return group->cost_map + i;
3746 else if (group->cost_map[i].cand == NULL)
3747 return NULL;
3748 for (i = 0; i < s; i++)
3749 if (group->cost_map[i].cand == cand)
3750 return group->cost_map + i;
3751 else if (group->cost_map[i].cand == NULL)
3752 return NULL;
3754 return NULL;
3757 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3758 static rtx
3759 produce_memory_decl_rtl (tree obj, int *regno)
3761 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3762 machine_mode address_mode = targetm.addr_space.address_mode (as);
3763 rtx x;
3765 gcc_assert (obj);
3766 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3768 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3769 x = gen_rtx_SYMBOL_REF (address_mode, name);
3770 SET_SYMBOL_REF_DECL (x, obj);
3771 x = gen_rtx_MEM (DECL_MODE (obj), x);
3772 set_mem_addr_space (x, as);
3773 targetm.encode_section_info (obj, x, true);
3775 else
3777 x = gen_raw_REG (address_mode, (*regno)++);
3778 x = gen_rtx_MEM (DECL_MODE (obj), x);
3779 set_mem_addr_space (x, as);
3782 return x;
3785 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3786 walk_tree. DATA contains the actual fake register number. */
/* Every decl that receives fake RTL here is pushed onto decl_rtl_to_reset.
   *WS is cleared for SSA names and declarations (presumably to tell
   walk_tree not to descend further -- confirm against walk_tree).  Always
   returns NULL_TREE.  */
3788 static tree
3789 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3791 tree obj = NULL_TREE;
3792 rtx x = NULL_RTX;
3793 int *regno = (int *) data;
3795 switch (TREE_CODE (*expr_p))
3797 case ADDR_EXPR:
/* Strip all handled components to reach the base object whose address is
   taken.  */
3798 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3799 handled_component_p (*expr_p);
3800 expr_p = &TREE_OPERAND (*expr_p, 0))
3801 continue;
3802 obj = *expr_p;
/* An addressed decl without RTL is given a memory location.  */
3803 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3804 x = produce_memory_decl_rtl (obj, regno);
3805 break;
3807 case SSA_NAME:
3808 *ws = 0;
3809 obj = SSA_NAME_VAR (*expr_p);
3810 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3811 if (!obj)
3812 return NULL_TREE;
/* Give the underlying variable a fresh fake register in its own mode.  */
3813 if (!DECL_RTL_SET_P (obj))
3814 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3815 break;
3817 case VAR_DECL:
3818 case PARM_DECL:
3819 case RESULT_DECL:
3820 *ws = 0;
3821 obj = *expr_p;
3823 if (DECL_RTL_SET_P (obj))
3824 break;
/* BLKmode decls are placed in memory, all others in a fake register.  */
3826 if (DECL_MODE (obj) == BLKmode)
3827 x = produce_memory_decl_rtl (obj, regno);
3828 else
3829 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3831 break;
3833 default:
3834 break;
/* Install the newly created RTL and remember the decl in
   decl_rtl_to_reset.  */
3837 if (x)
3839 decl_rtl_to_reset.safe_push (obj);
3840 SET_DECL_RTL (obj, x);
3843 return NULL_TREE;
3846 /* Predict whether the given loop will be transformed in the RTL
3847 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3848 This is only for target independent checks, see targetm.predict_doloop_p
3849 for the target dependent ones.
3851 Note that according to some initial investigation, some checks like costly
3852 niter check and invalid stmt scanning don't have much gains among general
3853 cases, so keep this as simple as possible first.
3855 Some RTL specific checks seems unable to be checked in gimple, if any new
3856 checks or easy checks _are_ missing here, please add them. */
3858 static bool
3859 generic_predict_doloop_p (struct ivopts_data *data)
3861 class loop *loop = data->current_loop;
3863 /* Call target hook for target dependent checks. */
3864 if (!targetm.predict_doloop_p (loop))
3866 if (dump_file && (dump_flags & TDF_DETAILS))
3867 fprintf (dump_file, "Predict doloop failure due to"
3868 " target specific checks.\n");
3869 return false;
3872 /* Similar to doloop_optimize, check iteration description to know it's
3873 suitable or not. Keep it as simple as possible, feel free to extend it
3874 if you find any multiple exits cases matter. */
3875 edge exit = single_dom_exit (loop);
3876 class tree_niter_desc *niter_desc;
3877 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3879 if (dump_file && (dump_flags & TDF_DETAILS))
3880 fprintf (dump_file, "Predict doloop failure due to"
3881 " unexpected niters.\n");
3882 return false;
3885 /* Similar to doloop_optimize, check whether iteration count too small
3886 and not profitable. */
3887 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3888 if (est_niter == -1)
3889 est_niter = get_likely_max_loop_iterations_int (loop);
3890 if (est_niter >= 0 && est_niter < 3)
3892 if (dump_file && (dump_flags & TDF_DETAILS))
3893 fprintf (dump_file,
3894 "Predict doloop failure due to"
3895 " too few iterations (%u).\n",
3896 (unsigned int) est_niter);
3897 return false;
3900 return true;
3903 /* Determines cost of the computation of EXPR. */
/* SPEED is passed on to the cost queries and also stored in
   crtl->maybe_hot_insn_p.  EXPR is expanded into a temporary insn sequence
   whose cost is measured with seq_cost; the cost of the resulting rtx is
   added on top unless it is a plain register.  */
3905 static unsigned
3906 computation_cost (tree expr, bool speed)
3908 rtx_insn *seq;
3909 rtx rslt;
3910 tree type = TREE_TYPE (expr);
3911 unsigned cost;
3912 /* Avoid using hard regs in ways which may be unsupported. */
3913 int regno = LAST_VIRTUAL_REGISTER + 1;
3914 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3915 enum node_frequency real_frequency = node->frequency;
/* Temporarily force a normal node frequency; the real one is restored
   after the expansion below.  */
3917 node->frequency = NODE_FREQUENCY_NORMAL;
3918 crtl->maybe_hot_insn_p = speed;
/* Give the decls referenced by EXPR fake RTL so that it can be expanded.  */
3919 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3920 start_sequence ();
3921 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3922 seq = get_insns ();
3923 end_sequence ();
3924 default_rtl_profile ();
3925 node->frequency = real_frequency;
3927 cost = seq_cost (seq, speed);
/* A memory result additionally pays for its address, any other
   non-register result for computing the value itself.  */
3928 if (MEM_P (rslt))
3929 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3930 TYPE_ADDR_SPACE (type), speed);
3931 else if (!REG_P (rslt))
3932 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3934 return cost;
3937 /* Returns variable containing the value of candidate CAND at statement AT. */
3939 static tree
3940 var_at_stmt (class loop *loop, struct iv_cand *cand, gimple *stmt)
3942 if (stmt_after_increment (loop, cand, stmt))
3943 return cand->var_after;
3944 else
3945 return cand->var_before;
3948 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3949 same precision that is at least as wide as the precision of TYPE, stores
3950 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3951 type of A and B. */
3953 static tree
3954 determine_common_wider_type (tree *a, tree *b)
3956 tree wider_type = NULL;
3957 tree suba, subb;
3958 tree atype = TREE_TYPE (*a);
3960 if (CONVERT_EXPR_P (*a))
3962 suba = TREE_OPERAND (*a, 0);
3963 wider_type = TREE_TYPE (suba);
3964 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3965 return atype;
3967 else
3968 return atype;
3970 if (CONVERT_EXPR_P (*b))
3972 subb = TREE_OPERAND (*b, 0);
3973 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3974 return atype;
3976 else
3977 return atype;
3979 *a = suba;
3980 *b = subb;
3981 return wider_type;
3984 /* Determines the expression by that USE is expressed from induction variable
3985 CAND at statement AT in LOOP. The expression is stored in two parts in a
3986 decomposed form. The invariant part is stored in AFF_INV; while variant
3987 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3988 non-null. Returns false if USE cannot be expressed using CAND. */
/* NOTE(review): the formula used below is
   use = ubase - ratio * cbase + ratio * var, and the ratio comes from
   constant_multiple_of (ustep, cstep, &rat), which suggests the ratio is
   USE.step over CAND.step rather than the other way round -- confirm
   against constant_multiple_of.  */
3990 static bool
3991 get_computation_aff_1 (class loop *loop, gimple *at, struct iv_use *use,
3992 struct iv_cand *cand, class aff_tree *aff_inv,
3993 class aff_tree *aff_var, widest_int *prat = NULL)
3995 tree ubase = use->iv->base, ustep = use->iv->step;
3996 tree cbase = cand->iv->base, cstep = cand->iv->step;
3997 tree common_type, uutype, var, cstep_common;
3998 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3999 aff_tree aff_cbase;
4000 widest_int rat;
4002 /* We must have a precision to express the values of use. */
4003 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4004 return false;
4006 var = var_at_stmt (loop, cand, at);
/* All the arithmetic is carried out in the unsigned variant of the use
   type.  */
4007 uutype = unsigned_type_for (utype);
4009 /* If the conversion is not noop, perform it. */
4010 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4012 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
4013 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
4015 tree inner_base, inner_step, inner_type;
4016 inner_base = TREE_OPERAND (cbase, 0);
4017 if (CONVERT_EXPR_P (cstep))
4018 inner_step = TREE_OPERAND (cstep, 0);
4019 else
4020 inner_step = cstep;
4022 inner_type = TREE_TYPE (inner_base);
4023 /* If candidate is added from a biv whose type is smaller than
4024 ctype, we know both candidate and the biv won't overflow.
4025 In this case, it's safe to skip the conversion in candidate.
4026 As an example, (unsigned short)((unsigned long)A) equals to
4027 (unsigned short)A, if A has a type no larger than short. */
4028 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
4030 cbase = inner_base;
4031 cstep = inner_step;
4034 cbase = fold_convert (uutype, cbase);
4035 cstep = fold_convert (uutype, cstep);
4036 var = fold_convert (uutype, var);
4039 /* Ratio is 1 when computing the value of biv cand by itself.
4040 We can't rely on constant_multiple_of in this case because the
4041 use is created after the original biv is selected. The call
4042 could fail because of inconsistent fold behavior. See PR68021
4043 for more information. */
4044 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4046 gcc_assert (is_gimple_assign (use->stmt));
4047 gcc_assert (use->iv->ssa_name == cand->var_after);
4048 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
4049 rat = 1;
4051 else if (!constant_multiple_of (ustep, cstep, &rat))
4052 return false;
4054 if (prat)
4055 *prat = rat;
4057 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
4058 type, we achieve better folding by computing their difference in this
4059 wider type, and cast the result to UUTYPE. We do not need to worry about
4060 overflows, as all the arithmetics will in the end be performed in UUTYPE
4061 anyway. */
4062 common_type = determine_common_wider_type (&ubase, &cbase);
4064 /* use = ubase - ratio * cbase + ratio * var. */
4065 tree_to_aff_combination (ubase, common_type, aff_inv);
4066 tree_to_aff_combination (cbase, common_type, &aff_cbase);
4067 tree_to_aff_combination (var, uutype, aff_var);
4069 /* We need to shift the value if we are after the increment. */
4070 if (stmt_after_increment (loop, cand, at))
4072 aff_tree cstep_aff;
4074 if (common_type != uutype)
4075 cstep_common = fold_convert (common_type, cstep);
4076 else
4077 cstep_common = cstep;
4079 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
4080 aff_combination_add (&aff_cbase, &cstep_aff);
/* AFF_INV becomes ubase - ratio * cbase (with cbase already shifted by
   cstep when AT is after the increment).  */
4083 aff_combination_scale (&aff_cbase, -rat);
4084 aff_combination_add (aff_inv, &aff_cbase);
4085 if (common_type != uutype)
4086 aff_combination_convert (aff_inv, uutype);
/* AFF_VAR becomes ratio * var.  */
4088 aff_combination_scale (aff_var, rat);
4089 return true;
4092 /* Determines the expression by that USE is expressed from induction variable
4093 CAND at statement AT in LOOP. The expression is stored in a decomposed
4094 form into AFF. Returns false if USE cannot be expressed using CAND. */
4096 static bool
4097 get_computation_aff (class loop *loop, gimple *at, struct iv_use *use,
4098 struct iv_cand *cand, class aff_tree *aff)
4100 aff_tree aff_var;
4102 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
4103 return false;
4105 aff_combination_add (aff, &aff_var);
4106 return true;
4109 /* Return the type of USE. */
4111 static tree
4112 get_use_type (struct iv_use *use)
4114 tree base_type = TREE_TYPE (use->iv->base);
4115 tree type;
4117 if (use->type == USE_REF_ADDRESS)
4119 /* The base_type may be a void pointer. Create a pointer type based on
4120 the mem_ref instead. */
4121 type = build_pointer_type (TREE_TYPE (*use->op_p));
4122 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4123 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4125 else
4126 type = base_type;
4128 return type;
4131 /* Determines the expression by that USE is expressed from induction variable
4132 CAND at statement AT in LOOP. The computation is unshared. */
4134 static tree
4135 get_computation_at (class loop *loop, gimple *at,
4136 struct iv_use *use, struct iv_cand *cand)
4138 aff_tree aff;
4139 tree type = get_use_type (use);
4141 if (!get_computation_aff (loop, at, use, cand, &aff))
4142 return NULL_TREE;
4143 unshare_aff_combination (&aff);
4144 return fold_convert (type, aff_combination_to_tree (&aff));
4147 /* Like get_computation_at, but try harder, even if the computation
4148 is more expensive. Intended for debug stmts. */
/* Returns NULL_TREE when no expression can be built.  */
4150 static tree
4151 get_debug_computation_at (class loop *loop, gimple *at,
4152 struct iv_use *use, struct iv_cand *cand)
/* Prefer the regular computation whenever it succeeds.  */
4154 if (tree ret = get_computation_at (loop, at, use, cand))
4155 return ret;
4157 tree ubase = use->iv->base, ustep = use->iv->step;
4158 tree cbase = cand->iv->base, cstep = cand->iv->step;
4159 tree var;
4160 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4161 widest_int rat;
4163 /* We must have a precision to express the values of use. */
/* Only a candidate strictly wider than the use is handled below.  */
4164 if (TYPE_PRECISION (utype) >= TYPE_PRECISION (ctype))
4165 return NULL_TREE;
4167 /* Try to handle the case that get_computation_at doesn't,
4168 try to express
4169 use = ubase + (var - cbase) / ratio. */
4170 if (!constant_multiple_of (cstep, fold_convert (TREE_TYPE (cstep), ustep),
4171 &rat))
4172 return NULL_TREE;
/* Work with the absolute value of the ratio and remember its sign; a
   negative ratio is rejected for an unsigned candidate type.  */
4174 bool neg_p = false;
4175 if (wi::neg_p (rat))
4177 if (TYPE_UNSIGNED (ctype))
4178 return NULL_TREE;
4179 neg_p = true;
4180 rat = wi::neg (rat);
4183 /* If both IVs can wrap around and CAND doesn't have a power of two step,
4184 it is unsafe. Consider uint16_t CAND with step 9, when wrapping around,
4185 the values will be ... 0xfff0, 0xfff9, 2, 11 ... and when use is say
4186 uint8_t with step 3, those values divided by 3 cast to uint8_t will be
4187 ... 0x50, 0x53, 0, 3 ... rather than expected 0x50, 0x53, 0x56, 0x59. */
4188 if (!use->iv->no_overflow
4189 && !cand->iv->no_overflow
4190 && !integer_pow2p (cstep))
4191 return NULL_TREE;
/* BITS is log2 of the ratio, rounded up when the ratio is not a power of
   two.  A candidate that may overflow must be at least that many bits
   wider than the use.  */
4193 int bits = wi::exact_log2 (rat);
4194 if (bits == -1)
4195 bits = wi::floor_log2 (rat) + 1;
4196 if (!cand->iv->no_overflow
4197 && TYPE_PRECISION (utype) + bits > TYPE_PRECISION (ctype))
4198 return NULL_TREE;
4200 var = var_at_stmt (loop, cand, at);
/* For a pointer candidate, do the arithmetic in the corresponding unsigned
   type.  */
4202 if (POINTER_TYPE_P (ctype))
4204 ctype = unsigned_type_for (ctype);
4205 cbase = fold_convert (ctype, cbase);
4206 cstep = fold_convert (ctype, cstep);
4207 var = fold_convert (ctype, var);
/* Undo the increment if AT is after it.  */
4210 if (stmt_after_increment (loop, cand, at))
4211 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var,
4212 unshare_expr (cstep));
/* (var - cbase) / ratio.  */
4214 var = fold_build2 (MINUS_EXPR, TREE_TYPE (var), var, cbase);
4215 var = fold_build2 (EXACT_DIV_EXPR, TREE_TYPE (var), var,
4216 wide_int_to_tree (TREE_TYPE (var), rat));
/* Add the result to (or, for a negative ratio, subtract it from) UBASE,
   using POINTER_PLUS_EXPR with a sizetype offset for pointer uses.  */
4217 if (POINTER_TYPE_P (utype))
4219 var = fold_convert (sizetype, var);
4220 if (neg_p)
4221 var = fold_build1 (NEGATE_EXPR, sizetype, var);
4222 var = fold_build2 (POINTER_PLUS_EXPR, utype, ubase, var);
4224 else
4226 var = fold_convert (utype, var);
4227 var = fold_build2 (neg_p ? MINUS_EXPR : PLUS_EXPR, utype,
4228 ubase, var);
4230 return var;
4233 /* Adjust the cost COST for being in loop setup rather than loop body.
4234 If we're optimizing for space, the loop setup overhead is constant;
4235 if we're optimizing for speed, amortize it over the per-iteration cost.
4236 If ROUND_UP_P is true, the result is round up rather than to zero when
4237 optimizing for speed. */
4238 static int64_t
4239 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4240 bool round_up_p = false)
4242 if (cost == INFTY)
4243 return cost;
4244 else if (optimize_loop_for_speed_p (data->current_loop))
4246 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4247 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4249 else
4250 return cost;
4253 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4254 EXPR operand holding the shift. COST0 and COST1 are the costs for
4255 calculating the operands of EXPR. Returns true if successful, and returns
4256 the cost in COST. */
4258 static bool
4259 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4260 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4262 comp_cost res;
4263 tree op1 = TREE_OPERAND (expr, 1);
4264 tree cst = TREE_OPERAND (mult, 1);
4265 tree multop = TREE_OPERAND (mult, 0);
4266 int m = exact_log2 (int_cst_value (cst));
4267 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4268 int as_cost, sa_cost;
4269 bool mult_in_op1;
4271 if (!(m >= 0 && m < maxm))
4272 return false;
4274 STRIP_NOPS (op1);
4275 mult_in_op1 = operand_equal_p (op1, mult, 0);
4277 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4279 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4280 use that in preference to a shift insn followed by an add insn. */
4281 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4282 ? shiftadd_cost (speed, mode, m)
4283 : (mult_in_op1
4284 ? shiftsub1_cost (speed, mode, m)
4285 : shiftsub0_cost (speed, mode, m)));
4287 res = comp_cost (MIN (as_cost, sa_cost), 0);
4288 res += (mult_in_op1 ? cost0 : cost1);
4290 STRIP_NOPS (multop);
4291 if (!is_gimple_val (multop))
4292 res += force_expr_to_var_cost (multop, speed);
4294 *cost = res;
4295 return true;
4298 /* Estimates cost of forcing expression EXPR into a variable. */
/* SPEED selects between the speed (1) and size (0) variants of all cost
   tables.  On the first call, reference costs for an integer constant, the
   address of a symbol and a symbol-plus-offset address are measured with
   computation_cost and cached in function-local statics.  SSA variables
   cost nothing, invariants are charged one of the cached costs, and other
   expressions are charged the cost of their operation plus, recursively,
   the cost of their operands.  Unhandled tree codes get
   target_spill_cost.  */
4300 static comp_cost
4301 force_expr_to_var_cost (tree expr, bool speed)
4303 static bool costs_initialized = false;
4304 static unsigned integer_cost [2];
4305 static unsigned symbol_cost [2];
4306 static unsigned address_cost [2];
4307 tree op0, op1;
4308 comp_cost cost0, cost1, cost;
4309 machine_mode mode;
4310 scalar_int_mode int_mode;
4312 if (!costs_initialized)
4314 tree type = build_pointer_type (integer_type_node);
4315 tree var, addr;
4316 rtx x;
4317 int i;
4319 var = create_tmp_var_raw (integer_type_node, "test_var");
4320 TREE_STATIC (var) = 1;
/* VAR was just marked TREE_STATIC, so produce_memory_decl_rtl takes its
   symbol path and never dereferences the NULL register counter.  */
4321 x = produce_memory_decl_rtl (var, NULL);
4322 SET_DECL_RTL (var, x);
4324 addr = build1 (ADDR_EXPR, type, var);
/* Index 0 holds the size costs, index 1 the speed costs.  */
4327 for (i = 0; i < 2; i++)
4329 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4330 2000), i);
4332 symbol_cost[i] = computation_cost (addr, i) + 1;
4334 address_cost[i]
4335 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4336 if (dump_file && (dump_flags & TDF_DETAILS))
4338 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4339 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4340 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4341 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4342 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4343 fprintf (dump_file, "\n");
4347 costs_initialized = true;
4350 STRIP_NOPS (expr);
4352 if (SSA_VAR_P (expr))
4353 return no_cost;
/* Invariants: integer constants, addresses of plain decls and any other
   invariant address each have their own cached cost.  */
4355 if (is_gimple_min_invariant (expr))
4357 if (poly_int_tree_p (expr))
4358 return comp_cost (integer_cost [speed], 0);
4360 if (TREE_CODE (expr) == ADDR_EXPR)
4362 tree obj = TREE_OPERAND (expr, 0);
4364 if (VAR_P (obj)
4365 || TREE_CODE (obj) == PARM_DECL
4366 || TREE_CODE (obj) == RESULT_DECL)
4367 return comp_cost (symbol_cost [speed], 0);
4370 return comp_cost (address_cost [speed], 0);
/* First switch: pick the operands whose own cost has to be added.  */
4373 switch (TREE_CODE (expr))
4375 case POINTER_PLUS_EXPR:
4376 case PLUS_EXPR:
4377 case MINUS_EXPR:
4378 case MULT_EXPR:
4379 case TRUNC_DIV_EXPR:
4380 case BIT_AND_EXPR:
4381 case BIT_IOR_EXPR:
4382 case LSHIFT_EXPR:
4383 case RSHIFT_EXPR:
4384 op0 = TREE_OPERAND (expr, 0);
4385 op1 = TREE_OPERAND (expr, 1);
4386 STRIP_NOPS (op0);
4387 STRIP_NOPS (op1);
4388 break;
4390 CASE_CONVERT:
4391 case NEGATE_EXPR:
4392 case BIT_NOT_EXPR:
4393 op0 = TREE_OPERAND (expr, 0);
4394 STRIP_NOPS (op0);
4395 op1 = NULL_TREE;
4396 break;
4397 /* See add_iv_candidate_for_doloop, for doloop may_be_zero case, we
4398 introduce COND_EXPR for IV base, need to support better cost estimation
4399 for this COND_EXPR and tcc_comparison. */
4400 case COND_EXPR:
/* Only the two value arms are taken here; the condition (operand 0) is
   costed in the second switch.  */
4401 op0 = TREE_OPERAND (expr, 1);
4402 STRIP_NOPS (op0);
4403 op1 = TREE_OPERAND (expr, 2);
4404 STRIP_NOPS (op1);
4405 break;
4406 case LT_EXPR:
4407 case LE_EXPR:
4408 case GT_EXPR:
4409 case GE_EXPR:
4410 case EQ_EXPR:
4411 case NE_EXPR:
4412 case UNORDERED_EXPR:
4413 case ORDERED_EXPR:
4414 case UNLT_EXPR:
4415 case UNLE_EXPR:
4416 case UNGT_EXPR:
4417 case UNGE_EXPR:
4418 case UNEQ_EXPR:
4419 case LTGT_EXPR:
4420 case MAX_EXPR:
4421 case MIN_EXPR:
4422 op0 = TREE_OPERAND (expr, 0);
4423 STRIP_NOPS (op0);
4424 op1 = TREE_OPERAND (expr, 1);
4425 STRIP_NOPS (op1);
4426 break;
4428 default:
4429 /* Just an arbitrary value, FIXME. */
4430 return comp_cost (target_spill_cost[speed], 0);
/* Operands that are absent, SSA names or constants are free; anything
   else is costed recursively.  */
4433 if (op0 == NULL_TREE
4434 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4435 cost0 = no_cost;
4436 else
4437 cost0 = force_expr_to_var_cost (op0, speed);
4439 if (op1 == NULL_TREE
4440 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4441 cost1 = no_cost;
4442 else
4443 cost1 = force_expr_to_var_cost (op1, speed);
/* Second switch: cost of the operation itself.  */
4445 mode = TYPE_MODE (TREE_TYPE (expr));
4446 switch (TREE_CODE (expr))
4448 case POINTER_PLUS_EXPR:
4449 case PLUS_EXPR:
4450 case MINUS_EXPR:
4451 case NEGATE_EXPR:
4452 cost = comp_cost (add_cost (speed, mode), 0);
4453 if (TREE_CODE (expr) != NEGATE_EXPR)
/* An addition or subtraction with a multiplication operand may be costed
   as a single shift-and-add; that cost is returned directly.  */
4455 tree mult = NULL_TREE;
4456 comp_cost sa_cost;
4457 if (TREE_CODE (op1) == MULT_EXPR)
4458 mult = op1;
4459 else if (TREE_CODE (op0) == MULT_EXPR)
4460 mult = op0;
4462 if (mult != NULL_TREE
4463 && is_a <scalar_int_mode> (mode, &int_mode)
4464 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4465 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4466 speed, &sa_cost))
4467 return sa_cost;
4469 break;
4471 CASE_CONVERT:
/* NOTE(review): despite their names, INNER_MODE and OUTER_MODE hold
   types; the modes are taken with TYPE_MODE below.  */
4473 tree inner_mode, outer_mode;
4474 outer_mode = TREE_TYPE (expr);
4475 inner_mode = TREE_TYPE (op0);
4476 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4477 TYPE_MODE (inner_mode), speed), 0);
4479 break;
4481 case MULT_EXPR:
/* Only multiplication by a constant that fits a HOST_WIDE_INT is costed
   precisely.  */
4482 if (cst_and_fits_in_hwi (op0))
4483 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4484 mode, speed), 0);
4485 else if (cst_and_fits_in_hwi (op1))
4486 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4487 mode, speed), 0);
4488 else
4489 return comp_cost (target_spill_cost [speed], 0);
4490 break;
4492 case TRUNC_DIV_EXPR:
4493 /* Division by power of two is usually cheap, so we allow it. Forbid
4494 anything else. */
4495 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4496 cost = comp_cost (add_cost (speed, mode), 0);
4497 else
4498 cost = comp_cost (target_spill_cost[speed], 0);
4499 break;
4501 case BIT_AND_EXPR:
4502 case BIT_IOR_EXPR:
4503 case BIT_NOT_EXPR:
4504 case LSHIFT_EXPR:
4505 case RSHIFT_EXPR:
4506 cost = comp_cost (add_cost (speed, mode), 0);
4507 break;
4508 case COND_EXPR:
/* The operation cost of a COND_EXPR is the cost of its condition.  */
4509 op0 = TREE_OPERAND (expr, 0);
4510 STRIP_NOPS (op0);
4511 if (op0 == NULL_TREE || TREE_CODE (op0) == SSA_NAME
4512 || CONSTANT_CLASS_P (op0))
4513 cost = no_cost;
4514 else
4515 cost = force_expr_to_var_cost (op0, speed);
4516 break;
4517 case LT_EXPR:
4518 case LE_EXPR:
4519 case GT_EXPR:
4520 case GE_EXPR:
4521 case EQ_EXPR:
4522 case NE_EXPR:
4523 case UNORDERED_EXPR:
4524 case ORDERED_EXPR:
4525 case UNLT_EXPR:
4526 case UNLE_EXPR:
4527 case UNGT_EXPR:
4528 case UNGE_EXPR:
4529 case UNEQ_EXPR:
4530 case LTGT_EXPR:
4531 case MAX_EXPR:
4532 case MIN_EXPR:
4533 /* Simply use add cost for now, FIXME if there is some more accurate cost
4534 evaluation way. */
4535 cost = comp_cost (add_cost (speed, mode), 0);
4536 break;
4538 default:
/* Every code accepted by the first switch is handled above.  */
4539 gcc_unreachable ();
/* Total: operation cost plus the cost of both operands.  */
4542 cost += cost0;
4543 cost += cost1;
4544 return cost;
4547 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4548 invariants the computation depends on. */
4550 static comp_cost
4551 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4553 if (!expr)
4554 return no_cost;
4556 find_inv_vars (data, &expr, inv_vars);
4557 return force_expr_to_var_cost (expr, data->speed);
4560 /* Returns cost of auto-modifying address expression in shape base + offset.
4561 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4562 address expression. The address expression has ADDR_MODE in addr space
4563 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4564 speed or size. */
4566 enum ainc_type
4568 AINC_PRE_INC, /* Pre increment. */
4569 AINC_PRE_DEC, /* Pre decrement. */
4570 AINC_POST_INC, /* Post increment. */
4571 AINC_POST_DEC, /* Post decrement. */
4572 AINC_NONE /* Also the number of auto increment types. */
4575 struct ainc_cost_data
4577 int64_t costs[AINC_NONE];
4580 static comp_cost
4581 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4582 machine_mode addr_mode, machine_mode mem_mode,
4583 addr_space_t as, bool speed)
4585 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4586 && !USE_STORE_PRE_DECREMENT (mem_mode)
4587 && !USE_LOAD_POST_DECREMENT (mem_mode)
4588 && !USE_STORE_POST_DECREMENT (mem_mode)
4589 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4590 && !USE_STORE_PRE_INCREMENT (mem_mode)
4591 && !USE_LOAD_POST_INCREMENT (mem_mode)
4592 && !USE_STORE_POST_INCREMENT (mem_mode))
4593 return infinite_cost;
4595 static vec<ainc_cost_data *> ainc_cost_data_list;
4596 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4597 if (idx >= ainc_cost_data_list.length ())
4599 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4601 gcc_assert (nsize > idx);
4602 ainc_cost_data_list.safe_grow_cleared (nsize, true);
4605 ainc_cost_data *data = ainc_cost_data_list[idx];
4606 if (data == NULL)
4608 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4610 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4611 data->costs[AINC_PRE_DEC] = INFTY;
4612 data->costs[AINC_POST_DEC] = INFTY;
4613 data->costs[AINC_PRE_INC] = INFTY;
4614 data->costs[AINC_POST_INC] = INFTY;
4615 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4616 || USE_STORE_PRE_DECREMENT (mem_mode))
4618 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4620 if (memory_address_addr_space_p (mem_mode, addr, as))
4621 data->costs[AINC_PRE_DEC]
4622 = address_cost (addr, mem_mode, as, speed);
4624 if (USE_LOAD_POST_DECREMENT (mem_mode)
4625 || USE_STORE_POST_DECREMENT (mem_mode))
4627 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4629 if (memory_address_addr_space_p (mem_mode, addr, as))
4630 data->costs[AINC_POST_DEC]
4631 = address_cost (addr, mem_mode, as, speed);
4633 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4634 || USE_STORE_PRE_INCREMENT (mem_mode))
4636 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4638 if (memory_address_addr_space_p (mem_mode, addr, as))
4639 data->costs[AINC_PRE_INC]
4640 = address_cost (addr, mem_mode, as, speed);
4642 if (USE_LOAD_POST_INCREMENT (mem_mode)
4643 || USE_STORE_POST_INCREMENT (mem_mode))
4645 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4647 if (memory_address_addr_space_p (mem_mode, addr, as))
4648 data->costs[AINC_POST_INC]
4649 = address_cost (addr, mem_mode, as, speed);
4651 ainc_cost_data_list[idx] = data;
4654 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4655 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4656 return comp_cost (data->costs[AINC_POST_INC], 0);
4657 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4658 return comp_cost (data->costs[AINC_POST_DEC], 0);
4659 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4660 return comp_cost (data->costs[AINC_PRE_INC], 0);
4661 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4662 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4664 return infinite_cost;
4667 /* Return cost of computing USE's address expression by using CAND.
4668 AFF_INV and AFF_VAR represent invariant and variant parts of the
4669 address expression, respectively. If AFF_INV is simple, store
4670 the loop invariant variables which are depended by it in INV_VARS;
4671 if AFF_INV is complicated, handle it as a new invariant expression
4672 and record it in INV_EXPR. RATIO indicates multiple times between
4673 steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4674 value to it indicating if this is an auto-increment address. */
4676 static comp_cost
4677 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4678 struct iv_cand *cand, aff_tree *aff_inv,
4679 aff_tree *aff_var, HOST_WIDE_INT ratio,
4680 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4681 bool *can_autoinc, bool speed)
4683 rtx addr;
4684 bool simple_inv = true;
4685 tree comp_inv = NULL_TREE, type = aff_var->type;
4686 comp_cost var_cost = no_cost, cost = no_cost;
4687 struct mem_address parts = {NULL_TREE, integer_one_node,
4688 NULL_TREE, NULL_TREE, NULL_TREE};
4689 machine_mode addr_mode = TYPE_MODE (type);
4690 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4691 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4692 /* Only true if ratio != 1. */
4693 bool ok_with_ratio_p = false;
4694 bool ok_without_ratio_p = false;
4696 if (!aff_combination_const_p (aff_inv))
4698 parts.index = integer_one_node;
4699 /* Addressing mode "base + index". */
4700 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4701 if (ratio != 1)
4703 parts.step = wide_int_to_tree (type, ratio);
4704 /* Addressing mode "base + index << scale". */
4705 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4706 if (!ok_with_ratio_p)
4707 parts.step = NULL_TREE;
4709 if (ok_with_ratio_p || ok_without_ratio_p)
4711 if (maybe_ne (aff_inv->offset, 0))
4713 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4714 /* Addressing mode "base + index [<< scale] + offset". */
4715 if (!valid_mem_ref_p (mem_mode, as, &parts))
4716 parts.offset = NULL_TREE;
4717 else
4718 aff_inv->offset = 0;
4721 move_fixed_address_to_symbol (&parts, aff_inv);
4722 /* Base is fixed address and is moved to symbol part. */
4723 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4724 parts.base = NULL_TREE;
4726 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4727 if (parts.symbol != NULL_TREE
4728 && !valid_mem_ref_p (mem_mode, as, &parts))
4730 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4731 parts.symbol = NULL_TREE;
4732 /* Reset SIMPLE_INV since symbol address needs to be computed
4733 outside of address expression in this case. */
4734 simple_inv = false;
4735 /* Symbol part is moved back to base part, it can't be NULL. */
4736 parts.base = integer_one_node;
4739 else
4740 parts.index = NULL_TREE;
4742 else
4744 poly_int64 ainc_step;
4745 if (can_autoinc
4746 && ratio == 1
4747 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4749 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4751 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4752 ainc_offset += ainc_step;
4753 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4754 addr_mode, mem_mode, as, speed);
4755 if (!cost.infinite_cost_p ())
4757 *can_autoinc = true;
4758 return cost;
4760 cost = no_cost;
4762 if (!aff_combination_zero_p (aff_inv))
4764 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4765 /* Addressing mode "base + offset". */
4766 if (!valid_mem_ref_p (mem_mode, as, &parts))
4767 parts.offset = NULL_TREE;
4768 else
4769 aff_inv->offset = 0;
4773 if (simple_inv)
4774 simple_inv = (aff_inv == NULL
4775 || aff_combination_const_p (aff_inv)
4776 || aff_combination_singleton_var_p (aff_inv));
4777 if (!aff_combination_zero_p (aff_inv))
4778 comp_inv = aff_combination_to_tree (aff_inv);
4779 if (comp_inv != NULL_TREE)
4780 cost = force_var_cost (data, comp_inv, inv_vars);
4781 if (ratio != 1 && parts.step == NULL_TREE)
4782 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4783 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4784 var_cost += add_cost (speed, addr_mode);
4786 if (comp_inv && inv_expr && !simple_inv)
4788 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4789 /* Clear depends on. */
4790 if (*inv_expr != NULL && inv_vars && *inv_vars)
4791 bitmap_clear (*inv_vars);
4793 /* Cost of small invariant expression adjusted against loop niters
4794 is usually zero, which makes it difficult to be differentiated
4795 from candidate based on loop invariant variables. Secondly, the
4796 generated invariant expression may not be hoisted out of loop by
4797 following pass. We penalize the cost by rounding up in order to
4798 neutralize such effects. */
4799 cost.cost = adjust_setup_cost (data, cost.cost, true);
4800 cost.scratch = cost.cost;
4803 cost += var_cost;
4804 addr = addr_for_mem_ref (&parts, as, false);
4805 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4806 cost += address_cost (addr, mem_mode, as, speed);
4808 if (parts.symbol != NULL_TREE)
4809 cost.complexity += 1;
4810 /* Don't increase the complexity of adding a scaled index if it's
4811 the only kind of index that the target allows. */
4812 if (parts.step != NULL_TREE && ok_without_ratio_p)
4813 cost.complexity += 1;
4814 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4815 cost.complexity += 1;
4816 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4817 cost.complexity += 1;
4819 return cost;
4822 /* Scale (multiply) the computed COST (except scratch part that should be
4823 hoisted out a loop) by header->frequency / AT->frequency, which makes
4824 expected cost more accurate. */
4826 static comp_cost
4827 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4829 if (data->speed
4830 && data->current_loop->header->count.to_frequency (cfun) > 0)
4832 basic_block bb = gimple_bb (at);
4833 gcc_assert (cost.scratch <= cost.cost);
4834 int scale_factor = (int)(intptr_t) bb->aux;
4835 if (scale_factor == 1)
4836 return cost;
4838 int64_t scaled_cost
4839 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4841 if (dump_file && (dump_flags & TDF_DETAILS))
4842 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4843 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4844 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4846 cost.cost = scaled_cost;
4849 return cost;
4852 /* Determines the cost of the computation by that USE is expressed
4853 from induction variable CAND. If ADDRESS_P is true, we just need
4854 to create an address from it, otherwise we want to get it into
4855 register. A set of invariants we depend on is stored in INV_VARS.
4856 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4857 addressing is likely. If INV_EXPR is nonnull, record invariant
4858 expr entry in it. */
4860 static comp_cost
4861 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4862 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4863 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4865 gimple *at = use->stmt;
4866 tree ubase = use->iv->base, cbase = cand->iv->base;
4867 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4868 tree comp_inv = NULL_TREE;
4869 HOST_WIDE_INT ratio, aratio;
4870 comp_cost cost;
4871 widest_int rat;
4872 aff_tree aff_inv, aff_var;
4873 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4875 if (inv_vars)
4876 *inv_vars = NULL;
4877 if (can_autoinc)
4878 *can_autoinc = false;
4879 if (inv_expr)
4880 *inv_expr = NULL;
4882 /* Check if we have enough precision to express the values of use. */
4883 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4884 return infinite_cost;
4886 if (address_p
4887 || (use->iv->base_object
4888 && cand->iv->base_object
4889 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4890 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4892 /* Do not try to express address of an object with computation based
4893 on address of a different object. This may cause problems in rtl
4894 level alias analysis (that does not expect this to be happening,
4895 as this is illegal in C), and would be unlikely to be useful
4896 anyway. */
4897 if (use->iv->base_object
4898 && cand->iv->base_object
4899 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4900 return infinite_cost;
4903 if (!get_computation_aff_1 (data->current_loop, at, use,
4904 cand, &aff_inv, &aff_var, &rat)
4905 || !wi::fits_shwi_p (rat))
4906 return infinite_cost;
4908 ratio = rat.to_shwi ();
4909 if (address_p)
4911 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4912 inv_vars, inv_expr, can_autoinc, speed);
4913 cost = get_scaled_computation_cost_at (data, at, cost);
4914 /* For doloop IV cand, add on the extra cost. */
4915 cost += cand->doloop_p ? targetm.doloop_cost_for_address : 0;
4916 return cost;
4919 bool simple_inv = (aff_combination_const_p (&aff_inv)
4920 || aff_combination_singleton_var_p (&aff_inv));
4921 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4922 aff_combination_convert (&aff_inv, signed_type);
4923 if (!aff_combination_zero_p (&aff_inv))
4924 comp_inv = aff_combination_to_tree (&aff_inv);
4926 cost = force_var_cost (data, comp_inv, inv_vars);
4927 if (comp_inv && inv_expr && !simple_inv)
4929 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4930 /* Clear depends on. */
4931 if (*inv_expr != NULL && inv_vars && *inv_vars)
4932 bitmap_clear (*inv_vars);
4934 cost.cost = adjust_setup_cost (data, cost.cost);
4935 /* Record setup cost in scratch field. */
4936 cost.scratch = cost.cost;
4938 /* Cost of constant integer can be covered when adding invariant part to
4939 variant part. */
4940 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4941 cost = no_cost;
4943 /* Need type narrowing to represent use with cand. */
4944 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4946 machine_mode outer_mode = TYPE_MODE (utype);
4947 machine_mode inner_mode = TYPE_MODE (ctype);
4948 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4951 /* Turn a + i * (-c) into a - i * c. */
4952 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4953 aratio = -ratio;
4954 else
4955 aratio = ratio;
4957 if (ratio != 1)
4958 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4960 /* TODO: We may also need to check if we can compute a + i * 4 in one
4961 instruction. */
4962 /* Need to add up the invariant and variant parts. */
4963 if (comp_inv && !integer_zerop (comp_inv))
4964 cost += add_cost (speed, TYPE_MODE (utype));
4966 cost = get_scaled_computation_cost_at (data, at, cost);
4968 /* For doloop IV cand, add on the extra cost. */
4969 if (cand->doloop_p && use->type == USE_NONLINEAR_EXPR)
4970 cost += targetm.doloop_cost_for_generic;
4972 return cost;
4975 /* Determines cost of computing the use in GROUP with CAND in a generic
4976 expression. */
4978 static bool
4979 determine_group_iv_cost_generic (struct ivopts_data *data,
4980 struct iv_group *group, struct iv_cand *cand)
4982 comp_cost cost;
4983 iv_inv_expr_ent *inv_expr = NULL;
4984 bitmap inv_vars = NULL, inv_exprs = NULL;
4985 struct iv_use *use = group->vuses[0];
4987 /* The simple case first -- if we need to express value of the preserved
4988 original biv, the cost is 0. This also prevents us from counting the
4989 cost of increment twice -- once at this use and once in the cost of
4990 the candidate. */
4991 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4992 cost = no_cost;
4993 /* If the IV candidate involves undefined SSA values and is not the
4994 same IV as on the USE avoid using that candidate here. */
4995 else if (cand->involves_undefs
4996 && (!use->iv || !operand_equal_p (cand->iv->base, use->iv->base, 0)))
4997 return false;
4998 else
4999 cost = get_computation_cost (data, use, cand, false,
5000 &inv_vars, NULL, &inv_expr);
5002 if (inv_expr)
5004 inv_exprs = BITMAP_ALLOC (NULL);
5005 bitmap_set_bit (inv_exprs, inv_expr->id);
5007 set_group_iv_cost (data, group, cand, cost, inv_vars,
5008 NULL_TREE, ERROR_MARK, inv_exprs);
5009 return !cost.infinite_cost_p ();
5012 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5014 static bool
5015 determine_group_iv_cost_address (struct ivopts_data *data,
5016 struct iv_group *group, struct iv_cand *cand)
5018 unsigned i;
5019 bitmap inv_vars = NULL, inv_exprs = NULL;
5020 bool can_autoinc;
5021 iv_inv_expr_ent *inv_expr = NULL;
5022 struct iv_use *use = group->vuses[0];
5023 comp_cost sum_cost = no_cost, cost;
5025 cost = get_computation_cost (data, use, cand, true,
5026 &inv_vars, &can_autoinc, &inv_expr);
5028 if (inv_expr)
5030 inv_exprs = BITMAP_ALLOC (NULL);
5031 bitmap_set_bit (inv_exprs, inv_expr->id);
5033 sum_cost = cost;
5034 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5036 if (can_autoinc)
5037 sum_cost -= cand->cost_step;
5038 /* If we generated the candidate solely for exploiting autoincrement
5039 opportunities, and it turns out it can't be used, set the cost to
5040 infinity to make sure we ignore it. */
5041 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5042 sum_cost = infinite_cost;
5045 /* Uses in a group can share setup code, so only add setup cost once. */
5046 cost -= cost.scratch;
5047 /* Compute and add costs for rest uses of this group. */
5048 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5050 struct iv_use *next = group->vuses[i];
5052 /* TODO: We could skip computing cost for sub iv_use when it has the
5053 same cost as the first iv_use, but the cost really depends on the
5054 offset and where the iv_use is. */
5055 cost = get_computation_cost (data, next, cand, true,
5056 NULL, &can_autoinc, &inv_expr);
5057 if (inv_expr)
5059 if (!inv_exprs)
5060 inv_exprs = BITMAP_ALLOC (NULL);
5062 bitmap_set_bit (inv_exprs, inv_expr->id);
5064 sum_cost += cost;
5066 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
5067 NULL_TREE, ERROR_MARK, inv_exprs);
5069 return !sum_cost.infinite_cost_p ();
5072 /* Computes value of candidate CAND at position AT in iteration DESC->NITER,
5073 and stores it to VAL. */
5075 static void
5076 cand_value_at (class loop *loop, struct iv_cand *cand, gimple *at,
5077 class tree_niter_desc *desc, aff_tree *val)
5079 aff_tree step, delta, nit;
5080 struct iv *iv = cand->iv;
5081 tree type = TREE_TYPE (iv->base);
5082 tree niter = desc->niter;
5083 bool after_adjust = stmt_after_increment (loop, cand, at);
5084 tree steptype;
5086 if (POINTER_TYPE_P (type))
5087 steptype = sizetype;
5088 else
5089 steptype = unsigned_type_for (type);
5091 /* If AFTER_ADJUST is required, the code below generates the equivalent
5092 of BASE + NITER * STEP + STEP, when ideally we'd prefer the expression
5093 BASE + (NITER + 1) * STEP, especially when NITER is often of the form
5094 SSA_NAME - 1. Unfortunately, guaranteeing that adding 1 to NITER
5095 doesn't overflow is tricky, so we peek inside the TREE_NITER_DESC
5096 class for common idioms that we know are safe. */
5097 if (after_adjust
5098 && desc->control.no_overflow
5099 && integer_onep (desc->control.step)
5100 && (desc->cmp == LT_EXPR
5101 || desc->cmp == NE_EXPR)
5102 && TREE_CODE (desc->bound) == SSA_NAME)
5104 if (integer_onep (desc->control.base))
5106 niter = desc->bound;
5107 after_adjust = false;
5109 else if (TREE_CODE (niter) == MINUS_EXPR
5110 && integer_onep (TREE_OPERAND (niter, 1)))
5112 niter = TREE_OPERAND (niter, 0);
5113 after_adjust = false;
5117 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5118 aff_combination_convert (&step, steptype);
5119 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5120 aff_combination_convert (&nit, steptype);
5121 aff_combination_mult (&nit, &step, &delta);
5122 if (after_adjust)
5123 aff_combination_add (&delta, &step);
5125 tree_to_aff_combination (iv->base, type, val);
5126 if (!POINTER_TYPE_P (type))
5127 aff_combination_convert (val, steptype);
5128 aff_combination_add (val, &delta);
5131 /* Returns period of induction variable iv. */
5133 static tree
5134 iv_period (struct iv *iv)
5136 tree step = iv->step, period, type;
5137 tree pow2div;
5139 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5141 type = unsigned_type_for (TREE_TYPE (step));
5142 /* Period of the iv is lcm (step, type_range)/step -1,
5143 i.e., N*type_range/step - 1. Since type range is power
5144 of two, N == (step >> num_of_ending_zeros_binary (step),
5145 so the final result is
5147 (type_range >> num_of_ending_zeros_binary (step)) - 1
5150 pow2div = num_ending_zeros (step);
5152 period = build_low_bits_mask (type,
5153 (TYPE_PRECISION (type)
5154 - tree_to_uhwi (pow2div)));
5156 return period;
5159 /* Returns the comparison operator used when eliminating the iv USE. */
5161 static enum tree_code
5162 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5164 class loop *loop = data->current_loop;
5165 basic_block ex_bb;
5166 edge exit;
5168 ex_bb = gimple_bb (use->stmt);
5169 exit = EDGE_SUCC (ex_bb, 0);
5170 if (flow_bb_inside_loop_p (loop, exit->dest))
5171 exit = EDGE_SUCC (ex_bb, 1);
5173 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5176 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5177 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5178 calculation is performed in non-wrapping type.
5180 TODO: More generally, we could test for the situation that
5181 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5182 This would require knowing the sign of OFFSET. */
5184 static bool
5185 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5187 enum tree_code code;
5188 tree e1, e2;
5189 aff_tree aff_e1, aff_e2, aff_offset;
5191 if (!nowrap_type_p (TREE_TYPE (base)))
5192 return false;
5194 base = expand_simple_operations (base);
5196 if (TREE_CODE (base) == SSA_NAME)
5198 gimple *stmt = SSA_NAME_DEF_STMT (base);
5200 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5201 return false;
5203 code = gimple_assign_rhs_code (stmt);
5204 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5205 return false;
5207 e1 = gimple_assign_rhs1 (stmt);
5208 e2 = gimple_assign_rhs2 (stmt);
5210 else
5212 code = TREE_CODE (base);
5213 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5214 return false;
5215 e1 = TREE_OPERAND (base, 0);
5216 e2 = TREE_OPERAND (base, 1);
5219 /* Use affine expansion as deeper inspection to prove the equality. */
5220 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5221 &aff_e2, &data->name_expansion_cache);
5222 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5223 &aff_offset, &data->name_expansion_cache);
5224 aff_combination_scale (&aff_offset, -1);
5225 switch (code)
5227 case PLUS_EXPR:
5228 aff_combination_add (&aff_e2, &aff_offset);
5229 if (aff_combination_zero_p (&aff_e2))
5230 return true;
5232 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5233 &aff_e1, &data->name_expansion_cache);
5234 aff_combination_add (&aff_e1, &aff_offset);
5235 return aff_combination_zero_p (&aff_e1);
5237 case POINTER_PLUS_EXPR:
5238 aff_combination_add (&aff_e2, &aff_offset);
5239 return aff_combination_zero_p (&aff_e2);
5241 default:
5242 return false;
5246 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5247 comparison with CAND. NITER describes the number of iterations of
5248 the loops. If successful, the comparison in COMP_P is altered accordingly.
5250 We aim to handle the following situation:
5252 sometype *base, *p;
5253 int a, b, i;
5255 i = a;
5256 p = p_0 = base + a;
5260 bla (*p);
5261 p++;
5262 i++;
5264 while (i < b);
5266 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5267 We aim to optimize this to
5269 p = p_0 = base + a;
5272 bla (*p);
5273 p++;
5275 while (p < p_0 - a + b);
5277 This preserves the correctness, since the pointer arithmetics does not
5278 overflow. More precisely:
5280 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5281 overflow in computing it or the values of p.
5282 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5283 overflow. To prove this, we use the fact that p_0 = base + a. */
5285 static bool
5286 iv_elimination_compare_lt (struct ivopts_data *data,
5287 struct iv_cand *cand, enum tree_code *comp_p,
5288 class tree_niter_desc *niter)
5290 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5291 class aff_tree nit, tmpa, tmpb;
5292 enum tree_code comp;
5293 HOST_WIDE_INT step;
5295 /* We need to know that the candidate induction variable does not overflow.
5296 While more complex analysis may be used to prove this, for now just
5297 check that the variable appears in the original program and that it
5298 is computed in a type that guarantees no overflows. */
5299 cand_type = TREE_TYPE (cand->iv->base);
5300 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5301 return false;
5303 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5304 the calculation of the BOUND could overflow, making the comparison
5305 invalid. */
5306 if (!data->loop_single_exit_p)
5307 return false;
5309 /* We need to be able to decide whether candidate is increasing or decreasing
5310 in order to choose the right comparison operator. */
5311 if (!cst_and_fits_in_hwi (cand->iv->step))
5312 return false;
5313 step = int_cst_value (cand->iv->step);
5315 /* Check that the number of iterations matches the expected pattern:
5316 a + 1 > b ? 0 : b - a - 1. */
5317 mbz = niter->may_be_zero;
5318 if (TREE_CODE (mbz) == GT_EXPR)
5320 /* Handle a + 1 > b. */
5321 tree op0 = TREE_OPERAND (mbz, 0);
5322 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5324 a = TREE_OPERAND (op0, 0);
5325 b = TREE_OPERAND (mbz, 1);
5327 else
5328 return false;
5330 else if (TREE_CODE (mbz) == LT_EXPR)
5332 tree op1 = TREE_OPERAND (mbz, 1);
5334 /* Handle b < a + 1. */
5335 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5337 a = TREE_OPERAND (op1, 0);
5338 b = TREE_OPERAND (mbz, 0);
5340 else
5341 return false;
5343 else
5344 return false;
5346 /* Expected number of iterations is B - A - 1. Check that it matches
5347 the actual number, i.e., that B - A - NITER = 1. */
5348 tree_to_aff_combination (niter->niter, nit_type, &nit);
5349 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5350 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5351 aff_combination_scale (&nit, -1);
5352 aff_combination_scale (&tmpa, -1);
5353 aff_combination_add (&tmpb, &tmpa);
5354 aff_combination_add (&tmpb, &nit);
5355 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5356 return false;
5358 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5359 overflow. */
5360 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5361 cand->iv->step,
5362 fold_convert (TREE_TYPE (cand->iv->step), a));
5363 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5364 return false;
5366 /* Determine the new comparison operator. */
5367 comp = step < 0 ? GT_EXPR : LT_EXPR;
5368 if (*comp_p == NE_EXPR)
5369 *comp_p = comp;
5370 else if (*comp_p == EQ_EXPR)
5371 *comp_p = invert_tree_comparison (comp, false);
5372 else
5373 gcc_unreachable ();
5375 return true;
5378 /* Check whether it is possible to express the condition in USE by comparison
5379 of candidate CAND. If so, store the value compared with to BOUND, and the
5380 comparison operator to COMP. */
5382 static bool
5383 may_eliminate_iv (struct ivopts_data *data,
5384 struct iv_use *use, struct iv_cand *cand, tree *bound,
5385 enum tree_code *comp)
5387 basic_block ex_bb;
5388 edge exit;
5389 tree period;
5390 class loop *loop = data->current_loop;
5391 aff_tree bnd;
5392 class tree_niter_desc *desc = NULL;
5394 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5395 return false;
5397 /* For now works only for exits that dominate the loop latch.
5398 TODO: extend to other conditions inside loop body. */
5399 ex_bb = gimple_bb (use->stmt);
5400 if (use->stmt != last_stmt (ex_bb)
5401 || gimple_code (use->stmt) != GIMPLE_COND
5402 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5403 return false;
5405 exit = EDGE_SUCC (ex_bb, 0);
5406 if (flow_bb_inside_loop_p (loop, exit->dest))
5407 exit = EDGE_SUCC (ex_bb, 1);
5408 if (flow_bb_inside_loop_p (loop, exit->dest))
5409 return false;
5411 desc = niter_for_exit (data, exit);
5412 if (!desc)
5413 return false;
5415 /* Determine whether we can use the variable to test the exit condition.
5416 This is the case iff the period of the induction variable is greater
5417 than the number of iterations for which the exit condition is true. */
5418 period = iv_period (cand->iv);
5420 /* If the number of iterations is constant, compare against it directly. */
5421 if (TREE_CODE (desc->niter) == INTEGER_CST)
5423 /* See cand_value_at. */
5424 if (stmt_after_increment (loop, cand, use->stmt))
5426 if (!tree_int_cst_lt (desc->niter, period))
5427 return false;
5429 else
5431 if (tree_int_cst_lt (period, desc->niter))
5432 return false;
5436 /* If not, and if this is the only possible exit of the loop, see whether
5437 we can get a conservative estimate on the number of iterations of the
5438 entire loop and compare against that instead. */
5439 else
5441 widest_int period_value, max_niter;
5443 max_niter = desc->max;
5444 if (stmt_after_increment (loop, cand, use->stmt))
5445 max_niter += 1;
5446 period_value = wi::to_widest (period);
5447 if (wi::gtu_p (max_niter, period_value))
5449 /* See if we can take advantage of inferred loop bound
5450 information. */
5451 if (data->loop_single_exit_p)
5453 if (!max_loop_iterations (loop, &max_niter))
5454 return false;
5455 /* The loop bound is already adjusted by adding 1. */
5456 if (wi::gtu_p (max_niter, period_value))
5457 return false;
5459 else
5460 return false;
5464 /* For doloop IV cand, the bound would be zero. It's safe whether
5465 may_be_zero set or not. */
5466 if (cand->doloop_p)
5468 *bound = build_int_cst (TREE_TYPE (cand->iv->base), 0);
5469 *comp = iv_elimination_compare (data, use);
5470 return true;
5473 cand_value_at (loop, cand, use->stmt, desc, &bnd);
5475 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5476 aff_combination_to_tree (&bnd));
5477 *comp = iv_elimination_compare (data, use);
5479 /* It is unlikely that computing the number of iterations using division
5480 would be more profitable than keeping the original induction variable. */
5481 if (expression_expensive_p (*bound))
5482 return false;
5484 /* Sometimes, it is possible to handle the situation that the number of
5485 iterations may be zero unless additional assumptions by using <
5486 instead of != in the exit condition.
5488 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5489 base the exit condition on it. However, that is often too
5490 expensive. */
5491 if (!integer_zerop (desc->may_be_zero))
5492 return iv_elimination_compare_lt (data, cand, comp, desc);
5494 return true;
5497 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5498 be copied, if it is used in the loop body and DATA->body_includes_call. */
5500 static int
5501 parm_decl_cost (struct ivopts_data *data, tree bound)
5503 tree sbound = bound;
5504 STRIP_NOPS (sbound);
5506 if (TREE_CODE (sbound) == SSA_NAME
5507 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5508 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5509 && data->body_includes_call)
5510 return COSTS_N_INSNS (1);
5512 return 0;
5515 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5517 static bool
5518 determine_group_iv_cost_cond (struct ivopts_data *data,
5519 struct iv_group *group, struct iv_cand *cand)
5521 tree bound = NULL_TREE;
5522 struct iv *cmp_iv;
5523 bitmap inv_exprs = NULL;
5524 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5525 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5526 enum comp_iv_rewrite rewrite_type;
5527 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5528 tree *control_var, *bound_cst;
5529 enum tree_code comp = ERROR_MARK;
5530 struct iv_use *use = group->vuses[0];
5532 /* Extract condition operands. */
5533 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5534 &bound_cst, NULL, &cmp_iv);
5535 gcc_assert (rewrite_type != COMP_IV_NA);
5537 /* Try iv elimination. */
5538 if (rewrite_type == COMP_IV_ELIM
5539 && may_eliminate_iv (data, use, cand, &bound, &comp))
5541 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5542 if (elim_cost.cost == 0)
5543 elim_cost.cost = parm_decl_cost (data, bound);
5544 else if (TREE_CODE (bound) == INTEGER_CST)
5545 elim_cost.cost = 0;
5546 /* If we replace a loop condition 'i < n' with 'p < base + n',
5547 inv_vars_elim will have 'base' and 'n' set, which implies that both
5548 'base' and 'n' will be live during the loop. More likely,
5549 'base + n' will be loop invariant, resulting in only one live value
5550 during the loop. So in that case we clear inv_vars_elim and set
5551 inv_expr_elim instead. */
5552 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5554 inv_expr_elim = get_loop_invariant_expr (data, bound);
5555 bitmap_clear (inv_vars_elim);
5557 /* The bound is a loop invariant, so it will be only computed
5558 once. */
5559 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5562 /* When the condition is a comparison of the candidate IV against
5563 zero, prefer this IV.
5565 TODO: The constant that we're subtracting from the cost should
5566 be target-dependent. This information should be added to the
5567 target costs for each backend. */
5568 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5569 && integer_zerop (*bound_cst)
5570 && (operand_equal_p (*control_var, cand->var_after, 0)
5571 || operand_equal_p (*control_var, cand->var_before, 0)))
5572 elim_cost -= 1;
5574 express_cost = get_computation_cost (data, use, cand, false,
5575 &inv_vars_express, NULL,
5576 &inv_expr_express);
5577 if (cmp_iv != NULL)
5578 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5580 /* Count the cost of the original bound as well. */
5581 bound_cost = force_var_cost (data, *bound_cst, NULL);
5582 if (bound_cost.cost == 0)
5583 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5584 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5585 bound_cost.cost = 0;
5586 express_cost += bound_cost;
5588 /* Choose the better approach, preferring the eliminated IV. */
5589 if (elim_cost <= express_cost)
5591 cost = elim_cost;
5592 inv_vars = inv_vars_elim;
5593 inv_vars_elim = NULL;
5594 inv_expr = inv_expr_elim;
5595 /* For doloop candidate/use pair, adjust to zero cost. */
5596 if (group->doloop_p && cand->doloop_p && elim_cost.cost > no_cost.cost)
5597 cost = no_cost;
5599 else
5601 cost = express_cost;
5602 inv_vars = inv_vars_express;
5603 inv_vars_express = NULL;
5604 bound = NULL_TREE;
5605 comp = ERROR_MARK;
5606 inv_expr = inv_expr_express;
5609 if (inv_expr)
5611 inv_exprs = BITMAP_ALLOC (NULL);
5612 bitmap_set_bit (inv_exprs, inv_expr->id);
5614 set_group_iv_cost (data, group, cand, cost,
5615 inv_vars, bound, comp, inv_exprs);
5617 if (inv_vars_elim)
5618 BITMAP_FREE (inv_vars_elim);
5619 if (inv_vars_express)
5620 BITMAP_FREE (inv_vars_express);
5622 return !cost.infinite_cost_p ();
5625 /* Determines cost of computing uses in GROUP with CAND. Returns false
5626 if USE cannot be represented with CAND. */
5628 static bool
5629 determine_group_iv_cost (struct ivopts_data *data,
5630 struct iv_group *group, struct iv_cand *cand)
5632 switch (group->type)
5634 case USE_NONLINEAR_EXPR:
5635 return determine_group_iv_cost_generic (data, group, cand);
5637 case USE_REF_ADDRESS:
5638 case USE_PTR_ADDRESS:
5639 return determine_group_iv_cost_address (data, group, cand);
5641 case USE_COMPARE:
5642 return determine_group_iv_cost_cond (data, group, cand);
5644 default:
5645 gcc_unreachable ();
5649 /* Return true if get_computation_cost indicates that autoincrement is
5650 a possibility for the pair of USE and CAND, false otherwise. */
5652 static bool
5653 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5654 struct iv_cand *cand)
5656 if (!address_p (use->type))
5657 return false;
5659 bool can_autoinc = false;
5660 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5661 return can_autoinc;
5664 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5665 use that allows autoincrement, and set their AINC_USE if possible. */
5667 static void
5668 set_autoinc_for_original_candidates (struct ivopts_data *data)
5670 unsigned i, j;
5672 for (i = 0; i < data->vcands.length (); i++)
5674 struct iv_cand *cand = data->vcands[i];
5675 struct iv_use *closest_before = NULL;
5676 struct iv_use *closest_after = NULL;
5677 if (cand->pos != IP_ORIGINAL)
5678 continue;
5680 for (j = 0; j < data->vgroups.length (); j++)
5682 struct iv_group *group = data->vgroups[j];
5683 struct iv_use *use = group->vuses[0];
5684 unsigned uid = gimple_uid (use->stmt);
5686 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5687 continue;
5689 if (uid < gimple_uid (cand->incremented_at)
5690 && (closest_before == NULL
5691 || uid > gimple_uid (closest_before->stmt)))
5692 closest_before = use;
5694 if (uid > gimple_uid (cand->incremented_at)
5695 && (closest_after == NULL
5696 || uid < gimple_uid (closest_after->stmt)))
5697 closest_after = use;
5700 if (closest_before != NULL
5701 && autoinc_possible_for_pair (data, closest_before, cand))
5702 cand->ainc_use = closest_before;
5703 else if (closest_after != NULL
5704 && autoinc_possible_for_pair (data, closest_after, cand))
5705 cand->ainc_use = closest_after;
5709 /* Relate compare use with all candidates. */
5711 static void
5712 relate_compare_use_with_all_cands (struct ivopts_data *data)
5714 unsigned i, count = data->vcands.length ();
5715 for (i = 0; i < data->vgroups.length (); i++)
5717 struct iv_group *group = data->vgroups[i];
5719 if (group->type == USE_COMPARE)
5720 bitmap_set_range (group->related_cands, 0, count);
5724 /* If PREFERRED_MODE is suitable and profitable, use the preferred
5725 PREFERRED_MODE to compute doloop iv base from niter: base = niter + 1. */
5727 static tree
5728 compute_doloop_base_on_mode (machine_mode preferred_mode, tree niter,
5729 const widest_int &iterations_max)
5731 tree ntype = TREE_TYPE (niter);
5732 tree pref_type = lang_hooks.types.type_for_mode (preferred_mode, 1);
5733 if (!pref_type)
5734 return fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5735 build_int_cst (ntype, 1));
5737 gcc_assert (TREE_CODE (pref_type) == INTEGER_TYPE);
5739 int prec = TYPE_PRECISION (ntype);
5740 int pref_prec = TYPE_PRECISION (pref_type);
5742 tree base;
5744 /* Check if the PREFERRED_MODED is able to present niter. */
5745 if (pref_prec > prec
5746 || wi::ltu_p (iterations_max,
5747 widest_int::from (wi::max_value (pref_prec, UNSIGNED),
5748 UNSIGNED)))
5750 /* No wrap, it is safe to use preferred type after niter + 1. */
5751 if (wi::ltu_p (iterations_max,
5752 widest_int::from (wi::max_value (prec, UNSIGNED),
5753 UNSIGNED)))
5755 /* This could help to optimize "-1 +1" pair when niter looks
5756 like "n-1": n is in original mode. "base = (n - 1) + 1"
5757 in PREFERRED_MODED: it could be base = (PREFERRED_TYPE)n. */
5758 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5759 build_int_cst (ntype, 1));
5760 base = fold_convert (pref_type, base);
5763 /* To avoid wrap, convert niter to preferred type before plus 1. */
5764 else
5766 niter = fold_convert (pref_type, niter);
5767 base = fold_build2 (PLUS_EXPR, pref_type, unshare_expr (niter),
5768 build_int_cst (pref_type, 1));
5771 else
5772 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5773 build_int_cst (ntype, 1));
5774 return base;
5777 /* Add one doloop dedicated IV candidate:
5778 - Base is (may_be_zero ? 1 : (niter + 1)).
5779 - Step is -1. */
5781 static void
5782 add_iv_candidate_for_doloop (struct ivopts_data *data)
5784 tree_niter_desc *niter_desc = niter_for_single_dom_exit (data);
5785 gcc_assert (niter_desc && niter_desc->assumptions);
5787 tree niter = niter_desc->niter;
5788 tree ntype = TREE_TYPE (niter);
5789 gcc_assert (TREE_CODE (ntype) == INTEGER_TYPE);
5791 tree may_be_zero = niter_desc->may_be_zero;
5792 if (may_be_zero && integer_zerop (may_be_zero))
5793 may_be_zero = NULL_TREE;
5794 if (may_be_zero)
5796 if (COMPARISON_CLASS_P (may_be_zero))
5798 niter = fold_build3 (COND_EXPR, ntype, may_be_zero,
5799 build_int_cst (ntype, 0),
5800 rewrite_to_non_trapping_overflow (niter));
5802 /* Don't try to obtain the iteration count expression when may_be_zero is
5803 integer_nonzerop (actually iteration count is one) or else. */
5804 else
5805 return;
5808 machine_mode mode = TYPE_MODE (ntype);
5809 machine_mode pref_mode = targetm.preferred_doloop_mode (mode);
5811 tree base;
5812 if (mode != pref_mode)
5814 base = compute_doloop_base_on_mode (pref_mode, niter, niter_desc->max);
5815 ntype = TREE_TYPE (base);
5817 else
5818 base = fold_build2 (PLUS_EXPR, ntype, unshare_expr (niter),
5819 build_int_cst (ntype, 1));
5822 add_candidate (data, base, build_int_cst (ntype, -1), true, NULL, NULL, true);
5825 /* Finds the candidates for the induction variables. */
5827 static void
5828 find_iv_candidates (struct ivopts_data *data)
5830 /* Add commonly used ivs. */
5831 add_standard_iv_candidates (data);
5833 /* Add doloop dedicated ivs. */
5834 if (data->doloop_use_p)
5835 add_iv_candidate_for_doloop (data);
5837 /* Add old induction variables. */
5838 add_iv_candidate_for_bivs (data);
5840 /* Add induction variables derived from uses. */
5841 add_iv_candidate_for_groups (data);
5843 set_autoinc_for_original_candidates (data);
5845 /* Record the important candidates. */
5846 record_important_candidates (data);
5848 /* Relate compare iv_use with all candidates. */
5849 if (!data->consider_all_candidates)
5850 relate_compare_use_with_all_cands (data);
5852 if (dump_file && (dump_flags & TDF_DETAILS))
5854 unsigned i;
5856 fprintf (dump_file, "\n<Important Candidates>:\t");
5857 for (i = 0; i < data->vcands.length (); i++)
5858 if (data->vcands[i]->important)
5859 fprintf (dump_file, " %d,", data->vcands[i]->id);
5860 fprintf (dump_file, "\n");
5862 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5863 for (i = 0; i < data->vgroups.length (); i++)
5865 struct iv_group *group = data->vgroups[i];
5867 if (group->related_cands)
5869 fprintf (dump_file, " Group %d:\t", group->id);
5870 dump_bitmap (dump_file, group->related_cands);
5873 fprintf (dump_file, "\n");
5877 /* Determines costs of computing use of iv with an iv candidate. */
5879 static void
5880 determine_group_iv_costs (struct ivopts_data *data)
5882 unsigned i, j;
5883 struct iv_cand *cand;
5884 struct iv_group *group;
5885 bitmap to_clear = BITMAP_ALLOC (NULL);
5887 alloc_use_cost_map (data);
5889 for (i = 0; i < data->vgroups.length (); i++)
5891 group = data->vgroups[i];
5893 if (data->consider_all_candidates)
5895 for (j = 0; j < data->vcands.length (); j++)
5897 cand = data->vcands[j];
5898 determine_group_iv_cost (data, group, cand);
5901 else
5903 bitmap_iterator bi;
5905 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5907 cand = data->vcands[j];
5908 if (!determine_group_iv_cost (data, group, cand))
5909 bitmap_set_bit (to_clear, j);
5912 /* Remove the candidates for that the cost is infinite from
5913 the list of related candidates. */
5914 bitmap_and_compl_into (group->related_cands, to_clear);
5915 bitmap_clear (to_clear);
5919 BITMAP_FREE (to_clear);
5921 if (dump_file && (dump_flags & TDF_DETAILS))
5923 bitmap_iterator bi;
5925 /* Dump invariant variables. */
5926 fprintf (dump_file, "\n<Invariant Vars>:\n");
5927 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5929 struct version_info *info = ver_info (data, i);
5930 if (info->inv_id)
5932 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5933 print_generic_expr (dump_file, info->name, TDF_SLIM);
5934 fprintf (dump_file, "%s\n",
5935 info->has_nonlin_use ? "" : "\t(eliminable)");
5939 /* Dump invariant expressions. */
5940 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5941 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5943 for (hash_table<iv_inv_expr_hasher>::iterator it
5944 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5945 ++it)
5946 list.safe_push (*it);
5948 list.qsort (sort_iv_inv_expr_ent);
5950 for (i = 0; i < list.length (); ++i)
5952 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5953 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5954 fprintf (dump_file, "\n");
5957 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5959 for (i = 0; i < data->vgroups.length (); i++)
5961 group = data->vgroups[i];
5963 fprintf (dump_file, "Group %d:\n", i);
5964 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5965 for (j = 0; j < group->n_map_members; j++)
5967 if (!group->cost_map[j].cand
5968 || group->cost_map[j].cost.infinite_cost_p ())
5969 continue;
5971 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5972 group->cost_map[j].cand->id,
5973 group->cost_map[j].cost.cost,
5974 group->cost_map[j].cost.complexity);
5975 if (!group->cost_map[j].inv_exprs
5976 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5977 fprintf (dump_file, "NIL;\t");
5978 else
5979 bitmap_print (dump_file,
5980 group->cost_map[j].inv_exprs, "", ";\t");
5981 if (!group->cost_map[j].inv_vars
5982 || bitmap_empty_p (group->cost_map[j].inv_vars))
5983 fprintf (dump_file, "NIL;\n");
5984 else
5985 bitmap_print (dump_file,
5986 group->cost_map[j].inv_vars, "", "\n");
5989 fprintf (dump_file, "\n");
5991 fprintf (dump_file, "\n");
5995 /* Determines cost of the candidate CAND. */
5997 static void
5998 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
6000 comp_cost cost_base;
6001 int64_t cost, cost_step;
6002 tree base;
6004 gcc_assert (cand->iv != NULL);
6006 /* There are two costs associated with the candidate -- its increment
6007 and its initialization. The second is almost negligible for any loop
6008 that rolls enough, so we take it just very little into account. */
6010 base = cand->iv->base;
6011 cost_base = force_var_cost (data, base, NULL);
6012 /* It will be exceptional that the iv register happens to be initialized with
6013 the proper value at no cost. In general, there will at least be a regcopy
6014 or a const set. */
6015 if (cost_base.cost == 0)
6016 cost_base.cost = COSTS_N_INSNS (1);
6017 /* Doloop decrement should be considered as zero cost. */
6018 if (cand->doloop_p)
6019 cost_step = 0;
6020 else
6021 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
6022 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
6024 /* Prefer the original ivs unless we may gain something by replacing it.
6025 The reason is to make debugging simpler; so this is not relevant for
6026 artificial ivs created by other optimization passes. */
6027 if ((cand->pos != IP_ORIGINAL
6028 || !SSA_NAME_VAR (cand->var_before)
6029 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
6030 /* Prefer doloop as well. */
6031 && !cand->doloop_p)
6032 cost++;
6034 /* Prefer not to insert statements into latch unless there are some
6035 already (so that we do not create unnecessary jumps). */
6036 if (cand->pos == IP_END
6037 && empty_block_p (ip_end_pos (data->current_loop)))
6038 cost++;
6040 cand->cost = cost;
6041 cand->cost_step = cost_step;
6044 /* Determines costs of computation of the candidates. */
6046 static void
6047 determine_iv_costs (struct ivopts_data *data)
6049 unsigned i;
6051 if (dump_file && (dump_flags & TDF_DETAILS))
6053 fprintf (dump_file, "<Candidate Costs>:\n");
6054 fprintf (dump_file, " cand\tcost\n");
6057 for (i = 0; i < data->vcands.length (); i++)
6059 struct iv_cand *cand = data->vcands[i];
6061 determine_iv_cost (data, cand);
6063 if (dump_file && (dump_flags & TDF_DETAILS))
6064 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
6067 if (dump_file && (dump_flags & TDF_DETAILS))
6068 fprintf (dump_file, "\n");
6071 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
6072 induction variables. Note N_INVS includes both invariant variables and
6073 invariant expressions. */
6075 static unsigned
6076 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
6077 unsigned n_cands)
6079 unsigned cost;
6080 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
6081 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
6082 bool speed = data->speed;
6084 /* If there is a call in the loop body, the call-clobbered registers
6085 are not available for loop invariants. */
6086 if (data->body_includes_call)
6087 available_regs = available_regs - target_clobbered_regs;
6089 /* If we have enough registers. */
6090 if (regs_needed + target_res_regs < available_regs)
6091 cost = n_new;
6092 /* If close to running out of registers, try to preserve them. */
6093 else if (regs_needed <= available_regs)
6094 cost = target_reg_cost [speed] * regs_needed;
6095 /* If we run out of available registers but the number of candidates
6096 does not, we penalize extra registers using target_spill_cost. */
6097 else if (n_cands <= available_regs)
6098 cost = target_reg_cost [speed] * available_regs
6099 + target_spill_cost [speed] * (regs_needed - available_regs);
6100 /* If the number of candidates runs out available registers, we penalize
6101 extra candidate registers using target_spill_cost * 2. Because it is
6102 more expensive to spill induction variable than invariant. */
6103 else
6104 cost = target_reg_cost [speed] * available_regs
6105 + target_spill_cost [speed] * (n_cands - available_regs) * 2
6106 + target_spill_cost [speed] * (regs_needed - n_cands);
6108 /* Finally, add the number of candidates, so that we prefer eliminating
6109 induction variables if possible. */
6110 return cost + n_cands;
6113 /* For each size of the induction variable set determine the penalty. */
6115 static void
6116 determine_set_costs (struct ivopts_data *data)
6118 unsigned j, n;
6119 gphi *phi;
6120 gphi_iterator psi;
6121 tree op;
6122 class loop *loop = data->current_loop;
6123 bitmap_iterator bi;
6125 if (dump_file && (dump_flags & TDF_DETAILS))
6127 fprintf (dump_file, "<Global Costs>:\n");
6128 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6129 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6130 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6131 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6134 n = 0;
6135 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6137 phi = psi.phi ();
6138 op = PHI_RESULT (phi);
6140 if (virtual_operand_p (op))
6141 continue;
6143 if (get_iv (data, op))
6144 continue;
6146 if (!POINTER_TYPE_P (TREE_TYPE (op))
6147 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
6148 continue;
6150 n++;
6153 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6155 struct version_info *info = ver_info (data, j);
6157 if (info->inv_id && info->has_nonlin_use)
6158 n++;
6161 data->regs_used = n;
6162 if (dump_file && (dump_flags & TDF_DETAILS))
6163 fprintf (dump_file, " regs_used %d\n", n);
6165 if (dump_file && (dump_flags & TDF_DETAILS))
6167 fprintf (dump_file, " cost for size:\n");
6168 fprintf (dump_file, " ivs\tcost\n");
6169 for (j = 0; j <= 2 * target_avail_regs; j++)
6170 fprintf (dump_file, " %d\t%d\n", j,
6171 ivopts_estimate_reg_pressure (data, 0, j));
6172 fprintf (dump_file, "\n");
6176 /* Returns true if A is a cheaper cost pair than B. */
6178 static bool
6179 cheaper_cost_pair (class cost_pair *a, class cost_pair *b)
6181 if (!a)
6182 return false;
6184 if (!b)
6185 return true;
6187 if (a->cost < b->cost)
6188 return true;
6190 if (b->cost < a->cost)
6191 return false;
6193 /* In case the costs are the same, prefer the cheaper candidate. */
6194 if (a->cand->cost < b->cand->cost)
6195 return true;
6197 return false;
6200 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
6201 for more expensive, equal and cheaper respectively. */
6203 static int
6204 compare_cost_pair (class cost_pair *a, class cost_pair *b)
6206 if (cheaper_cost_pair (a, b))
6207 return -1;
6208 if (cheaper_cost_pair (b, a))
6209 return 1;
6211 return 0;
6214 /* Returns candidate by that USE is expressed in IVS. */
6216 static class cost_pair *
6217 iv_ca_cand_for_group (class iv_ca *ivs, struct iv_group *group)
6219 return ivs->cand_for_group[group->id];
6222 /* Computes the cost field of IVS structure. */
6224 static void
6225 iv_ca_recount_cost (struct ivopts_data *data, class iv_ca *ivs)
6227 comp_cost cost = ivs->cand_use_cost;
6229 cost += ivs->cand_cost;
6230 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
6231 ivs->cost = cost;
6234 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
6235 and IVS. */
6237 static void
6238 iv_ca_set_remove_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6240 bitmap_iterator bi;
6241 unsigned iid;
6243 if (!invs)
6244 return;
6246 gcc_assert (n_inv_uses != NULL);
6247 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6249 n_inv_uses[iid]--;
6250 if (n_inv_uses[iid] == 0)
6251 ivs->n_invs--;
6255 /* Set USE not to be expressed by any candidate in IVS. */
6257 static void
6258 iv_ca_set_no_cp (struct ivopts_data *data, class iv_ca *ivs,
6259 struct iv_group *group)
6261 unsigned gid = group->id, cid;
6262 class cost_pair *cp;
6264 cp = ivs->cand_for_group[gid];
6265 if (!cp)
6266 return;
6267 cid = cp->cand->id;
6269 ivs->bad_groups++;
6270 ivs->cand_for_group[gid] = NULL;
6271 ivs->n_cand_uses[cid]--;
6273 if (ivs->n_cand_uses[cid] == 0)
6275 bitmap_clear_bit (ivs->cands, cid);
6276 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6277 ivs->n_cands--;
6278 ivs->cand_cost -= cp->cand->cost;
6279 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6280 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6283 ivs->cand_use_cost -= cp->cost;
6284 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6285 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6286 iv_ca_recount_cost (data, ivs);
6289 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
6290 IVS. */
6292 static void
6293 iv_ca_set_add_invs (class iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
6295 bitmap_iterator bi;
6296 unsigned iid;
6298 if (!invs)
6299 return;
6301 gcc_assert (n_inv_uses != NULL);
6302 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6304 n_inv_uses[iid]++;
6305 if (n_inv_uses[iid] == 1)
6306 ivs->n_invs++;
6310 /* Set cost pair for GROUP in set IVS to CP. */
6312 static void
6313 iv_ca_set_cp (struct ivopts_data *data, class iv_ca *ivs,
6314 struct iv_group *group, class cost_pair *cp)
6316 unsigned gid = group->id, cid;
6318 if (ivs->cand_for_group[gid] == cp)
6319 return;
6321 if (ivs->cand_for_group[gid])
6322 iv_ca_set_no_cp (data, ivs, group);
6324 if (cp)
6326 cid = cp->cand->id;
6328 ivs->bad_groups--;
6329 ivs->cand_for_group[gid] = cp;
6330 ivs->n_cand_uses[cid]++;
6331 if (ivs->n_cand_uses[cid] == 1)
6333 bitmap_set_bit (ivs->cands, cid);
6334 if (!cp->cand->doloop_p || !targetm.have_count_reg_decr_p)
6335 ivs->n_cands++;
6336 ivs->cand_cost += cp->cand->cost;
6337 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
6338 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
6341 ivs->cand_use_cost += cp->cost;
6342 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
6343 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
6344 iv_ca_recount_cost (data, ivs);
6348 /* Extend set IVS by expressing USE by some of the candidates in it
6349 if possible. Consider all important candidates if candidates in
6350 set IVS don't give any result. */
6352 static void
6353 iv_ca_add_group (struct ivopts_data *data, class iv_ca *ivs,
6354 struct iv_group *group)
6356 class cost_pair *best_cp = NULL, *cp;
6357 bitmap_iterator bi;
6358 unsigned i;
6359 struct iv_cand *cand;
6361 gcc_assert (ivs->upto >= group->id);
6362 ivs->upto++;
6363 ivs->bad_groups++;
6365 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6367 cand = data->vcands[i];
6368 cp = get_group_iv_cost (data, group, cand);
6369 if (cheaper_cost_pair (cp, best_cp))
6370 best_cp = cp;
6373 if (best_cp == NULL)
6375 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6377 cand = data->vcands[i];
6378 cp = get_group_iv_cost (data, group, cand);
6379 if (cheaper_cost_pair (cp, best_cp))
6380 best_cp = cp;
6384 iv_ca_set_cp (data, ivs, group, best_cp);
6387 /* Get cost for assignment IVS. */
6389 static comp_cost
6390 iv_ca_cost (class iv_ca *ivs)
6392 /* This was a conditional expression but it triggered a bug in
6393 Sun C 5.5. */
6394 if (ivs->bad_groups)
6395 return infinite_cost;
6396 else
6397 return ivs->cost;
6400 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
6401 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
6402 respectively. */
6404 static int
6405 iv_ca_compare_deps (struct ivopts_data *data, class iv_ca *ivs,
6406 struct iv_group *group, class cost_pair *old_cp,
6407 class cost_pair *new_cp)
6409 gcc_assert (old_cp && new_cp && old_cp != new_cp);
6410 unsigned old_n_invs = ivs->n_invs;
6411 iv_ca_set_cp (data, ivs, group, new_cp);
6412 unsigned new_n_invs = ivs->n_invs;
6413 iv_ca_set_cp (data, ivs, group, old_cp);
6415 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
6418 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6419 it before NEXT. */
6421 static struct iv_ca_delta *
6422 iv_ca_delta_add (struct iv_group *group, class cost_pair *old_cp,
6423 class cost_pair *new_cp, struct iv_ca_delta *next)
6425 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6427 change->group = group;
6428 change->old_cp = old_cp;
6429 change->new_cp = new_cp;
6430 change->next = next;
6432 return change;
6435 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6436 are rewritten. */
6438 static struct iv_ca_delta *
6439 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6441 struct iv_ca_delta *last;
6443 if (!l2)
6444 return l1;
6446 if (!l1)
6447 return l2;
6449 for (last = l1; last->next; last = last->next)
6450 continue;
6451 last->next = l2;
6453 return l1;
6456 /* Reverse the list of changes DELTA, forming the inverse to it. */
6458 static struct iv_ca_delta *
6459 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6461 struct iv_ca_delta *act, *next, *prev = NULL;
6463 for (act = delta; act; act = next)
6465 next = act->next;
6466 act->next = prev;
6467 prev = act;
6469 std::swap (act->old_cp, act->new_cp);
6472 return prev;
6475 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6476 reverted instead. */
6478 static void
6479 iv_ca_delta_commit (struct ivopts_data *data, class iv_ca *ivs,
6480 struct iv_ca_delta *delta, bool forward)
6482 class cost_pair *from, *to;
6483 struct iv_ca_delta *act;
6485 if (!forward)
6486 delta = iv_ca_delta_reverse (delta);
6488 for (act = delta; act; act = act->next)
6490 from = act->old_cp;
6491 to = act->new_cp;
6492 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6493 iv_ca_set_cp (data, ivs, act->group, to);
6496 if (!forward)
6497 iv_ca_delta_reverse (delta);
6500 /* Returns true if CAND is used in IVS. */
6502 static bool
6503 iv_ca_cand_used_p (class iv_ca *ivs, struct iv_cand *cand)
6505 return ivs->n_cand_uses[cand->id] > 0;
6508 /* Returns number of induction variable candidates in the set IVS. */
6510 static unsigned
6511 iv_ca_n_cands (class iv_ca *ivs)
6513 return ivs->n_cands;
6516 /* Free the list of changes DELTA. */
6518 static void
6519 iv_ca_delta_free (struct iv_ca_delta **delta)
6521 struct iv_ca_delta *act, *next;
6523 for (act = *delta; act; act = next)
6525 next = act->next;
6526 free (act);
6529 *delta = NULL;
6532 /* Allocates new iv candidates assignment. */
6534 static class iv_ca *
6535 iv_ca_new (struct ivopts_data *data)
6537 class iv_ca *nw = XNEW (class iv_ca);
6539 nw->upto = 0;
6540 nw->bad_groups = 0;
6541 nw->cand_for_group = XCNEWVEC (class cost_pair *,
6542 data->vgroups.length ());
6543 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6544 nw->cands = BITMAP_ALLOC (NULL);
6545 nw->n_cands = 0;
6546 nw->n_invs = 0;
6547 nw->cand_use_cost = no_cost;
6548 nw->cand_cost = 0;
6549 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6550 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6551 nw->cost = no_cost;
6553 return nw;
6556 /* Free memory occupied by the set IVS. */
6558 static void
6559 iv_ca_free (class iv_ca **ivs)
6561 free ((*ivs)->cand_for_group);
6562 free ((*ivs)->n_cand_uses);
6563 BITMAP_FREE ((*ivs)->cands);
6564 free ((*ivs)->n_inv_var_uses);
6565 free ((*ivs)->n_inv_expr_uses);
6566 free (*ivs);
6567 *ivs = NULL;
6570 /* Dumps IVS to FILE. */
6572 static void
6573 iv_ca_dump (struct ivopts_data *data, FILE *file, class iv_ca *ivs)
6575 unsigned i;
6576 comp_cost cost = iv_ca_cost (ivs);
6578 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6579 cost.complexity);
6580 fprintf (file, " reg_cost: %d\n",
6581 ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands));
6582 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6583 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6584 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6585 bitmap_print (file, ivs->cands, " candidates: ","\n");
6587 for (i = 0; i < ivs->upto; i++)
6589 struct iv_group *group = data->vgroups[i];
6590 class cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6591 if (cp)
6592 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6593 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6594 cp->cost.cost, cp->cost.complexity);
6595 else
6596 fprintf (file, " group:%d --> ??\n", group->id);
6599 const char *pref = "";
6600 fprintf (file, " invariant variables: ");
6601 for (i = 1; i <= data->max_inv_var_id; i++)
6602 if (ivs->n_inv_var_uses[i])
6604 fprintf (file, "%s%d", pref, i);
6605 pref = ", ";
6608 pref = "";
6609 fprintf (file, "\n invariant expressions: ");
6610 for (i = 1; i <= data->max_inv_expr_id; i++)
6611 if (ivs->n_inv_expr_uses[i])
6613 fprintf (file, "%s%d", pref, i);
6614 pref = ", ";
6617 fprintf (file, "\n\n");
6620 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6621 new set, and store differences in DELTA. Number of induction variables
6622 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6623 the function will try to find a solution with mimimal iv candidates. */
6625 static comp_cost
6626 iv_ca_extend (struct ivopts_data *data, class iv_ca *ivs,
6627 struct iv_cand *cand, struct iv_ca_delta **delta,
6628 unsigned *n_ivs, bool min_ncand)
6630 unsigned i;
6631 comp_cost cost;
6632 struct iv_group *group;
6633 class cost_pair *old_cp, *new_cp;
6635 *delta = NULL;
6636 for (i = 0; i < ivs->upto; i++)
6638 group = data->vgroups[i];
6639 old_cp = iv_ca_cand_for_group (ivs, group);
6641 if (old_cp
6642 && old_cp->cand == cand)
6643 continue;
6645 new_cp = get_group_iv_cost (data, group, cand);
6646 if (!new_cp)
6647 continue;
6649 if (!min_ncand)
6651 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6652 /* Skip if new_cp depends on more invariants. */
6653 if (cmp_invs > 0)
6654 continue;
6656 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6657 /* Skip if new_cp is not cheaper. */
6658 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6659 continue;
6662 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6665 iv_ca_delta_commit (data, ivs, *delta, true);
6666 cost = iv_ca_cost (ivs);
6667 if (n_ivs)
6668 *n_ivs = iv_ca_n_cands (ivs);
6669 iv_ca_delta_commit (data, ivs, *delta, false);
6671 return cost;
6674 /* Try narrowing set IVS by removing CAND. Return the cost of
6675 the new set and store the differences in DELTA. START is
6676 the candidate with which we start narrowing. */
/* Implementation notes: every group whose current cost pair uses CAND is
   given a replacement pair.  The replacement starts out as START's pair for
   the group and is overridden by any other candidate already in IVS whose
   trial installation (iv_ca_set_cp followed by iv_ca_cost) is cheaper than
   the cost of IVS measured before the trials.  With
   data->consider_all_candidates all members of IVS->cands are tried,
   otherwise only those also present in the group's related_cands.  The
   group's original pair is reinstalled after the trials.  If some group
   ends up without a replacement, DELTA is freed and infinite_cost is
   returned.  */
6678 static comp_cost
6679 iv_ca_narrow (struct ivopts_data *data, class iv_ca *ivs,
6680 struct iv_cand *cand, struct iv_cand *start,
6681 struct iv_ca_delta **delta)
6683 unsigned i, ci;
6684 struct iv_group *group;
6685 class cost_pair *old_cp, *new_cp, *cp;
6686 bitmap_iterator bi;
6687 struct iv_cand *cnd;
6688 comp_cost cost, best_cost, acost;
6690 *delta = NULL;
6691 for (i = 0; i < data->vgroups.length (); i++)
6693 group = data->vgroups[i];
/* Only groups currently expressed by CAND need a replacement.  */
6695 old_cp = iv_ca_cand_for_group (ivs, group);
6696 if (old_cp->cand != cand)
6697 continue;
/* Baseline for the trials below: cost of IVS with CAND still in use.  */
6699 best_cost = iv_ca_cost (ivs);
6700 /* Start narrowing with START. */
6701 new_cp = get_group_iv_cost (data, group, start);
6703 if (data->consider_all_candidates)
6705 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
/* CAND is being removed and START was handled above.  */
6707 if (ci == cand->id || (start && ci == start->id))
6708 continue;
6710 cnd = data->vcands[ci];
6712 cp = get_group_iv_cost (data, group, cnd);
6713 if (!cp)
6714 continue;
/* Temporarily install CP to measure the cost of the whole set.  */
6716 iv_ca_set_cp (data, ivs, group, cp);
6717 acost = iv_ca_cost (ivs);
6719 if (acost < best_cost)
6721 best_cost = acost;
6722 new_cp = cp;
6726 else
/* Same search, restricted to candidates related to this group.  */
6728 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6730 if (ci == cand->id || (start && ci == start->id))
6731 continue;
6733 cnd = data->vcands[ci];
6735 cp = get_group_iv_cost (data, group, cnd);
6736 if (!cp)
6737 continue;
6739 iv_ca_set_cp (data, ivs, group, cp);
6740 acost = iv_ca_cost (ivs);
6742 if (acost < best_cost)
6744 best_cost = acost;
6745 new_cp = cp;
6749 /* Restore to old cp for use. */
6750 iv_ca_set_cp (data, ivs, group, old_cp);
/* No candidate left in the set can express this group.  */
6752 if (!new_cp)
6754 iv_ca_delta_free (delta);
6755 return infinite_cost;
6758 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
/* Commit the delta just to read the resulting cost, then commit it again
   with the flag cleared (presumably reverting it -- confirm against
   iv_ca_delta_commit).  */
6761 iv_ca_delta_commit (data, ivs, *delta, true);
6762 cost = iv_ca_cost (ivs);
6763 iv_ca_delta_commit (data, ivs, *delta, false);
6765 return cost;
6768 /* Try optimizing the set of candidates IVS by removing candidates different
6769 from to EXCEPT_CAND from it. Return cost of the new set, and store
6770 differences in DELTA. */
6772 static comp_cost
6773 iv_ca_prune (struct ivopts_data *data, class iv_ca *ivs,
6774 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6776 bitmap_iterator bi;
6777 struct iv_ca_delta *act_delta, *best_delta;
6778 unsigned i;
6779 comp_cost best_cost, acost;
6780 struct iv_cand *cand;
6782 best_delta = NULL;
6783 best_cost = iv_ca_cost (ivs);
6785 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6787 cand = data->vcands[i];
6789 if (cand == except_cand)
6790 continue;
6792 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6794 if (acost < best_cost)
6796 best_cost = acost;
6797 iv_ca_delta_free (&best_delta);
6798 best_delta = act_delta;
6800 else
6801 iv_ca_delta_free (&act_delta);
6804 if (!best_delta)
6806 *delta = NULL;
6807 return best_cost;
6810 /* Recurse to possibly remove other unnecessary ivs. */
6811 iv_ca_delta_commit (data, ivs, best_delta, true);
6812 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6813 iv_ca_delta_commit (data, ivs, best_delta, false);
6814 *delta = iv_ca_delta_join (best_delta, *delta);
6815 return best_cost;
6818 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6819 cheaper local cost for GROUP than BEST_CP. Return pointer to
6820 the corresponding cost_pair, otherwise just return BEST_CP. */
6822 static class cost_pair*
6823 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6824 unsigned int cand_idx, struct iv_cand *old_cand,
6825 class cost_pair *best_cp)
6827 struct iv_cand *cand;
6828 class cost_pair *cp;
6830 gcc_assert (old_cand != NULL && best_cp != NULL);
6831 if (cand_idx == old_cand->id)
6832 return best_cp;
6834 cand = data->vcands[cand_idx];
6835 cp = get_group_iv_cost (data, group, cand);
6836 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6837 return cp;
6839 return best_cp;
6842 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6843 which are used by more than one iv use. For each of those candidates,
6844 this function tries to represent iv uses under that candidate using
6845 other ones with lower local cost, then tries to prune the new set.
6846 If the new set has lower cost, it returns the new cost after recording
6847 the candidate replacement in list DELTA. */
/* Implementation notes: candidates of IVS are visited in bitmap order and
   the first one whose replacement (followed by pruning) yields a cost below
   the current cost of IVS wins; its delta is stored to *DELTA and the new
   cost is returned.  When no candidate qualifies, *DELTA stays NULL and the
   current cost is returned.  */
6849 static comp_cost
6850 iv_ca_replace (struct ivopts_data *data, class iv_ca *ivs,
6851 struct iv_ca_delta **delta)
6853 bitmap_iterator bi, bj;
6854 unsigned int i, j, k;
6855 struct iv_cand *cand;
6856 comp_cost orig_cost, acost;
6857 struct iv_ca_delta *act_delta, *tmp_delta;
6858 class cost_pair *old_cp, *best_cp = NULL;
6860 *delta = NULL;
6861 orig_cost = iv_ca_cost (ivs);
6863 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
/* Skip candidates with exactly one use and those with more uses than
   ALWAYS_PRUNE_CAND_SET_BOUND.  */
6865 if (ivs->n_cand_uses[i] == 1
6866 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6867 continue;
6869 cand = data->vcands[i];
6871 act_delta = NULL;
6872 /* Represent uses under current candidate using other ones with
6873 lower local cost. */
6874 for (j = 0; j < ivs->upto; j++)
6876 struct iv_group *group = data->vgroups[j];
6877 old_cp = iv_ca_cand_for_group (ivs, group);
6879 if (old_cp->cand != cand)
6880 continue;
/* Scan either every candidate or only the ones related to the group,
   keeping the pair that cheaper_cost_with_cand prefers.  */
6882 best_cp = old_cp;
6883 if (data->consider_all_candidates)
6884 for (k = 0; k < data->vcands.length (); k++)
6885 best_cp = cheaper_cost_with_cand (data, group, k,
6886 old_cp->cand, best_cp);
6887 else
6888 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6889 best_cp = cheaper_cost_with_cand (data, group, k,
6890 old_cp->cand, best_cp);
/* Nothing locally cheaper was found for this group.  */
6892 if (best_cp == old_cp)
6893 continue;
6895 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6897 /* No need for further prune. */
6898 if (!act_delta)
6899 continue;
6901 /* Prune the new candidate set. */
6902 iv_ca_delta_commit (data, ivs, act_delta, true);
6903 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6904 iv_ca_delta_commit (data, ivs, act_delta, false);
6905 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
/* Accept the first replacement that beats the original cost.  */
6907 if (acost < orig_cost)
6909 *delta = act_delta;
6910 return acost;
6912 else
6913 iv_ca_delta_free (&act_delta);
6916 return orig_cost;
6919 /* Tries to extend the sets IVS in the best possible way in order to
6920 express the GROUP. If ORIGINALP is true, prefer candidates from
6921 the original set of IVs, otherwise favor important candidates not
6922 based on any memory object. */
/* Implementation notes: GROUP is first added to IVS and the resulting cost
   is the baseline to beat.  Two search phases follow: the first walks the
   group's related_cands with the ORIGINALP filter applied, the second runs
   only while the best cost is still infinite and walks every entry of the
   group's cost_map.  The best delta found is committed to IVS before
   returning; the result is true iff the best cost is finite.  */
6924 static bool
6925 try_add_cand_for (struct ivopts_data *data, class iv_ca *ivs,
6926 struct iv_group *group, bool originalp)
6928 comp_cost best_cost, act_cost;
6929 unsigned i;
6930 bitmap_iterator bi;
6931 struct iv_cand *cand;
6932 struct iv_ca_delta *best_delta = NULL, *act_delta;
6933 class cost_pair *cp;
6935 iv_ca_add_group (data, ivs, group);
6936 best_cost = iv_ca_cost (ivs);
/* If adding the group already assigned it a cost pair, remember that
   assignment as the initial best delta and clear it again so the trials
   below start from an unassigned group.  */
6937 cp = iv_ca_cand_for_group (ivs, group);
6938 if (cp)
6940 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6941 iv_ca_set_no_cp (data, ivs, group);
6944 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6945 first try important candidates not based on any memory object. Only if
6946 this fails, try the specific ones. Rationale -- in loops with many
6947 variables the best choice often is to use just one generic biv. If we
6948 added here many ivs specific to the uses, the optimization algorithm later
6949 would be likely to get stuck in a local minimum, thus causing us to create
6950 too many ivs. The approach from few ivs to more seems more likely to be
6951 successful -- starting from few ivs, replacing an expensive use by a
6952 specific iv should always be a win. */
6953 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6955 cand = data->vcands[i];
6957 if (originalp && cand->pos !=IP_ORIGINAL)
6958 continue;
6960 if (!originalp && cand->iv->base_object != NULL_TREE)
6961 continue;
/* Candidates already in the set are not tried here.  */
6963 if (iv_ca_cand_used_p (ivs, cand))
6964 continue;
6966 cp = get_group_iv_cost (data, group, cand);
6967 if (!cp)
6968 continue;
/* Install CP temporarily, let iv_ca_extend evaluate using CAND for the
   other groups too, then undo the assignment and prepend it to the delta.  */
6970 iv_ca_set_cp (data, ivs, group, cp);
6971 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6972 true);
6973 iv_ca_set_no_cp (data, ivs, group);
6974 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6976 if (act_cost < best_cost)
6978 best_cost = act_cost;
6980 iv_ca_delta_free (&best_delta);
6981 best_delta = act_delta;
6983 else
6984 iv_ca_delta_free (&act_delta);
/* Fallback phase: nothing finite found so far, so walk every cost-map
   entry of the group.  */
6987 if (best_cost.infinite_cost_p ())
6989 for (i = 0; i < group->n_map_members; i++)
6991 cp = group->cost_map + i;
6992 cand = cp->cand;
6993 if (!cand)
6994 continue;
6996 /* Already tried this. */
6997 if (cand->important)
6999 if (originalp && cand->pos == IP_ORIGINAL)
7000 continue;
7001 if (!originalp && cand->iv->base_object == NULL_TREE)
7002 continue;
7005 if (iv_ca_cand_used_p (ivs, cand))
7006 continue;
7008 act_delta = NULL;
7009 iv_ca_set_cp (data, ivs, group, cp);
7010 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
7011 iv_ca_set_no_cp (data, ivs, group);
7012 act_delta = iv_ca_delta_add (group,
7013 iv_ca_cand_for_group (ivs, group),
7014 cp, act_delta);
7016 if (act_cost < best_cost)
7018 best_cost = act_cost;
7020 if (best_delta)
7021 iv_ca_delta_free (&best_delta);
7022 best_delta = act_delta;
7024 else
7025 iv_ca_delta_free (&act_delta);
/* Make the best assignment found permanent.  */
7029 iv_ca_delta_commit (data, ivs, best_delta, true);
7030 iv_ca_delta_free (&best_delta);
7032 return !best_cost.infinite_cost_p ();
7035 /* Finds an initial assignment of candidates to uses. */
7037 static class iv_ca *
7038 get_initial_solution (struct ivopts_data *data, bool originalp)
7040 unsigned i;
7041 class iv_ca *ivs = iv_ca_new (data);
7043 for (i = 0; i < data->vgroups.length (); i++)
7044 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
7046 iv_ca_free (&ivs);
7047 return NULL;
7050 return ivs;
7053 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
7054 points to a bool variable, this function tries to break local
7055 optimal fixed-point by replacing candidates in IVS if it's true. */
7057 static bool
7058 try_improve_iv_set (struct ivopts_data *data,
7059 class iv_ca *ivs, bool *try_replace_p)
7061 unsigned i, n_ivs;
7062 comp_cost acost, best_cost = iv_ca_cost (ivs);
7063 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
7064 struct iv_cand *cand;
7066 /* Try extending the set of induction variables by one. */
7067 for (i = 0; i < data->vcands.length (); i++)
7069 cand = data->vcands[i];
7071 if (iv_ca_cand_used_p (ivs, cand))
7072 continue;
7074 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
7075 if (!act_delta)
7076 continue;
7078 /* If we successfully added the candidate and the set is small enough,
7079 try optimizing it by removing other candidates. */
7080 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
7082 iv_ca_delta_commit (data, ivs, act_delta, true);
7083 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
7084 iv_ca_delta_commit (data, ivs, act_delta, false);
7085 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
7088 if (acost < best_cost)
7090 best_cost = acost;
7091 iv_ca_delta_free (&best_delta);
7092 best_delta = act_delta;
7094 else
7095 iv_ca_delta_free (&act_delta);
7098 if (!best_delta)
7100 /* Try removing the candidates from the set instead. */
7101 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
7103 if (!best_delta && *try_replace_p)
7105 *try_replace_p = false;
7106 /* So far candidate selecting algorithm tends to choose fewer IVs
7107 so that it can handle cases in which loops have many variables
7108 but the best choice is often to use only one general biv. One
7109 weakness is it can't handle opposite cases, in which different
7110 candidates should be chosen with respect to each use. To solve
7111 the problem, we replace candidates in a manner described by the
7112 comments of iv_ca_replace, thus give general algorithm a chance
7113 to break local optimal fixed-point in these cases. */
7114 best_cost = iv_ca_replace (data, ivs, &best_delta);
7117 if (!best_delta)
7118 return false;
7121 iv_ca_delta_commit (data, ivs, best_delta, true);
7122 iv_ca_delta_free (&best_delta);
7123 return best_cost == iv_ca_cost (ivs);
7126 /* Attempts to find the optimal set of induction variables. We do simple
7127 greedy heuristic -- we try to replace at most one candidate in the selected
7128 solution and remove the unused ivs while this improves the cost. */
7130 static class iv_ca *
7131 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7133 class iv_ca *set;
7134 bool try_replace_p = true;
7136 /* Get the initial solution. */
7137 set = get_initial_solution (data, originalp);
7138 if (!set)
7140 if (dump_file && (dump_flags & TDF_DETAILS))
7141 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7142 return NULL;
7145 if (dump_file && (dump_flags & TDF_DETAILS))
7147 fprintf (dump_file, "Initial set of candidates:\n");
7148 iv_ca_dump (data, dump_file, set);
7151 while (try_improve_iv_set (data, set, &try_replace_p))
7153 if (dump_file && (dump_flags & TDF_DETAILS))
7155 fprintf (dump_file, "Improved to:\n");
7156 iv_ca_dump (data, dump_file, set);
7160 /* If the set has infinite_cost, it can't be optimal. */
7161 if (iv_ca_cost (set).infinite_cost_p ())
7163 if (dump_file && (dump_flags & TDF_DETAILS))
7164 fprintf (dump_file,
7165 "Overflow to infinite cost in try_improve_iv_set.\n");
7166 iv_ca_free (&set);
7168 return set;
7171 static class iv_ca *
7172 find_optimal_iv_set (struct ivopts_data *data)
7174 unsigned i;
7175 comp_cost cost, origcost;
7176 class iv_ca *set, *origset;
7178 /* Determine the cost based on a strategy that starts with original IVs,
7179 and try again using a strategy that prefers candidates not based
7180 on any IVs. */
7181 origset = find_optimal_iv_set_1 (data, true);
7182 set = find_optimal_iv_set_1 (data, false);
7184 if (!origset && !set)
7185 return NULL;
7187 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7188 cost = set ? iv_ca_cost (set) : infinite_cost;
7190 if (dump_file && (dump_flags & TDF_DETAILS))
7192 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
7193 origcost.cost, origcost.complexity);
7194 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
7195 cost.cost, cost.complexity);
7198 /* Choose the one with the best cost. */
7199 if (origcost <= cost)
7201 if (set)
7202 iv_ca_free (&set);
7203 set = origset;
7205 else if (origset)
7206 iv_ca_free (&origset);
7208 for (i = 0; i < data->vgroups.length (); i++)
7210 struct iv_group *group = data->vgroups[i];
7211 group->selected = iv_ca_cand_for_group (set, group)->cand;
7214 return set;
7217 /* Creates a new induction variable corresponding to CAND. */
7219 static void
7220 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7222 gimple_stmt_iterator incr_pos;
7223 tree base;
7224 struct iv_use *use;
7225 struct iv_group *group;
7226 bool after = false;
7228 gcc_assert (cand->iv != NULL);
7230 switch (cand->pos)
7232 case IP_NORMAL:
7233 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7234 break;
7236 case IP_END:
7237 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7238 after = true;
7239 if (!gsi_end_p (incr_pos) && stmt_ends_bb_p (gsi_stmt (incr_pos)))
7241 edge e = find_edge (gsi_bb (incr_pos), data->current_loop->header);
7242 incr_pos = gsi_after_labels (split_edge (e));
7243 after = false;
7245 break;
7247 case IP_AFTER_USE:
7248 after = true;
7249 /* fall through */
7250 case IP_BEFORE_USE:
7251 incr_pos = gsi_for_stmt (cand->incremented_at);
7252 break;
7254 case IP_ORIGINAL:
7255 /* Mark that the iv is preserved. */
7256 name_info (data, cand->var_before)->preserve_biv = true;
7257 name_info (data, cand->var_after)->preserve_biv = true;
7259 /* Rewrite the increment so that it uses var_before directly. */
7260 use = find_interesting_uses_op (data, cand->var_after);
7261 group = data->vgroups[use->group_id];
7262 group->selected = cand;
7263 return;
7266 gimple_add_tmp_var (cand->var_before);
7268 base = unshare_expr (cand->iv->base);
7270 create_iv (base, unshare_expr (cand->iv->step),
7271 cand->var_before, data->current_loop,
7272 &incr_pos, after, &cand->var_before, &cand->var_after);
7275 /* Creates new induction variables described in SET. */
7277 static void
7278 create_new_ivs (struct ivopts_data *data, class iv_ca *set)
7280 unsigned i;
7281 struct iv_cand *cand;
7282 bitmap_iterator bi;
7284 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7286 cand = data->vcands[i];
7287 create_new_iv (data, cand);
7290 if (dump_file && (dump_flags & TDF_DETAILS))
7292 fprintf (dump_file, "Selected IV set for loop %d",
7293 data->current_loop->num);
7294 if (data->loop_loc != UNKNOWN_LOCATION)
7295 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7296 LOCATION_LINE (data->loop_loc));
7297 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7298 avg_loop_niter (data->current_loop));
7299 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7300 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7302 cand = data->vcands[i];
7303 dump_cand (dump_file, cand);
7305 fprintf (dump_file, "\n");
7309 /* Rewrites USE (definition of iv used in a nonlinear expression)
7310 using candidate CAND. */
/* Implementation notes: the value of USE is recomputed from CAND as the sum
   of two affine parts obtained from get_computation_aff_1, with the constant
   offset of the first part split off and added last.  The resulting
   statements are inserted before the insertion point chosen below; a PHI
   use is replaced by an assignment to the PHI result, an assignment use has
   its right-hand side replaced in place.  */
7312 static void
7313 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7314 struct iv_use *use, struct iv_cand *cand)
7316 gassign *ass;
7317 gimple_stmt_iterator bsi;
7318 tree comp, type = get_use_type (use), tgt;
7320 /* An important special case -- if we are asked to express value of
7321 the original iv by itself, just exit; there is no need to
7322 introduce a new computation (that might also need casting the
7323 variable to unsigned and back). */
7324 if (cand->pos == IP_ORIGINAL
7325 && cand->incremented_at == use->stmt)
7327 tree op = NULL_TREE;
7328 enum tree_code stmt_code;
7330 gcc_assert (is_gimple_assign (use->stmt));
7331 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7333 /* Check whether we may leave the computation unchanged.
7334 This is the case only if it does not rely on other
7335 computations in the loop -- otherwise, the computation
7336 we rely upon may be removed in remove_unused_ivs,
7337 thus leading to ICE. */
7338 stmt_code = gimple_assign_rhs_code (use->stmt);
7339 if (stmt_code == PLUS_EXPR
7340 || stmt_code == MINUS_EXPR
7341 || stmt_code == POINTER_PLUS_EXPR)
/* OP becomes the operand of the increment that is not var_before.  */
7343 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7344 op = gimple_assign_rhs2 (use->stmt);
7345 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7346 op = gimple_assign_rhs1 (use->stmt);
/* Keep the statement as is when OP is loop invariant or is an SSA name
   whose iv has a zero step.  */
7349 if (op != NULL_TREE)
7351 if (expr_invariant_in_loop_p (data->current_loop, op))
7352 return;
7353 if (TREE_CODE (op) == SSA_NAME)
7355 struct iv *iv = get_iv (data, op);
7356 if (iv != NULL && integer_zerop (iv->step))
7357 return;
/* Pick the target name and the insertion point: after the labels of the
   PHI's block for a PHI, at the statement itself for an assignment.  */
7362 switch (gimple_code (use->stmt))
7364 case GIMPLE_PHI:
7365 tgt = PHI_RESULT (use->stmt);
7367 /* If we should keep the biv, do not replace it. */
7368 if (name_info (data, tgt)->preserve_biv)
7369 return;
7371 bsi = gsi_after_labels (gimple_bb (use->stmt));
7372 break;
7374 case GIMPLE_ASSIGN:
7375 tgt = gimple_assign_lhs (use->stmt);
7376 bsi = gsi_for_stmt (use->stmt);
7377 break;
7379 default:
7380 gcc_unreachable ();
/* A failure to express USE by CAND at this point is treated as
   impossible.  */
7383 aff_tree aff_inv, aff_var;
7384 if (!get_computation_aff_1 (data->current_loop, use->stmt,
7385 use, cand, &aff_inv, &aff_var))
7386 gcc_unreachable ();
7388 unshare_aff_combination (&aff_inv);
7389 unshare_aff_combination (&aff_var);
7390 /* Prefer CSE opportunity than loop invariant by adding offset at last
7391 so that iv_uses have different offsets can be CSEed. */
7392 poly_widest_int offset = aff_inv.offset;
7393 aff_inv.offset = 0;
7395 gimple_seq stmt_list = NULL, seq = NULL;
7396 tree comp_op1 = aff_combination_to_tree (&aff_inv);
7397 tree comp_op2 = aff_combination_to_tree (&aff_var);
7398 gcc_assert (comp_op1 && comp_op2);
7400 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
7401 gimple_seq_add_seq (&stmt_list, seq);
7402 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
7403 gimple_seq_add_seq (&stmt_list, seq);
/* Make sure a pointer-typed operand, if any, ends up in COMP_OP1.  */
7405 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
7406 std::swap (comp_op1, comp_op2);
7408 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
7410 comp = fold_build_pointer_plus (comp_op1,
7411 fold_convert (sizetype, comp_op2));
7412 comp = fold_build_pointer_plus (comp,
7413 wide_int_to_tree (sizetype, offset));
7415 else
7417 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
7418 fold_convert (TREE_TYPE (comp_op1), comp_op2));
7419 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
7420 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
7423 comp = fold_convert (type, comp);
7424 comp = force_gimple_operand (comp, &seq, false, NULL);
7425 gimple_seq_add_seq (&stmt_list, seq);
7426 if (gimple_code (use->stmt) != GIMPLE_PHI
7427 /* We can't allow re-allocating the stmt as it might be pointed
7428 to still. */
7429 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7430 >= gimple_num_ops (gsi_stmt (bsi))))
7432 comp = force_gimple_operand (comp, &seq, true, NULL);
7433 gimple_seq_add_seq (&stmt_list, seq);
7434 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7436 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7437 /* As this isn't a plain copy we have to reset alignment
7438 information. */
7439 if (SSA_NAME_PTR_INFO (comp))
7440 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
/* Emit the helper statements, then install the new value.  */
7444 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7445 if (gimple_code (use->stmt) == GIMPLE_PHI)
7447 ass = gimple_build_assign (tgt, comp);
7448 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7450 bsi = gsi_for_stmt (use->stmt);
7451 remove_phi_node (&bsi, false);
7453 else
/* Replace the right-hand side and re-read the statement from the
   iterator into use->stmt.  */
7455 gimple_assign_set_rhs_from_tree (&bsi, comp);
7456 use->stmt = gsi_stmt (bsi);
7460 /* Performs a peephole optimization to reorder the iv update statement with
7461 a mem ref to enable instruction combining in later phases. The mem ref uses
7462 the iv value before the update, so the reordering transformation requires
7463 adjustment of the offset. CAND is the selected IV_CAND.
7465 Example:
7467 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7468 iv2 = iv1 + 1;
7470 if (t < val) (1)
7471 goto L;
7472 goto Head;
7475 directly propagating t over to (1) will introduce overlapping live range
7476 thus increase register pressure. This peephole transform it into:
7479 iv2 = iv1 + 1;
7480 t = MEM_REF (base, iv2, 8, 8);
7481 if (t < val)
7482 goto L;
7483 goto Head;
7486 static void
7487 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7489 tree var_after;
7490 gimple *iv_update, *stmt;
7491 basic_block bb;
7492 gimple_stmt_iterator gsi, gsi_iv;
7494 if (cand->pos != IP_NORMAL)
7495 return;
7497 var_after = cand->var_after;
7498 iv_update = SSA_NAME_DEF_STMT (var_after);
7500 bb = gimple_bb (iv_update);
7501 gsi = gsi_last_nondebug_bb (bb);
7502 stmt = gsi_stmt (gsi);
7504 /* Only handle conditional statement for now. */
7505 if (gimple_code (stmt) != GIMPLE_COND)
7506 return;
7508 gsi_prev_nondebug (&gsi);
7509 stmt = gsi_stmt (gsi);
7510 if (stmt != iv_update)
7511 return;
7513 gsi_prev_nondebug (&gsi);
7514 if (gsi_end_p (gsi))
7515 return;
7517 stmt = gsi_stmt (gsi);
7518 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7519 return;
7521 if (stmt != use->stmt)
7522 return;
7524 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7525 return;
7527 if (dump_file && (dump_flags & TDF_DETAILS))
7529 fprintf (dump_file, "Reordering \n");
7530 print_gimple_stmt (dump_file, iv_update, 0);
7531 print_gimple_stmt (dump_file, use->stmt, 0);
7532 fprintf (dump_file, "\n");
7535 gsi = gsi_for_stmt (use->stmt);
7536 gsi_iv = gsi_for_stmt (iv_update);
7537 gsi_move_before (&gsi_iv, &gsi);
7539 cand->pos = IP_BEFORE_USE;
7540 cand->incremented_at = use->stmt;
7543 /* Return the alias pointer type that should be used for a MEM_REF
7544 associated with USE, which has type USE_PTR_ADDRESS. */
7546 static tree
7547 get_alias_ptr_type_for_ptr_address (iv_use *use)
7549 gcall *call = as_a <gcall *> (use->stmt);
7550 switch (gimple_call_internal_fn (call))
7552 case IFN_MASK_LOAD:
7553 case IFN_MASK_STORE:
7554 case IFN_MASK_LOAD_LANES:
7555 case IFN_MASK_STORE_LANES:
7556 case IFN_LEN_LOAD:
7557 case IFN_LEN_STORE:
7558 /* The second argument contains the correct alias type. */
7559 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7560 return TREE_TYPE (gimple_call_arg (call, 1));
7562 default:
7563 gcc_unreachable ();
7568 /* Rewrites USE (address that is an iv) using candidate CAND. */
/* Implementation notes: after possibly moving the iv update
   (adjust_iv_update_pos), the address is expressed as an affine combination
   based on CAND and turned into a memory reference by create_mem_ref.  For
   USE_PTR_ADDRESS uses the address of that reference, converted to the use
   type and gimplified before the statement, is stored through use->op_p;
   otherwise the reference itself is stored there after copying the
   reference info of the old operand.  */
7570 static void
7571 rewrite_use_address (struct ivopts_data *data,
7572 struct iv_use *use, struct iv_cand *cand)
7574 aff_tree aff;
7575 bool ok;
7577 adjust_iv_update_pos (cand, use);
7578 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7579 gcc_assert (ok);
7580 unshare_aff_combination (&aff);
7582 /* To avoid undefined overflow problems, all IV candidates use unsigned
7583 integer types. The drawback is that this makes it impossible for
7584 create_mem_ref to distinguish an IV that is based on a memory object
7585 from one that represents simply an offset.
7587 To work around this problem, we pass a hint to create_mem_ref that
7588 indicates which variable (if any) in aff is an IV based on a memory
7589 object. Note that we only consider the candidate. If this is not
7590 based on an object, the base of the reference is in some subexpression
7591 of the use -- but these will use pointer types, so they are recognized
7592 by the create_mem_ref heuristics anyway. */
7593 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7594 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7595 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7596 tree type = use->mem_type;
7597 tree alias_ptr_type;
/* For pointer-address uses the alias type comes from the internal call;
   otherwise it is derived from the existing reference, and the access type
   is rebuilt when the object's alignment differs from the type's.  */
7598 if (use->type == USE_PTR_ADDRESS)
7599 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7600 else
7602 gcc_assert (type == TREE_TYPE (*use->op_p));
7603 unsigned int align = get_object_alignment (*use->op_p);
7604 if (align != TYPE_ALIGN (type))
7605 type = build_aligned_type (type, align);
7606 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7608 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7609 iv, base_hint, data->speed);
7611 if (use->type == USE_PTR_ADDRESS)
7613 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7614 ref = fold_convert (get_use_type (use), ref);
7615 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7616 true, GSI_SAME_STMT);
7618 else
7619 copy_ref_info (ref, *use->op_p);
/* Install the rewritten operand in the statement.  */
7621 *use->op_p = ref;
7624 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7625 candidate CAND. */
7627 static void
7628 rewrite_use_compare (struct ivopts_data *data,
7629 struct iv_use *use, struct iv_cand *cand)
7631 tree comp, op, bound;
7632 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7633 enum tree_code compare;
7634 struct iv_group *group = data->vgroups[use->group_id];
7635 class cost_pair *cp = get_group_iv_cost (data, group, cand);
7637 bound = cp->value;
7638 if (bound)
7640 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7641 tree var_type = TREE_TYPE (var);
7642 gimple_seq stmts;
7644 if (dump_file && (dump_flags & TDF_DETAILS))
7646 fprintf (dump_file, "Replacing exit test: ");
7647 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7649 compare = cp->comp;
7650 bound = unshare_expr (fold_convert (var_type, bound));
7651 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7652 if (stmts)
7653 gsi_insert_seq_on_edge_immediate (
7654 loop_preheader_edge (data->current_loop),
7655 stmts);
7657 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7658 gimple_cond_set_lhs (cond_stmt, var);
7659 gimple_cond_set_code (cond_stmt, compare);
7660 gimple_cond_set_rhs (cond_stmt, op);
7661 return;
7664 /* The induction variable elimination failed; just express the original
7665 giv. */
7666 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7667 gcc_assert (comp != NULL_TREE);
7668 gcc_assert (use->op_p != NULL);
7669 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7670 SSA_NAME_VAR (*use->op_p),
7671 true, GSI_SAME_STMT);
7674 /* Rewrite the groups using the selected induction variables. */
7676 static void
7677 rewrite_groups (struct ivopts_data *data)
7679 unsigned i, j;
7681 for (i = 0; i < data->vgroups.length (); i++)
7683 struct iv_group *group = data->vgroups[i];
7684 struct iv_cand *cand = group->selected;
7686 gcc_assert (cand);
7688 if (group->type == USE_NONLINEAR_EXPR)
7690 for (j = 0; j < group->vuses.length (); j++)
7692 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7693 update_stmt (group->vuses[j]->stmt);
7696 else if (address_p (group->type))
7698 for (j = 0; j < group->vuses.length (); j++)
7700 rewrite_use_address (data, group->vuses[j], cand);
7701 update_stmt (group->vuses[j]->stmt);
7704 else
7706 gcc_assert (group->type == USE_COMPARE);
7708 for (j = 0; j < group->vuses.length (); j++)
7710 rewrite_use_compare (data, group->vuses[j], cand);
7711 update_stmt (group->vuses[j]->stmt);
7717 /* Removes the ivs that are not used after rewriting. */
/* Implementation notes: every relevant SSA version that has an iv with a
   nonzero step, no invariant id, no nonlinear use and no preserve_biv mark
   gets its bit set in TOREMOVE.  When debug bind statements may exist, the
   debug uses of such a name are additionally rewritten to an expression
   based on one of the selected candidates, so that the name is no longer
   referenced by debug statements.  */
7719 static void
7720 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7722 unsigned j;
7723 bitmap_iterator bi;
7725 /* Figure out an order in which to release SSA DEFs so that we don't
7726 release something that we'd have to propagate into a debug stmt
7727 afterwards. */
7728 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7730 struct version_info *info;
7732 info = ver_info (data, j);
7733 if (info->iv
7734 && !integer_zerop (info->iv->step)
7735 && !info->inv_id
7736 && !info->iv->nonlin_use
7737 && !info->preserve_biv)
7739 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7741 tree def = info->iv->ssa_name;
7743 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7745 imm_use_iterator imm_iter;
7746 use_operand_p use_p;
7747 gimple *stmt;
7748 int count = 0;
7750 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7752 if (!gimple_debug_bind_p (stmt))
7753 continue;
7755 /* We just want to determine whether to do nothing
7756 (count == 0), to substitute the computed
7757 expression into a single use of the SSA DEF by
7758 itself (count == 1), or to use a debug temp
7759 because the SSA DEF is used multiple times or as
7760 part of a larger expression (count > 1). */
7761 count++;
7762 if (gimple_debug_bind_get_value (stmt) != def)
7763 count++;
7765 if (count > 1)
7766 break;
7769 if (!count)
7770 continue;
7772 struct iv_use dummy_use;
7773 struct iv_cand *best_cand = NULL, *cand;
7774 unsigned i, best_pref = 0, cand_pref;
7775 tree comp = NULL_TREE;
/* A zeroed use carrying only the iv is passed to the computation helper.  */
7777 memset (&dummy_use, 0, sizeof (dummy_use));
7778 dummy_use.iv = info->iv;
/* Look at the selected candidates of at most the first 64 groups and score
   each one: 4 for an equal step, 2 for the same mode of the base type, 1
   for an INTEGER_CST base.  */
7779 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7781 cand = data->vgroups[i]->selected;
7782 if (cand == best_cand)
7783 continue;
7784 cand_pref = operand_equal_p (cand->iv->step,
7785 info->iv->step, 0)
7786 ? 4 : 0;
7787 cand_pref
7788 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7789 == TYPE_MODE (TREE_TYPE (info->iv->base))
7790 ? 2 : 0;
7791 cand_pref
7792 += TREE_CODE (cand->iv->base) == INTEGER_CST
7793 ? 1 : 0;
/* A better-scoring candidate only wins if the debug computation can
   actually be built from it.  */
7794 if (best_cand == NULL || best_pref < cand_pref)
7796 tree this_comp
7797 = get_debug_computation_at (data->current_loop,
7798 SSA_NAME_DEF_STMT (def),
7799 &dummy_use, cand);
7800 if (this_comp)
7802 best_cand = cand;
7803 best_pref = cand_pref;
7804 comp = this_comp;
7809 if (!best_cand)
7810 continue;
7812 comp = unshare_expr (comp);
/* With several debug uses (or a use inside a larger expression), bind the
   expression to a debug temporary placed at the definition of DEF and
   substitute the temporary instead.  */
7813 if (count > 1)
7815 tree vexpr = build_debug_expr_decl (TREE_TYPE (comp));
7816 /* FIXME: Is setting the mode really necessary? */
7817 if (SSA_NAME_VAR (def))
7818 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7819 else
7820 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7821 gdebug *def_temp
7822 = gimple_build_debug_bind (vexpr, comp, NULL);
7823 gimple_stmt_iterator gsi;
7825 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7826 gsi = gsi_after_labels (gimple_bb
7827 (SSA_NAME_DEF_STMT (def)));
7828 else
7829 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7831 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7832 comp = vexpr;
/* Replace DEF by COMP in every debug bind statement that uses it.  */
7835 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7837 if (!gimple_debug_bind_p (stmt))
7838 continue;
7840 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7841 SET_USE (use_p, comp);
7843 update_stmt (stmt);
7850 /* Frees memory occupied by class tree_niter_desc in *VALUE. Callback
7851 for hash_map::traverse. */
7853 bool
7854 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7856 free (value);
7857 return true;
7860 /* Frees data allocated by the optimization of a single loop. */
7862 static void
7863 free_loop_data (struct ivopts_data *data)
7865 unsigned i, j;
7866 bitmap_iterator bi;
7867 tree obj;
7869 if (data->niters)
7871 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7872 delete data->niters;
7873 data->niters = NULL;
7876 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7878 struct version_info *info;
7880 info = ver_info (data, i);
7881 info->iv = NULL;
7882 info->has_nonlin_use = false;
7883 info->preserve_biv = false;
7884 info->inv_id = 0;
7886 bitmap_clear (data->relevant);
7887 bitmap_clear (data->important_candidates);
7889 for (i = 0; i < data->vgroups.length (); i++)
7891 struct iv_group *group = data->vgroups[i];
7893 for (j = 0; j < group->vuses.length (); j++)
7894 free (group->vuses[j]);
7895 group->vuses.release ();
7897 BITMAP_FREE (group->related_cands);
7898 for (j = 0; j < group->n_map_members; j++)
7900 if (group->cost_map[j].inv_vars)
7901 BITMAP_FREE (group->cost_map[j].inv_vars);
7902 if (group->cost_map[j].inv_exprs)
7903 BITMAP_FREE (group->cost_map[j].inv_exprs);
7906 free (group->cost_map);
7907 free (group);
7909 data->vgroups.truncate (0);
7911 for (i = 0; i < data->vcands.length (); i++)
7913 struct iv_cand *cand = data->vcands[i];
7915 if (cand->inv_vars)
7916 BITMAP_FREE (cand->inv_vars);
7917 if (cand->inv_exprs)
7918 BITMAP_FREE (cand->inv_exprs);
7919 free (cand);
7921 data->vcands.truncate (0);
7923 if (data->version_info_size < num_ssa_names)
7925 data->version_info_size = 2 * num_ssa_names;
7926 free (data->version_info);
7927 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7930 data->max_inv_var_id = 0;
7931 data->max_inv_expr_id = 0;
7933 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7934 SET_DECL_RTL (obj, NULL_RTX);
7936 decl_rtl_to_reset.truncate (0);
7938 data->inv_expr_tab->empty ();
7940 data->iv_common_cand_tab->empty ();
7941 data->iv_common_cands.truncate (0);
7944 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7945 loop tree. */
7947 static void
7948 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7950 free_loop_data (data);
7951 free (data->version_info);
7952 BITMAP_FREE (data->relevant);
7953 BITMAP_FREE (data->important_candidates);
7955 decl_rtl_to_reset.release ();
7956 data->vgroups.release ();
7957 data->vcands.release ();
7958 delete data->inv_expr_tab;
7959 data->inv_expr_tab = NULL;
7960 free_affine_expand_cache (&data->name_expansion_cache);
7961 if (data->base_object_map)
7962 delete data->base_object_map;
7963 delete data->iv_common_cand_tab;
7964 data->iv_common_cand_tab = NULL;
7965 data->iv_common_cands.release ();
7966 obstack_free (&data->iv_obstack, NULL);
7969 /* Returns true if the loop body BODY includes any function calls. */
7971 static bool
7972 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7974 gimple_stmt_iterator gsi;
7975 unsigned i;
7977 for (i = 0; i < num_nodes; i++)
7978 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7980 gimple *stmt = gsi_stmt (gsi);
7981 if (is_gimple_call (stmt)
7982 && !gimple_call_internal_p (stmt)
7983 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7984 return true;
7986 return false;
7989 /* Determine cost scaling factor for basic blocks in loop. */
7990 #define COST_SCALING_FACTOR_BOUND (20)
7992 static void
7993 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7995 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7996 if (!data->speed || lfreq <= 0)
7997 return;
7999 int max_freq = lfreq;
8000 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8002 body[i]->aux = (void *)(intptr_t) 1;
8003 if (max_freq < body[i]->count.to_frequency (cfun))
8004 max_freq = body[i]->count.to_frequency (cfun);
8006 if (max_freq > lfreq)
8008 int divisor, factor;
8009 /* Check if scaling factor itself needs to be scaled by the bound. This
8010 is to avoid overflow when scaling cost according to profile info. */
8011 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
8013 divisor = max_freq;
8014 factor = COST_SCALING_FACTOR_BOUND;
8016 else
8018 divisor = lfreq;
8019 factor = 1;
8021 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8023 int bfreq = body[i]->count.to_frequency (cfun);
8024 if (bfreq <= lfreq)
8025 continue;
8027 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
8032 /* Find doloop comparison use and set its doloop_p on if found. */
8034 static bool
8035 find_doloop_use (struct ivopts_data *data)
8037 struct loop *loop = data->current_loop;
8039 for (unsigned i = 0; i < data->vgroups.length (); i++)
8041 struct iv_group *group = data->vgroups[i];
8042 if (group->type == USE_COMPARE)
8044 gcc_assert (group->vuses.length () == 1);
8045 struct iv_use *use = group->vuses[0];
8046 gimple *stmt = use->stmt;
8047 if (gimple_code (stmt) == GIMPLE_COND)
8049 basic_block bb = gimple_bb (stmt);
8050 edge true_edge, false_edge;
8051 extract_true_false_edges_from_block (bb, &true_edge, &false_edge);
8052 /* This comparison is used for loop latch. Require latch is empty
8053 for now. */
8054 if ((loop->latch == true_edge->dest
8055 || loop->latch == false_edge->dest)
8056 && empty_block_p (loop->latch))
8058 group->doloop_p = true;
8059 if (dump_file && (dump_flags & TDF_DETAILS))
8061 fprintf (dump_file, "Doloop cmp iv use: ");
8062 print_gimple_stmt (dump_file, stmt, TDF_DETAILS);
8064 return true;
8070 return false;
8073 /* For the targets which support doloop, to predict whether later RTL doloop
8074 transformation will perform on this loop, further detect the doloop use and
8075 mark the flag doloop_use_p if predicted. */
8077 void
8078 analyze_and_mark_doloop_use (struct ivopts_data *data)
8080 data->doloop_use_p = false;
8082 if (!flag_branch_on_count_reg)
8083 return;
8085 if (data->current_loop->unroll == USHRT_MAX)
8086 return;
8088 if (!generic_predict_doloop_p (data))
8089 return;
8091 if (find_doloop_use (data))
8093 data->doloop_use_p = true;
8094 if (dump_file && (dump_flags & TDF_DETAILS))
8096 struct loop *loop = data->current_loop;
8097 fprintf (dump_file,
8098 "Predict loop %d can perform"
8099 " doloop optimization later.\n",
8100 loop->num);
8101 flow_loop_dump (loop, dump_file, NULL, 1);
8106 /* Optimizes the LOOP. Returns true if anything changed. */
8108 static bool
8109 tree_ssa_iv_optimize_loop (struct ivopts_data *data, class loop *loop,
8110 bitmap toremove)
8112 bool changed = false;
8113 class iv_ca *iv_ca;
8114 edge exit = single_dom_exit (loop);
8115 basic_block *body;
8117 gcc_assert (!data->niters);
8118 data->current_loop = loop;
8119 data->loop_loc = find_loop_location (loop).get_location_t ();
8120 data->speed = optimize_loop_for_speed_p (loop);
8122 if (dump_file && (dump_flags & TDF_DETAILS))
8124 fprintf (dump_file, "Processing loop %d", loop->num);
8125 if (data->loop_loc != UNKNOWN_LOCATION)
8126 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
8127 LOCATION_LINE (data->loop_loc));
8128 fprintf (dump_file, "\n");
8130 if (exit)
8132 fprintf (dump_file, " single exit %d -> %d, exit condition ",
8133 exit->src->index, exit->dest->index);
8134 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8135 fprintf (dump_file, "\n");
8138 fprintf (dump_file, "\n");
8141 body = get_loop_body (loop);
8142 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
8143 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
8145 data->loop_single_exit_p
8146 = exit != NULL && loop_only_exit_p (loop, body, exit);
8148 /* For each ssa name determines whether it behaves as an induction variable
8149 in some loop. */
8150 if (!find_induction_variables (data, body))
8151 goto finish;
8153 /* Finds interesting uses (item 1). */
8154 find_interesting_uses (data, body);
8155 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
8156 goto finish;
8158 /* Determine cost scaling factor for basic blocks in loop. */
8159 determine_scaling_factor (data, body);
8161 /* Analyze doloop possibility and mark the doloop use if predicted. */
8162 analyze_and_mark_doloop_use (data);
8164 /* Finds candidates for the induction variables (item 2). */
8165 find_iv_candidates (data);
8167 /* Calculates the costs (item 3, part 1). */
8168 determine_iv_costs (data);
8169 determine_group_iv_costs (data);
8170 determine_set_costs (data);
8172 /* Find the optimal set of induction variables (item 3, part 2). */
8173 iv_ca = find_optimal_iv_set (data);
8174 /* Cleanup basic block aux field. */
8175 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
8176 body[i]->aux = NULL;
8177 if (!iv_ca)
8178 goto finish;
8179 changed = true;
8181 /* Create the new induction variables (item 4, part 1). */
8182 create_new_ivs (data, iv_ca);
8183 iv_ca_free (&iv_ca);
8185 /* Rewrite the uses (item 4, part 2). */
8186 rewrite_groups (data);
8188 /* Remove the ivs that are unused after rewriting. */
8189 remove_unused_ivs (data, toremove);
8191 finish:
8192 free (body);
8193 free_loop_data (data);
8195 return changed;
8198 /* Main entry point. Optimizes induction variables in loops. */
8200 void
8201 tree_ssa_iv_optimize (void)
8203 struct ivopts_data data;
8204 auto_bitmap toremove;
8206 tree_ssa_iv_optimize_init (&data);
8207 mark_ssa_maybe_undefs ();
8209 /* Optimize the loops starting with the innermost ones. */
8210 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
8212 if (!dbg_cnt (ivopts_loop))
8213 continue;
8215 if (dump_file && (dump_flags & TDF_DETAILS))
8216 flow_loop_dump (loop, dump_file, NULL, 1);
8218 tree_ssa_iv_optimize_loop (&data, loop, toremove);
8221 /* Remove eliminated IV defs. */
8222 release_defs_bitset (toremove);
8224 /* We have changed the structure of induction variables; it might happen
8225 that definitions in the scev database refer to some of them that were
8226 eliminated. */
8227 scev_reset_htab ();
8228 /* Likewise niter and control-IV information. */
8229 free_numbers_of_iterations_estimates (cfun);
8231 tree_ssa_iv_optimize_finalize (&data);
8234 #include "gt-tree-ssa-loop-ivopts.h"