gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and enable us to decide
66 costs more precisely, but getting all the interactions right would be
67 complicated. */
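As a rough, hand-written illustration of steps 1) through 4) above (not part of this file and not actual compiler output; the function names are made up), the kind of rewrite the pass aims for on a simple array loop looks like this:

/* Before ivopts: the loop uses a counter i and recomputes the address
   a + i * sizeof (int) on every iteration (assuming n >= 0).  */
void
example_before (int *a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] *= 2;
}

/* After choosing a pointer induction variable and eliminating i, the loop
   conceptually becomes the following.  */
void
example_after (int *a, int n)
{
  int *p = a;
  int *end = a + n;
  for (; p != end; p++)   /* one iv replaces both i and the address.  */
    *p *= 2;
}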
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 1000000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
129 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
135 return niter;
138 struct iv_use;
140 /* Representation of the induction variable. */
141 struct iv
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The nonlinear use of this iv, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
165 /* Types of uses. */
166 enum use_type
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
177 /* Cost of a computation. */
178 struct comp_cost
180 comp_cost (): cost (0), complexity (0), scratch (0)
183 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
184 : cost (cost), complexity (complexity), scratch (scratch)
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
199 /* Subtracts constant C from this comp_cost. */
200 comp_cost operator-= (HOST_WIDE_INT c);
202 /* Divide the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
205 /* Multiply the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
208 /* Subtracts cost COST2 from COST1. */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
220 /* Returns true if COST1 is smaller than or equal to COST2. */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
223 int64_t cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int64_t scratch; /* Scratch used during cost computation. */
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, 0, INFTY);
234 bool
235 comp_cost::infinite_cost_p ()
237 return cost == INFTY;
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
246 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
247 cost1.cost += cost2.cost;
248 cost1.complexity += cost2.complexity;
250 return cost1;
253 comp_cost
254 operator- (comp_cost cost1, comp_cost cost2)
256 if (cost1.infinite_cost_p ())
257 return infinite_cost;
259 gcc_assert (!cost2.infinite_cost_p ());
260 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
262 cost1.cost -= cost2.cost;
263 cost1.complexity -= cost2.complexity;
265 return cost1;
268 comp_cost
269 comp_cost::operator+= (comp_cost cost)
271 *this = *this + cost;
272 return *this;
275 comp_cost
276 comp_cost::operator+= (HOST_WIDE_INT c)
278 if (infinite_cost_p ())
279 return *this;
281 gcc_assert (this->cost + c < infinite_cost.cost);
282 this->cost += c;
284 return *this;
287 comp_cost
288 comp_cost::operator-= (HOST_WIDE_INT c)
290 if (infinite_cost_p ())
291 return *this;
293 gcc_assert (this->cost - c < infinite_cost.cost);
294 this->cost -= c;
296 return *this;
299 comp_cost
300 comp_cost::operator/= (HOST_WIDE_INT c)
302 gcc_assert (c != 0);
303 if (infinite_cost_p ())
304 return *this;
306 this->cost /= c;
308 return *this;
311 comp_cost
312 comp_cost::operator*= (HOST_WIDE_INT c)
314 if (infinite_cost_p ())
315 return *this;
317 gcc_assert (this->cost * c < infinite_cost.cost);
318 this->cost *= c;
320 return *this;
323 comp_cost
324 comp_cost::operator-= (comp_cost cost)
326 *this = *this - cost;
327 return *this;
330 bool
331 operator< (comp_cost cost1, comp_cost cost2)
333 if (cost1.cost == cost2.cost)
334 return cost1.complexity < cost2.complexity;
336 return cost1.cost < cost2.cost;
339 bool
340 operator== (comp_cost cost1, comp_cost cost2)
342 return cost1.cost == cost2.cost
343 && cost1.complexity == cost2.complexity;
346 bool
347 operator<= (comp_cost cost1, comp_cost cost2)
349 return cost1 < cost2 || cost1 == cost2;
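A small, purely illustrative sketch (not part of the pass; the function name is made up) of how the comp_cost arithmetic and comparisons defined above behave:

/* Illustrative only: exercises the comp_cost operators defined above.  */
static void
comp_cost_example (void)
{
  comp_cost a (4, 1);            /* runtime cost 4, complexity 1.  */
  comp_cost b (4, 2);            /* same cost, higher complexity.  */
  comp_cost sum = a + b;         /* cost 8, complexity 3.  */
  gcc_checking_assert (a < b);   /* equal costs, complexity breaks the tie.  */
  sum += infinite_cost;          /* anything involving INFTY stays infinite.  */
  gcc_checking_assert (sum.infinite_cost_p ());
}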
352 struct iv_inv_expr_ent;
354 /* The candidate - cost pair. */
355 struct cost_pair
357 struct iv_cand *cand; /* The candidate. */
358 comp_cost cost; /* The cost. */
359 enum tree_code comp; /* For iv elimination, the comparison. */
360 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
361 preserved when representing iv_use with iv_cand. */
362 bitmap inv_exprs; /* The list of newly created invariant expressions
363 when representing iv_use with iv_cand. */
364 tree value; /* For final value elimination, the expression for
365 the final value of the iv. For iv elimination,
366 the new bound to compare with. */
369 /* Use. */
370 struct iv_use
372 unsigned id; /* The id of the use. */
373 unsigned group_id; /* The group id the use belongs to. */
374 enum use_type type; /* Type of the use. */
375 tree mem_type; /* The memory type to use when testing whether an
376 address is legitimate, and what the address's
377 cost is. */
378 struct iv *iv; /* The induction variable it is based on. */
379 gimple *stmt; /* Statement in that it occurs. */
380 tree *op_p; /* The place where it occurs. */
382 tree addr_base; /* Base address with const offset stripped. */
383 poly_uint64_pod addr_offset;
384 /* Const offset stripped from base address. */
387 /* Group of uses. */
388 struct iv_group
390 /* The id of the group. */
391 unsigned id;
392 /* Uses of the group are of the same type. */
393 enum use_type type;
394 /* The set of "related" IV candidates, plus the important ones. */
395 bitmap related_cands;
396 /* Number of IV candidates in the cost_map. */
397 unsigned n_map_members;
398 /* The costs w.r.t. the iv candidates. */
399 struct cost_pair *cost_map;
400 /* The selected candidate for the group. */
401 struct iv_cand *selected;
402 /* Uses in the group. */
403 vec<struct iv_use *> vuses;
406 /* The position where the iv is computed. */
407 enum iv_position
409 IP_NORMAL, /* At the end, just before the exit condition. */
410 IP_END, /* At the end of the latch block. */
411 IP_BEFORE_USE, /* Immediately before a specific use. */
412 IP_AFTER_USE, /* Immediately after a specific use. */
413 IP_ORIGINAL /* The original biv. */
416 /* The induction variable candidate. */
417 struct iv_cand
419 unsigned id; /* The number of the candidate. */
420 bool important; /* Whether this is an "important" candidate, i.e. such
421 that it should be considered by all uses. */
422 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
423 gimple *incremented_at;/* For original biv, the statement where it is
424 incremented. */
425 tree var_before; /* The variable used for it before increment. */
426 tree var_after; /* The variable used for it after increment. */
427 struct iv *iv; /* The value of the candidate. NULL for
428 "pseudocandidate" used to indicate the possibility
429 to replace the final value of an iv by direct
430 computation of the value. */
431 unsigned cost; /* Cost of the candidate. */
432 unsigned cost_step; /* Cost of the candidate's increment operation. */
433 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
434 where it is incremented. */
435 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
436 iv_cand. */
437 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
438 handle it as a new invariant expression which will
439 be hoisted out of loop. */
440 struct iv *orig_iv; /* The original iv if this cand is added from biv with
441 smaller type. */
444 /* Hashtable entry for common candidate derived from iv uses. */
445 struct iv_common_cand
447 tree base;
448 tree step;
449 /* IV uses from which this common candidate is derived. */
450 auto_vec<struct iv_use *> uses;
451 hashval_t hash;
454 /* Hashtable helpers. */
456 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
458 static inline hashval_t hash (const iv_common_cand *);
459 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
462 /* Hash function for possible common candidates. */
464 inline hashval_t
465 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
467 return ccand->hash;
470 /* Hash table equality function for common candidates. */
472 inline bool
473 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
474 const iv_common_cand *ccand2)
476 return (ccand1->hash == ccand2->hash
477 && operand_equal_p (ccand1->base, ccand2->base, 0)
478 && operand_equal_p (ccand1->step, ccand2->step, 0)
479 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
480 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
483 /* Loop invariant expression hashtable entry. */
485 struct iv_inv_expr_ent
487 /* Tree expression of the entry. */
488 tree expr;
489 /* Unique identifier. */
490 int id;
491 /* Hash value. */
492 hashval_t hash;
495 /* Sort iv_inv_expr_ent pair A and B by id field. */
497 static int
498 sort_iv_inv_expr_ent (const void *a, const void *b)
500 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
501 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
503 unsigned id1 = (*e1)->id;
504 unsigned id2 = (*e2)->id;
506 if (id1 < id2)
507 return -1;
508 else if (id1 > id2)
509 return 1;
510 else
511 return 0;
514 /* Hashtable helpers. */
516 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
518 static inline hashval_t hash (const iv_inv_expr_ent *);
519 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
522 /* Return true if uses of type TYPE represent some form of address. */
524 inline bool
525 address_p (use_type type)
527 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
530 /* Hash function for loop invariant expressions. */
532 inline hashval_t
533 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
535 return expr->hash;
538 /* Hash table equality function for expressions. */
540 inline bool
541 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
542 const iv_inv_expr_ent *expr2)
544 return expr1->hash == expr2->hash
545 && operand_equal_p (expr1->expr, expr2->expr, 0);
548 struct ivopts_data
550 /* The currently optimized loop. */
551 struct loop *current_loop;
552 location_t loop_loc;
554 /* Numbers of iterations for all exits of the current loop. */
555 hash_map<edge, tree_niter_desc *> *niters;
557 /* Number of registers used in it. */
558 unsigned regs_used;
560 /* The size of version_info array allocated. */
561 unsigned version_info_size;
563 /* The array of information for the ssa names. */
564 struct version_info *version_info;
566 /* The hashtable of loop invariant expressions created
567 by ivopt. */
568 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
570 /* The bitmap of indices in version_info whose value was changed. */
571 bitmap relevant;
573 /* The uses of induction variables. */
574 vec<iv_group *> vgroups;
576 /* The candidates. */
577 vec<iv_cand *> vcands;
579 /* A bitmap of important candidates. */
580 bitmap important_candidates;
582 /* Cache used by tree_to_aff_combination_expand. */
583 hash_map<tree, name_expansion *> *name_expansion_cache;
585 /* The hashtable of common candidates derived from iv uses. */
586 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
588 /* The common candidates. */
589 vec<iv_common_cand *> iv_common_cands;
591 /* The maximum invariant variable id. */
592 unsigned max_inv_var_id;
594 /* The maximum invariant expression id. */
595 unsigned max_inv_expr_id;
598 /* Number of no_overflow BIVs which are not used in memory addresses. */
598 unsigned bivs_not_used_in_addr;
600 /* Obstack for iv structure. */
601 struct obstack iv_obstack;
603 /* Whether to consider just related and important candidates when replacing a
604 use. */
605 bool consider_all_candidates;
607 /* Are we optimizing for speed? */
608 bool speed;
610 /* Whether the loop body includes any function calls. */
611 bool body_includes_call;
613 /* Whether the loop body can only be exited via a single exit. */
614 bool loop_single_exit_p;
617 /* An assignment of iv candidates to uses. */
619 struct iv_ca
621 /* The number of uses covered by the assignment. */
622 unsigned upto;
624 /* Number of uses that cannot be expressed by the candidates in the set. */
625 unsigned bad_groups;
627 /* Candidate assigned to a use, together with the related costs. */
628 struct cost_pair **cand_for_group;
630 /* Number of times each candidate is used. */
631 unsigned *n_cand_uses;
633 /* The candidates used. */
634 bitmap cands;
636 /* The number of candidates in the set. */
637 unsigned n_cands;
639 /* The number of invariants needed, including both invariant variables and
640 invariant expressions. */
641 unsigned n_invs;
643 /* Total cost of expressing uses. */
644 comp_cost cand_use_cost;
646 /* Total cost of candidates. */
647 int64_t cand_cost;
649 /* Number of times each invariant variable is used. */
650 unsigned *n_inv_var_uses;
652 /* Number of times each invariant expression is used. */
653 unsigned *n_inv_expr_uses;
655 /* Total cost of the assignment. */
656 comp_cost cost;
659 /* Difference of two iv candidate assignments. */
661 struct iv_ca_delta
663 /* Changed group. */
664 struct iv_group *group;
666 /* An old assignment (for rollback purposes). */
667 struct cost_pair *old_cp;
669 /* A new assignment. */
670 struct cost_pair *new_cp;
672 /* Next change in the list. */
673 struct iv_ca_delta *next;
676 /* Bound on number of candidates below which all candidates are considered. */
678 #define CONSIDER_ALL_CANDIDATES_BOUND \
679 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
682 optimizing such a loop would help, and it would take ages). */
684 #define MAX_CONSIDERED_GROUPS \
685 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
687 /* If there are at most this number of ivs in the set, always try removing
688 unnecessary ivs from the set. */
690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
691 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
693 /* The list of trees for which the decl_rtl field must be reset is stored
694 here. */
696 static vec<tree> decl_rtl_to_reset;
698 static comp_cost force_expr_to_var_cost (tree, bool);
700 /* The single loop exit if it dominates the latch, NULL otherwise. */
702 edge
703 single_dom_exit (struct loop *loop)
705 edge exit = single_exit (loop);
707 if (!exit)
708 return NULL;
710 if (!just_once_each_iteration_p (loop, exit->src))
711 return NULL;
713 return exit;
716 /* Dumps information about the induction variable IV to FILE. Don't dump
717 variable's name if DUMP_NAME is FALSE. The information is dumped with
718 preceding spaces indicated by INDENT_LEVEL. */
720 void
721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
723 const char *p;
724 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
726 if (indent_level > 4)
727 indent_level = 4;
728 p = spaces + 8 - (indent_level << 1);
730 fprintf (file, "%sIV struct:\n", p);
731 if (iv->ssa_name && dump_name)
733 fprintf (file, "%s SSA_NAME:\t", p);
734 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
735 fprintf (file, "\n");
738 fprintf (file, "%s Type:\t", p);
739 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
740 fprintf (file, "\n");
742 fprintf (file, "%s Base:\t", p);
743 print_generic_expr (file, iv->base, TDF_SLIM);
744 fprintf (file, "\n");
746 fprintf (file, "%s Step:\t", p);
747 print_generic_expr (file, iv->step, TDF_SLIM);
748 fprintf (file, "\n");
750 if (iv->base_object)
752 fprintf (file, "%s Object:\t", p);
753 print_generic_expr (file, iv->base_object, TDF_SLIM);
754 fprintf (file, "\n");
757 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
759 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
760 p, iv->no_overflow ? "No-overflow" : "Overflow");
763 /* Dumps information about the USE to FILE. */
765 void
766 dump_use (FILE *file, struct iv_use *use)
768 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
769 fprintf (file, " At stmt:\t");
770 print_gimple_stmt (file, use->stmt, 0);
771 fprintf (file, " At pos:\t");
772 if (use->op_p)
773 print_generic_expr (file, *use->op_p, TDF_SLIM);
774 fprintf (file, "\n");
775 dump_iv (file, use->iv, false, 2);
778 /* Dumps information about the uses to FILE. */
780 void
781 dump_groups (FILE *file, struct ivopts_data *data)
783 unsigned i, j;
784 struct iv_group *group;
786 for (i = 0; i < data->vgroups.length (); i++)
788 group = data->vgroups[i];
789 fprintf (file, "Group %d:\n", group->id);
790 if (group->type == USE_NONLINEAR_EXPR)
791 fprintf (file, " Type:\tGENERIC\n");
792 else if (group->type == USE_REF_ADDRESS)
793 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
794 else if (group->type == USE_PTR_ADDRESS)
795 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
796 else
798 gcc_assert (group->type == USE_COMPARE);
799 fprintf (file, " Type:\tCOMPARE\n");
801 for (j = 0; j < group->vuses.length (); j++)
802 dump_use (file, group->vuses[j]);
806 /* Dumps information about induction variable candidate CAND to FILE. */
808 void
809 dump_cand (FILE *file, struct iv_cand *cand)
811 struct iv *iv = cand->iv;
813 fprintf (file, "Candidate %d:\n", cand->id);
814 if (cand->inv_vars)
816 fprintf (file, " Depend on inv.vars: ");
817 dump_bitmap (file, cand->inv_vars);
819 if (cand->inv_exprs)
821 fprintf (file, " Depend on inv.exprs: ");
822 dump_bitmap (file, cand->inv_exprs);
825 if (cand->var_before)
827 fprintf (file, " Var before: ");
828 print_generic_expr (file, cand->var_before, TDF_SLIM);
829 fprintf (file, "\n");
831 if (cand->var_after)
833 fprintf (file, " Var after: ");
834 print_generic_expr (file, cand->var_after, TDF_SLIM);
835 fprintf (file, "\n");
838 switch (cand->pos)
840 case IP_NORMAL:
841 fprintf (file, " Incr POS: before exit test\n");
842 break;
844 case IP_BEFORE_USE:
845 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
846 break;
848 case IP_AFTER_USE:
849 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
850 break;
852 case IP_END:
853 fprintf (file, " Incr POS: at end\n");
854 break;
856 case IP_ORIGINAL:
857 fprintf (file, " Incr POS: orig biv\n");
858 break;
861 dump_iv (file, iv, false, 1);
864 /* Returns the info for ssa version VER. */
866 static inline struct version_info *
867 ver_info (struct ivopts_data *data, unsigned ver)
869 return data->version_info + ver;
872 /* Returns the info for ssa name NAME. */
874 static inline struct version_info *
875 name_info (struct ivopts_data *data, tree name)
877 return ver_info (data, SSA_NAME_VERSION (name));
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
881 emitted in LOOP. */
883 static bool
884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
886 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
888 gcc_assert (bb);
890 if (sbb == loop->latch)
891 return true;
893 if (sbb != bb)
894 return false;
896 return stmt == last_stmt (bb);
899 /* Returns true if STMT is after the place where the original induction
900 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
901 if the positions are identical. */
903 static bool
904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
906 basic_block cand_bb = gimple_bb (cand->incremented_at);
907 basic_block stmt_bb = gimple_bb (stmt);
909 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
910 return false;
912 if (stmt_bb != cand_bb)
913 return true;
915 if (true_if_equal
916 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
917 return true;
918 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
921 /* Returns true if STMT is after the place where the induction variable
922 CAND is incremented in LOOP. */
924 static bool
925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
927 switch (cand->pos)
929 case IP_END:
930 return false;
932 case IP_NORMAL:
933 return stmt_after_ip_normal_pos (loop, stmt);
935 case IP_ORIGINAL:
936 case IP_AFTER_USE:
937 return stmt_after_inc_pos (cand, stmt, false);
939 case IP_BEFORE_USE:
940 return stmt_after_inc_pos (cand, stmt, true);
942 default:
943 gcc_unreachable ();
947 /* walk_tree callback for contains_abnormal_ssa_name_p. */
949 static tree
950 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
952 if (TREE_CODE (*tp) == SSA_NAME
953 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
954 return *tp;
956 if (!EXPR_P (*tp))
957 *walk_subtrees = 0;
959 return NULL_TREE;
962 /* Returns true if EXPR contains a ssa name that occurs in an
963 abnormal phi node. */
965 bool
966 contains_abnormal_ssa_name_p (tree expr)
968 return walk_tree_without_duplicates
969 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
972 /* Returns the structure describing number of iterations determined from
973 EXIT of DATA->current_loop, or NULL if something goes wrong. */
975 static struct tree_niter_desc *
976 niter_for_exit (struct ivopts_data *data, edge exit)
978 struct tree_niter_desc *desc;
979 tree_niter_desc **slot;
981 if (!data->niters)
983 data->niters = new hash_map<edge, tree_niter_desc *>;
984 slot = NULL;
986 else
987 slot = data->niters->get (exit);
989 if (!slot)
991 /* Try to determine number of iterations. We cannot safely work with ssa
992 names that appear in phi nodes on abnormal edges, so that we do not
993 create overlapping life ranges for them (PR 27283). */
994 desc = XNEW (struct tree_niter_desc);
995 if (!number_of_iterations_exit (data->current_loop,
996 exit, desc, true)
997 || contains_abnormal_ssa_name_p (desc->niter))
999 XDELETE (desc);
1000 desc = NULL;
1002 data->niters->put (exit, desc);
1004 else
1005 desc = *slot;
1007 return desc;
1010 /* Returns the structure describing number of iterations determined from
1011 single dominating exit of DATA->current_loop, or NULL if something
1012 goes wrong. */
1014 static struct tree_niter_desc *
1015 niter_for_single_dom_exit (struct ivopts_data *data)
1017 edge exit = single_dom_exit (data->current_loop);
1019 if (!exit)
1020 return NULL;
1022 return niter_for_exit (data, exit);
1025 /* Initializes data structures used by the iv optimization pass, stored
1026 in DATA. */
1028 static void
1029 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1031 data->version_info_size = 2 * num_ssa_names;
1032 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1033 data->relevant = BITMAP_ALLOC (NULL);
1034 data->important_candidates = BITMAP_ALLOC (NULL);
1035 data->max_inv_var_id = 0;
1036 data->max_inv_expr_id = 0;
1037 data->niters = NULL;
1038 data->vgroups.create (20);
1039 data->vcands.create (20);
1040 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1041 data->name_expansion_cache = NULL;
1042 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1043 data->iv_common_cands.create (20);
1044 decl_rtl_to_reset.create (20);
1045 gcc_obstack_init (&data->iv_obstack);
1048 /* Returns a memory object to which EXPR points. In case we are able to
1049 determine that it does not point to any such object, NULL is returned. */
1051 static tree
1052 determine_base_object (tree expr)
1054 enum tree_code code = TREE_CODE (expr);
1055 tree base, obj;
1057 /* If this is a pointer casted to any type, we need to determine
1058 the base object for the pointer; so handle conversions before
1059 throwing away non-pointer expressions. */
1060 if (CONVERT_EXPR_P (expr))
1061 return determine_base_object (TREE_OPERAND (expr, 0));
1063 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1064 return NULL_TREE;
1066 switch (code)
1068 case INTEGER_CST:
1069 return NULL_TREE;
1071 case ADDR_EXPR:
1072 obj = TREE_OPERAND (expr, 0);
1073 base = get_base_address (obj);
1075 if (!base)
1076 return expr;
1078 if (TREE_CODE (base) == MEM_REF)
1079 return determine_base_object (TREE_OPERAND (base, 0));
1081 return fold_convert (ptr_type_node,
1082 build_fold_addr_expr (base));
1084 case POINTER_PLUS_EXPR:
1085 return determine_base_object (TREE_OPERAND (expr, 0));
1087 case PLUS_EXPR:
1088 case MINUS_EXPR:
1089 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1090 gcc_unreachable ();
1092 default:
1093 if (POLY_INT_CST_P (expr))
1094 return NULL_TREE;
1095 return fold_convert (ptr_type_node, expr);
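For illustration (a hedged sketch, not part of the pass; the helper name and its parameters are made up, and assume ARRAY_DECL is a declared array, PTR_SSA a pointer SSA name and BYTE_OFF a byte offset), the cases above make determine_base_object behave roughly as follows:

/* Illustrative only: ADDR_EXPRs reduce to the address of the underlying
   decl, pointer arithmetic is traced through its pointer operand, and
   non-pointer or constant expressions yield NULL_TREE.  */
static void
base_object_example (tree array_decl, tree ptr_ssa, tree byte_off)
{
  /* &A[0] and &A yield the same base object, (void *) &A.  */
  tree base1 = determine_base_object (build_fold_addr_expr (array_decl));
  /* For P + OFF, only the pointer operand P determines the base object.  */
  tree base2
    = determine_base_object (fold_build_pointer_plus (ptr_ssa, byte_off));
  gcc_checking_assert (base1 != NULL_TREE && base2 != NULL_TREE);
}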
1099 /* Return true if address expression with non-DECL_P operand appears
1100 in EXPR. */
1102 static bool
1103 contain_complex_addr_expr (tree expr)
1105 bool res = false;
1107 STRIP_NOPS (expr);
1108 switch (TREE_CODE (expr))
1110 case POINTER_PLUS_EXPR:
1111 case PLUS_EXPR:
1112 case MINUS_EXPR:
1113 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1114 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1115 break;
1117 case ADDR_EXPR:
1118 return (!DECL_P (TREE_OPERAND (expr, 0)));
1120 default:
1121 return false;
1124 return res;
1127 /* Allocates an induction variable with given initial value BASE and step STEP
1128 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1130 static struct iv *
1131 alloc_iv (struct ivopts_data *data, tree base, tree step,
1132 bool no_overflow = false)
1134 tree expr = base;
1135 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1136 sizeof (struct iv));
1137 gcc_assert (step != NULL_TREE);
1139 /* Lower address expression in base except ones with DECL_P as operand.
1140 By doing this:
1141 1) More accurate cost can be computed for address expressions;
1142 2) Duplicate candidates won't be created for bases in different
1143 forms, like &a[0] and &a. */
1144 STRIP_NOPS (expr);
1145 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1146 || contain_complex_addr_expr (expr))
1148 aff_tree comb;
1149 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1150 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1153 iv->base = base;
1154 iv->base_object = determine_base_object (base);
1155 iv->step = step;
1156 iv->biv_p = false;
1157 iv->nonlin_use = NULL;
1158 iv->ssa_name = NULL_TREE;
1159 if (!no_overflow
1160 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1161 base, step))
1162 no_overflow = true;
1163 iv->no_overflow = no_overflow;
1164 iv->have_address_use = false;
1166 return iv;
1169 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1170 doesn't overflow. */
1172 static void
1173 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1174 bool no_overflow)
1176 struct version_info *info = name_info (data, iv);
1178 gcc_assert (!info->iv);
1180 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1181 info->iv = alloc_iv (data, base, step, no_overflow);
1182 info->iv->ssa_name = iv;
1185 /* Finds induction variable declaration for VAR. */
1187 static struct iv *
1188 get_iv (struct ivopts_data *data, tree var)
1190 basic_block bb;
1191 tree type = TREE_TYPE (var);
1193 if (!POINTER_TYPE_P (type)
1194 && !INTEGRAL_TYPE_P (type))
1195 return NULL;
1197 if (!name_info (data, var)->iv)
1199 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1201 if (!bb
1202 || !flow_bb_inside_loop_p (data->current_loop, bb))
1203 set_iv (data, var, var, build_int_cst (type, 0), true);
1206 return name_info (data, var)->iv;
1209 /* Return the first non-invariant ssa var found in EXPR. */
1211 static tree
1212 extract_single_var_from_expr (tree expr)
1214 int i, n;
1215 tree tmp;
1216 enum tree_code code;
1218 if (!expr || is_gimple_min_invariant (expr))
1219 return NULL;
1221 code = TREE_CODE (expr);
1222 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1224 n = TREE_OPERAND_LENGTH (expr);
1225 for (i = 0; i < n; i++)
1227 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1229 if (tmp)
1230 return tmp;
1233 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1236 /* Finds basic ivs. */
1238 static bool
1239 find_bivs (struct ivopts_data *data)
1241 gphi *phi;
1242 affine_iv iv;
1243 tree step, type, base, stop;
1244 bool found = false;
1245 struct loop *loop = data->current_loop;
1246 gphi_iterator psi;
1248 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1250 phi = psi.phi ();
1252 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1253 continue;
1255 if (virtual_operand_p (PHI_RESULT (phi)))
1256 continue;
1258 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1259 continue;
1261 if (integer_zerop (iv.step))
1262 continue;
1264 step = iv.step;
1265 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1266 /* Stop expanding the iv base at the first ssa var referred to by the iv
1267 step. Ideally we would stop at any ssa var referred to by the step, but
1268 since that is expensive and such cases are unusual, we just use the
1270 first one. See PR64705 for the rationale. */
1271 stop = extract_single_var_from_expr (step);
1272 base = expand_simple_operations (base, stop);
1273 if (contains_abnormal_ssa_name_p (base)
1274 || contains_abnormal_ssa_name_p (step))
1275 continue;
1277 type = TREE_TYPE (PHI_RESULT (phi));
1278 base = fold_convert (type, base);
1279 if (step)
1281 if (POINTER_TYPE_P (type))
1282 step = convert_to_ptrofftype (step);
1283 else
1284 step = fold_convert (type, step);
1287 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1288 found = true;
1291 return found;
1294 /* Marks basic ivs. */
1296 static void
1297 mark_bivs (struct ivopts_data *data)
1299 gphi *phi;
1300 gimple *def;
1301 tree var;
1302 struct iv *iv, *incr_iv;
1303 struct loop *loop = data->current_loop;
1304 basic_block incr_bb;
1305 gphi_iterator psi;
1307 data->bivs_not_used_in_addr = 0;
1308 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1310 phi = psi.phi ();
1312 iv = get_iv (data, PHI_RESULT (phi));
1313 if (!iv)
1314 continue;
1316 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1317 def = SSA_NAME_DEF_STMT (var);
1318 /* Don't mark an iv peeled from another one as a biv. */
1319 if (def
1320 && gimple_code (def) == GIMPLE_PHI
1321 && gimple_bb (def) == loop->header)
1322 continue;
1324 incr_iv = get_iv (data, var);
1325 if (!incr_iv)
1326 continue;
1328 /* If the increment is in the subloop, ignore it. */
1329 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1330 if (incr_bb->loop_father != data->current_loop
1331 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1332 continue;
1334 iv->biv_p = true;
1335 incr_iv->biv_p = true;
1336 if (iv->no_overflow)
1337 data->bivs_not_used_in_addr++;
1338 if (incr_iv->no_overflow)
1339 data->bivs_not_used_in_addr++;
1343 /* Checks whether STMT defines a linear induction variable and stores its
1344 parameters to IV. */
1346 static bool
1347 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1349 tree lhs, stop;
1350 struct loop *loop = data->current_loop;
1352 iv->base = NULL_TREE;
1353 iv->step = NULL_TREE;
1355 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1356 return false;
1358 lhs = gimple_assign_lhs (stmt);
1359 if (TREE_CODE (lhs) != SSA_NAME)
1360 return false;
1362 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1363 return false;
1365 /* Stop expanding the iv base at the first ssa var referred to by the iv
1366 step. Ideally we would stop at any ssa var referred to by the step, but
1367 since that is expensive and such cases are unusual, we just use the
1369 first one. See PR64705 for the rationale. */
1370 stop = extract_single_var_from_expr (iv->step);
1371 iv->base = expand_simple_operations (iv->base, stop);
1372 if (contains_abnormal_ssa_name_p (iv->base)
1373 || contains_abnormal_ssa_name_p (iv->step))
1374 return false;
1376 /* If STMT could throw, then do not consider STMT as defining a GIV.
1377 While this will suppress optimizations, we cannot safely delete this
1378 GIV and associated statements, even if it appears it is not used. */
1379 if (stmt_could_throw_p (cfun, stmt))
1380 return false;
1382 return true;
1385 /* Finds general ivs in statement STMT. */
1387 static void
1388 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1390 affine_iv iv;
1392 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1393 return;
1395 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1398 /* Finds general ivs in basic block BB. */
1400 static void
1401 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1403 gimple_stmt_iterator bsi;
1405 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1406 find_givs_in_stmt (data, gsi_stmt (bsi));
1409 /* Finds general ivs. */
1411 static void
1412 find_givs (struct ivopts_data *data)
1414 struct loop *loop = data->current_loop;
1415 basic_block *body = get_loop_body_in_dom_order (loop);
1416 unsigned i;
1418 for (i = 0; i < loop->num_nodes; i++)
1419 find_givs_in_bb (data, body[i]);
1420 free (body);
1423 /* For each ssa name defined in LOOP determines whether it is an induction
1424 variable and if so, its initial value and step. */
1426 static bool
1427 find_induction_variables (struct ivopts_data *data)
1429 unsigned i;
1430 bitmap_iterator bi;
1432 if (!find_bivs (data))
1433 return false;
1435 find_givs (data);
1436 mark_bivs (data);
1438 if (dump_file && (dump_flags & TDF_DETAILS))
1440 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1442 if (niter)
1444 fprintf (dump_file, " number of iterations ");
1445 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1446 if (!integer_zerop (niter->may_be_zero))
1448 fprintf (dump_file, "; zero if ");
1449 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1451 fprintf (dump_file, "\n");
1454 fprintf (dump_file, "\n<Induction Vars>:\n");
1455 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1457 struct version_info *info = ver_info (data, i);
1458 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1459 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1463 return true;
1466 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1467 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1468 is the const offset stripped from IV base and MEM_TYPE is the type
1469 of the memory being addressed. For uses of other types, ADDR_BASE
1470 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1472 static struct iv_use *
1473 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1474 gimple *stmt, enum use_type type, tree mem_type,
1475 tree addr_base, poly_uint64 addr_offset)
1477 struct iv_use *use = XCNEW (struct iv_use);
1479 use->id = group->vuses.length ();
1480 use->group_id = group->id;
1481 use->type = type;
1482 use->mem_type = mem_type;
1483 use->iv = iv;
1484 use->stmt = stmt;
1485 use->op_p = use_p;
1486 use->addr_base = addr_base;
1487 use->addr_offset = addr_offset;
1489 group->vuses.safe_push (use);
1490 return use;
1493 /* Checks whether OP is a loop-level invariant and if so, records it.
1494 NONLINEAR_USE is true if the invariant is used in a way we do not
1495 handle specially. */
1497 static void
1498 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1500 basic_block bb;
1501 struct version_info *info;
1503 if (TREE_CODE (op) != SSA_NAME
1504 || virtual_operand_p (op))
1505 return;
1507 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1508 if (bb
1509 && flow_bb_inside_loop_p (data->current_loop, bb))
1510 return;
1512 info = name_info (data, op);
1513 info->name = op;
1514 info->has_nonlin_use |= nonlinear_use;
1515 if (!info->inv_id)
1516 info->inv_id = ++data->max_inv_var_id;
1517 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1520 /* Record a group of TYPE. */
1522 static struct iv_group *
1523 record_group (struct ivopts_data *data, enum use_type type)
1525 struct iv_group *group = XCNEW (struct iv_group);
1527 group->id = data->vgroups.length ();
1528 group->type = type;
1529 group->related_cands = BITMAP_ALLOC (NULL);
1530 group->vuses.create (1);
1532 data->vgroups.safe_push (group);
1533 return group;
1536 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1537 New group will be created if there is no existing group for the use.
1538 MEM_TYPE is the type of memory being addressed, or NULL if this
1539 isn't an address reference. */
1541 static struct iv_use *
1542 record_group_use (struct ivopts_data *data, tree *use_p,
1543 struct iv *iv, gimple *stmt, enum use_type type,
1544 tree mem_type)
1546 tree addr_base = NULL;
1547 struct iv_group *group = NULL;
1548 poly_uint64 addr_offset = 0;
1550 /* A non-address type use is always recorded in a new group; for address type uses, look for an existing group with the same stripped base and step. */
1551 if (address_p (type))
1553 unsigned int i;
1555 addr_base = strip_offset (iv->base, &addr_offset);
1556 for (i = 0; i < data->vgroups.length (); i++)
1558 struct iv_use *use;
1560 group = data->vgroups[i];
1561 use = group->vuses[0];
1562 if (!address_p (use->type))
1563 continue;
1565 /* Check if it has the same stripped base and step. */
1566 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1567 && operand_equal_p (iv->step, use->iv->step, 0)
1568 && operand_equal_p (addr_base, use->addr_base, 0))
1569 break;
1571 if (i == data->vgroups.length ())
1572 group = NULL;
1575 if (!group)
1576 group = record_group (data, type);
1578 return record_use (group, use_p, iv, stmt, type, mem_type,
1579 addr_base, addr_offset);
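As a concrete (illustrative, hand-written) case of the grouping rule described in the header comment, not taken from this file:

/* Illustrative only: the references to a[i] and a[i + 4] typically fall
   into a single address group, since they share the stripped base and
   step and differ only in addr_offset (0 and 16 bytes with 4-byte int).  */
void
grouping_example (int *a, int n)
{
  for (int i = 0; i < n - 4; i++)
    a[i] += a[i + 4];
}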
1582 /* Checks whether the use OP is interesting and if so, records it. */
1584 static struct iv_use *
1585 find_interesting_uses_op (struct ivopts_data *data, tree op)
1587 struct iv *iv;
1588 gimple *stmt;
1589 struct iv_use *use;
1591 if (TREE_CODE (op) != SSA_NAME)
1592 return NULL;
1594 iv = get_iv (data, op);
1595 if (!iv)
1596 return NULL;
1598 if (iv->nonlin_use)
1600 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1601 return iv->nonlin_use;
1604 if (integer_zerop (iv->step))
1606 record_invariant (data, op, true);
1607 return NULL;
1610 stmt = SSA_NAME_DEF_STMT (op);
1611 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1613 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1614 iv->nonlin_use = use;
1615 return use;
1618 /* Indicate how compare type iv_use can be handled. */
1619 enum comp_iv_rewrite
1621 COMP_IV_NA,
1622 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1623 COMP_IV_EXPR,
1624 /* We may rewrite compare type iv_uses on both sides of comparison by
1625 expressing value of each iv_use. */
1626 COMP_IV_EXPR_2,
1627 /* We may rewrite compare type iv_use by expressing value of the iv_use
1628 or by eliminating it with other iv_cand. */
1629 COMP_IV_ELIM
1632 /* Given a condition in statement STMT, checks whether it is a compare
1633 of an induction variable and an invariant. If this is the case,
1634 CONTROL_VAR is set to location of the iv, BOUND to the location of
1635 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1636 induction variable descriptions, and the kind of rewrite that is
1637 possible is returned. Otherwise, CONTROL_VAR and BOUND are set to the
1638 arguments of the condition and COMP_IV_NA is returned. */
1640 static enum comp_iv_rewrite
1641 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1642 tree **control_var, tree **bound,
1643 struct iv **iv_var, struct iv **iv_bound)
1645 /* The objects returned when COND has constant operands. */
1646 static struct iv const_iv;
1647 static tree zero;
1648 tree *op0 = &zero, *op1 = &zero;
1649 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1650 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1652 if (gimple_code (stmt) == GIMPLE_COND)
1654 gcond *cond_stmt = as_a <gcond *> (stmt);
1655 op0 = gimple_cond_lhs_ptr (cond_stmt);
1656 op1 = gimple_cond_rhs_ptr (cond_stmt);
1658 else
1660 op0 = gimple_assign_rhs1_ptr (stmt);
1661 op1 = gimple_assign_rhs2_ptr (stmt);
1664 zero = integer_zero_node;
1665 const_iv.step = integer_zero_node;
1667 if (TREE_CODE (*op0) == SSA_NAME)
1668 iv0 = get_iv (data, *op0);
1669 if (TREE_CODE (*op1) == SSA_NAME)
1670 iv1 = get_iv (data, *op1);
1672 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1673 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1675 rewrite_type = COMP_IV_EXPR_2;
1676 goto end;
1679 /* If neither side of the comparison is an IV. */
1680 if ((!iv0 || integer_zerop (iv0->step))
1681 && (!iv1 || integer_zerop (iv1->step)))
1682 goto end;
1684 /* Control variable may be on the other side. */
1685 if (!iv0 || integer_zerop (iv0->step))
1687 std::swap (op0, op1);
1688 std::swap (iv0, iv1);
1690 /* If one side is IV and the other side isn't loop invariant. */
1691 if (!iv1)
1692 rewrite_type = COMP_IV_EXPR;
1693 /* If one side is IV and the other side is loop invariant. */
1694 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1695 rewrite_type = COMP_IV_ELIM;
1697 end:
1698 if (control_var)
1699 *control_var = op0;
1700 if (iv_var)
1701 *iv_var = iv0;
1702 if (bound)
1703 *bound = op1;
1704 if (iv_bound)
1705 *iv_bound = iv1;
1707 return rewrite_type;
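For intuition (illustrative only; the function and its names are made up, and the exact classification depends on the earlier iv analysis), the enum values above map to source-level conditions roughly as follows:

/* Illustrative only: how source-level loop conditions are classified.  */
void
cond_examples (int *p, int *q, int n)
{
  for (int i = 0; i < n; i++)   /* iv vs. loop invariant: COMP_IV_ELIM.  */
    p[i] = 0;
  for (; p < q; p++, q--)       /* ivs on both sides: COMP_IV_EXPR_2.  */
    *p = *q;
}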
1710 /* Checks whether the condition in STMT is interesting and if so,
1711 records it. */
1713 static void
1714 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1716 tree *var_p, *bound_p;
1717 struct iv *var_iv, *bound_iv;
1718 enum comp_iv_rewrite ret;
1720 ret = extract_cond_operands (data, stmt,
1721 &var_p, &bound_p, &var_iv, &bound_iv);
1722 if (ret == COMP_IV_NA)
1724 find_interesting_uses_op (data, *var_p);
1725 find_interesting_uses_op (data, *bound_p);
1726 return;
1729 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1730 /* Record compare type iv_use for iv on the other side of comparison. */
1731 if (ret == COMP_IV_EXPR_2)
1732 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1735 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1736 obviously invariant, i.e. such that all its operands are defined
1737 outside of the returned loop. Returns NULL if EXPR is not
1738 even obviously invariant in LOOP. */
1740 struct loop *
1741 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1743 basic_block def_bb;
1744 unsigned i, len;
1746 if (is_gimple_min_invariant (expr))
1747 return current_loops->tree_root;
1749 if (TREE_CODE (expr) == SSA_NAME)
1751 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1752 if (def_bb)
1754 if (flow_bb_inside_loop_p (loop, def_bb))
1755 return NULL;
1756 return superloop_at_depth (loop,
1757 loop_depth (def_bb->loop_father) + 1);
1760 return current_loops->tree_root;
1763 if (!EXPR_P (expr))
1764 return NULL;
1766 unsigned maxdepth = 0;
1767 len = TREE_OPERAND_LENGTH (expr);
1768 for (i = 0; i < len; i++)
1770 struct loop *ivloop;
1771 if (!TREE_OPERAND (expr, i))
1772 continue;
1774 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1775 if (!ivloop)
1776 return NULL;
1777 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1780 return superloop_at_depth (loop, maxdepth);
1783 /* Returns true if expression EXPR is obviously invariant in LOOP,
1784 i.e. if all its operands are defined outside of the LOOP. LOOP
1785 should not be the function body. */
1787 bool
1788 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1790 basic_block def_bb;
1791 unsigned i, len;
1793 gcc_assert (loop_depth (loop) > 0);
1795 if (is_gimple_min_invariant (expr))
1796 return true;
1798 if (TREE_CODE (expr) == SSA_NAME)
1800 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1801 if (def_bb
1802 && flow_bb_inside_loop_p (loop, def_bb))
1803 return false;
1805 return true;
1808 if (!EXPR_P (expr))
1809 return false;
1811 len = TREE_OPERAND_LENGTH (expr);
1812 for (i = 0; i < len; i++)
1813 if (TREE_OPERAND (expr, i)
1814 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1815 return false;
1817 return true;
1820 /* Given expression EXPR which computes inductive values with respect
1821 to loop recorded in DATA, this function returns biv from which EXPR
1822 is derived by tracing definition chains of ssa variables in EXPR. */
1824 static struct iv*
1825 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1827 struct iv *iv;
1828 unsigned i, n;
1829 tree e2, e1;
1830 enum tree_code code;
1831 gimple *stmt;
1833 if (expr == NULL_TREE)
1834 return NULL;
1836 if (is_gimple_min_invariant (expr))
1837 return NULL;
1839 code = TREE_CODE (expr);
1840 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1842 n = TREE_OPERAND_LENGTH (expr);
1843 for (i = 0; i < n; i++)
1845 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1846 if (iv)
1847 return iv;
1851 /* Stop if it's not ssa name. */
1852 if (code != SSA_NAME)
1853 return NULL;
1855 iv = get_iv (data, expr);
1856 if (!iv || integer_zerop (iv->step))
1857 return NULL;
1858 else if (iv->biv_p)
1859 return iv;
1861 stmt = SSA_NAME_DEF_STMT (expr);
1862 if (gphi *phi = dyn_cast <gphi *> (stmt))
1864 ssa_op_iter iter;
1865 use_operand_p use_p;
1866 basic_block phi_bb = gimple_bb (phi);
1868 /* Skip loop header PHI that doesn't define biv. */
1869 if (phi_bb->loop_father == data->current_loop)
1870 return NULL;
1872 if (virtual_operand_p (gimple_phi_result (phi)))
1873 return NULL;
1875 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1877 tree use = USE_FROM_PTR (use_p);
1878 iv = find_deriving_biv_for_expr (data, use);
1879 if (iv)
1880 return iv;
1882 return NULL;
1884 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1885 return NULL;
1887 e1 = gimple_assign_rhs1 (stmt);
1888 code = gimple_assign_rhs_code (stmt);
1889 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1890 return find_deriving_biv_for_expr (data, e1);
1892 switch (code)
1894 case MULT_EXPR:
1895 case PLUS_EXPR:
1896 case MINUS_EXPR:
1897 case POINTER_PLUS_EXPR:
1898 /* Increments, decrements and multiplications by a constant
1899 are simple. */
1900 e2 = gimple_assign_rhs2 (stmt);
1901 iv = find_deriving_biv_for_expr (data, e2);
1902 if (iv)
1903 return iv;
1904 gcc_fallthrough ();
1906 CASE_CONVERT:
1907 /* Casts are simple. */
1908 return find_deriving_biv_for_expr (data, e1);
1910 default:
1911 break;
1914 return NULL;
1917 /* Record that BIV, as well as its predecessor and successor bivs, are
1918 used in address type uses. */
1920 static void
1921 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1923 unsigned i;
1924 tree type, base_1, base_2;
1925 bitmap_iterator bi;
1927 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1928 || biv->have_address_use || !biv->no_overflow)
1929 return;
1931 type = TREE_TYPE (biv->base);
1932 if (!INTEGRAL_TYPE_P (type))
1933 return;
1935 biv->have_address_use = true;
1936 data->bivs_not_used_in_addr--;
1937 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1938 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1940 struct iv *iv = ver_info (data, i)->iv;
1942 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1943 || iv->have_address_use || !iv->no_overflow)
1944 continue;
1946 if (type != TREE_TYPE (iv->base)
1947 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1948 continue;
1950 if (!operand_equal_p (biv->step, iv->step, 0))
1951 continue;
1953 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1954 if (operand_equal_p (base_1, iv->base, 0)
1955 || operand_equal_p (base_2, biv->base, 0))
1957 iv->have_address_use = true;
1958 data->bivs_not_used_in_addr--;
1963 /* Accumulates the steps of indices into DATA and replaces their values with
1964 the initial ones. Returns false when the value of the index cannot be
1965 determined. Callback for for_each_index. */
1967 struct ifs_ivopts_data
1969 struct ivopts_data *ivopts_data;
1970 gimple *stmt;
1971 tree step;
1974 static bool
1975 idx_find_step (tree base, tree *idx, void *data)
1977 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1978 struct iv *iv;
1979 bool use_overflow_semantics = false;
1980 tree step, iv_base, iv_step, lbound, off;
1981 struct loop *loop = dta->ivopts_data->current_loop;
1983 /* If base is a component ref, require that the offset of the reference
1984 be invariant. */
1985 if (TREE_CODE (base) == COMPONENT_REF)
1987 off = component_ref_field_offset (base);
1988 return expr_invariant_in_loop_p (loop, off);
1991 /* If base is array, first check whether we will be able to move the
1992 reference out of the loop (in order to take its address in strength
1993 reduction). In order for this to work we need both lower bound
1994 and step to be loop invariants. */
1995 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1997 /* Moreover, for a range, the size needs to be invariant as well. */
1998 if (TREE_CODE (base) == ARRAY_RANGE_REF
1999 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2000 return false;
2002 step = array_ref_element_size (base);
2003 lbound = array_ref_low_bound (base);
2005 if (!expr_invariant_in_loop_p (loop, step)
2006 || !expr_invariant_in_loop_p (loop, lbound))
2007 return false;
2010 if (TREE_CODE (*idx) != SSA_NAME)
2011 return true;
2013 iv = get_iv (dta->ivopts_data, *idx);
2014 if (!iv)
2015 return false;
2017 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2018 *&x[0], which is not folded and does not trigger the
2019 ARRAY_REF path below. */
2020 *idx = iv->base;
2022 if (integer_zerop (iv->step))
2023 return true;
2025 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2027 step = array_ref_element_size (base);
2029 /* We only handle addresses whose step is an integer constant. */
2030 if (TREE_CODE (step) != INTEGER_CST)
2031 return false;
2033 else
2034 /* The step for pointer arithmetic is already 1 byte. */
2035 step = size_one_node;
2037 iv_base = iv->base;
2038 iv_step = iv->step;
2039 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2040 use_overflow_semantics = true;
2042 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2043 sizetype, &iv_base, &iv_step, dta->stmt,
2044 use_overflow_semantics))
2046 /* The index might wrap. */
2047 return false;
2050 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2051 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2053 if (dta->ivopts_data->bivs_not_used_in_addr)
2055 if (!iv->biv_p)
2056 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2058 record_biv_for_address_use (dta->ivopts_data, iv);
2060 return true;
2063 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2064 object is passed to it in DATA. */
2066 static bool
2067 idx_record_use (tree base, tree *idx,
2068 void *vdata)
2070 struct ivopts_data *data = (struct ivopts_data *) vdata;
2071 find_interesting_uses_op (data, *idx);
2072 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2074 find_interesting_uses_op (data, array_ref_element_size (base));
2075 find_interesting_uses_op (data, array_ref_low_bound (base));
2077 return true;
2080 /* If we can prove that TOP = cst * BOT for some constant cst,
2081 store cst to MUL and return true. Otherwise return false.
2082 The returned value is always sign-extended, regardless of the
2083 signedness of TOP and BOT. */
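/* Illustrative examples of the cases handled below: for TOP = n * 12 and
   BOT = n this stores 12 to *MUL; for TOP = n * 8 - n * 2 and BOT = n it
   stores 6; for TOP = 10 and BOT = 4 it fails because the division is
   not exact.  */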
2085 static bool
2086 constant_multiple_of (tree top, tree bot, widest_int *mul)
2088 tree mby;
2089 enum tree_code code;
2090 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2091 widest_int res, p0, p1;
2093 STRIP_NOPS (top);
2094 STRIP_NOPS (bot);
2096 if (operand_equal_p (top, bot, 0))
2098 *mul = 1;
2099 return true;
2102 code = TREE_CODE (top);
2103 switch (code)
2105 case MULT_EXPR:
2106 mby = TREE_OPERAND (top, 1);
2107 if (TREE_CODE (mby) != INTEGER_CST)
2108 return false;
2110 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2111 return false;
2113 *mul = wi::sext (res * wi::to_widest (mby), precision);
2114 return true;
2116 case PLUS_EXPR:
2117 case MINUS_EXPR:
2118 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2119 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2120 return false;
2122 if (code == MINUS_EXPR)
2123 p1 = -p1;
2124 *mul = wi::sext (p0 + p1, precision);
2125 return true;
2127 case INTEGER_CST:
2128 if (TREE_CODE (bot) != INTEGER_CST)
2129 return false;
2131 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2132 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2133 if (p1 == 0)
2134 return false;
2135 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2136 return res == 0;
2138 default:
2139 if (POLY_INT_CST_P (top)
2140 && POLY_INT_CST_P (bot)
2141 && constant_multiple_p (wi::to_poly_widest (top),
2142 wi::to_poly_widest (bot), mul))
2143 return true;
2145 return false;
2149 /* Return true if memory reference REF with step STEP may be unaligned. */
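/* Illustrative example: for a reference whose type requires, say, 8-byte
   alignment, a step of 4 bytes (only two trailing zero bits) makes every
   other access misaligned, so the function below returns true even when
   the initial reference itself is suitably aligned.  */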
2151 static bool
2152 may_be_unaligned_p (tree ref, tree step)
2154 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2155 thus they are not misaligned. */
2156 if (TREE_CODE (ref) == TARGET_MEM_REF)
2157 return false;
2159 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2160 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2161 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2163 unsigned HOST_WIDE_INT bitpos;
2164 unsigned int ref_align;
2165 get_object_alignment_1 (ref, &ref_align, &bitpos);
2166 if (ref_align < align
2167 || (bitpos % align) != 0
2168 || (bitpos % BITS_PER_UNIT) != 0)
2169 return true;
2171 unsigned int trailing_zeros = tree_ctz (step);
2172 if (trailing_zeros < HOST_BITS_PER_INT
2173 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2174 return true;
2176 return false;
2179 /* Return true if EXPR may be non-addressable. */
2181 bool
2182 may_be_nonaddressable_p (tree expr)
2184 switch (TREE_CODE (expr))
2186 case VAR_DECL:
2187 /* Check if it's a register variable. */
2188 return DECL_HARD_REGISTER (expr);
2190 case TARGET_MEM_REF:
2191 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2192 target, thus they are always addressable. */
2193 return false;
2195 case MEM_REF:
2196 /* Likewise for MEM_REFs, modulo the storage order. */
2197 return REF_REVERSE_STORAGE_ORDER (expr);
2199 case BIT_FIELD_REF:
2200 if (REF_REVERSE_STORAGE_ORDER (expr))
2201 return true;
2202 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2204 case COMPONENT_REF:
2205 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2206 return true;
2207 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2208 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2210 case ARRAY_REF:
2211 case ARRAY_RANGE_REF:
2212 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2213 return true;
2214 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2216 case VIEW_CONVERT_EXPR:
2217 /* This kind of view-conversion may wrap non-addressable objects
2218 and make them look addressable. After some processing the
2219 non-addressability may be uncovered again, causing ADDR_EXPRs
2220 of inappropriate objects to be built. */
2221 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2222 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2223 return true;
2224 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2226 CASE_CONVERT:
2227 return true;
2229 default:
2230 break;
2233 return false;
2236 /* Finds addresses in *OP_P inside STMT. */
2238 static void
2239 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2240 tree *op_p)
2242 tree base = *op_p, step = size_zero_node;
2243 struct iv *civ;
2244 struct ifs_ivopts_data ifs_ivopts_data;
2246 /* Do not play with volatile memory references. A bit too conservative,
2247 perhaps, but safe. */
2248 if (gimple_has_volatile_ops (stmt))
2249 goto fail;
2251 /* Ignore bitfields for now. Not really something terribly complicated
2252 to handle. TODO. */
2253 if (TREE_CODE (base) == BIT_FIELD_REF)
2254 goto fail;
2256 base = unshare_expr (base);
2258 if (TREE_CODE (base) == TARGET_MEM_REF)
2260 tree type = build_pointer_type (TREE_TYPE (base));
2261 tree astep;
2263 if (TMR_BASE (base)
2264 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2266 civ = get_iv (data, TMR_BASE (base));
2267 if (!civ)
2268 goto fail;
2270 TMR_BASE (base) = civ->base;
2271 step = civ->step;
2273 if (TMR_INDEX2 (base)
2274 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2276 civ = get_iv (data, TMR_INDEX2 (base));
2277 if (!civ)
2278 goto fail;
2280 TMR_INDEX2 (base) = civ->base;
2281 step = civ->step;
2283 if (TMR_INDEX (base)
2284 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2286 civ = get_iv (data, TMR_INDEX (base));
2287 if (!civ)
2288 goto fail;
2290 TMR_INDEX (base) = civ->base;
2291 astep = civ->step;
2293 if (astep)
2295 if (TMR_STEP (base))
2296 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2298 step = fold_build2 (PLUS_EXPR, type, step, astep);
2302 if (integer_zerop (step))
2303 goto fail;
2304 base = tree_mem_ref_addr (type, base);
2306 else
2308 ifs_ivopts_data.ivopts_data = data;
2309 ifs_ivopts_data.stmt = stmt;
2310 ifs_ivopts_data.step = size_zero_node;
2311 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2312 || integer_zerop (ifs_ivopts_data.step))
2313 goto fail;
2314 step = ifs_ivopts_data.step;
2316 /* Check that the base expression is addressable. This needs
2317 to be done after substituting bases of IVs into it. */
2318 if (may_be_nonaddressable_p (base))
2319 goto fail;
2321 /* Moreover, on strict alignment platforms, check that it is
2322 sufficiently aligned. */
2323 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2324 goto fail;
2326 base = build_fold_addr_expr (base);
2328 /* Substituting bases of IVs into the base expression might
2329 have caused folding opportunities. */
2330 if (TREE_CODE (base) == ADDR_EXPR)
2332 tree *ref = &TREE_OPERAND (base, 0);
2333 while (handled_component_p (*ref))
2334 ref = &TREE_OPERAND (*ref, 0);
2335 if (TREE_CODE (*ref) == MEM_REF)
2337 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2338 TREE_OPERAND (*ref, 0),
2339 TREE_OPERAND (*ref, 1));
2340 if (tem)
2341 *ref = tem;
2346 civ = alloc_iv (data, base, step);
2347 /* Fail if base object of this memory reference is unknown. */
2348 if (civ->base_object == NULL_TREE)
2349 goto fail;
2351 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2352 return;
2354 fail:
2355 for_each_index (op_p, idx_record_use, data);
2358 /* Finds and records invariants used in STMT. */
2360 static void
2361 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2363 ssa_op_iter iter;
2364 use_operand_p use_p;
2365 tree op;
2367 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2369 op = USE_FROM_PTR (use_p);
2370 record_invariant (data, op, false);
2374 /* CALL calls an internal function. If operand *OP_P will become an
2375 address when the call is expanded, return the type of the memory
2376 being addressed, otherwise return null. */
2378 static tree
2379 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2381 switch (gimple_call_internal_fn (call))
2383 case IFN_MASK_LOAD:
2384 case IFN_MASK_LOAD_LANES:
2385 if (op_p == gimple_call_arg_ptr (call, 0))
2386 return TREE_TYPE (gimple_call_lhs (call));
2387 return NULL_TREE;
2389 case IFN_MASK_STORE:
2390 case IFN_MASK_STORE_LANES:
2391 if (op_p == gimple_call_arg_ptr (call, 0))
2392 return TREE_TYPE (gimple_call_arg (call, 3));
2393 return NULL_TREE;
2395 default:
2396 return NULL_TREE;
2400 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2401 Return true if the operand will become an address when STMT
2402 is expanded and record the associated address use if so. */
2404 static bool
2405 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2406 struct iv *iv)
2408 /* Fail if base object of this memory reference is unknown. */
2409 if (iv->base_object == NULL_TREE)
2410 return false;
2412 tree mem_type = NULL_TREE;
2413 if (gcall *call = dyn_cast <gcall *> (stmt))
2414 if (gimple_call_internal_p (call))
2415 mem_type = get_mem_type_for_internal_fn (call, op_p);
2416 if (mem_type)
2418 iv = alloc_iv (data, iv->base, iv->step);
2419 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2420 return true;
2422 return false;
2425 /* Finds interesting uses of induction variables in the statement STMT. */
2427 static void
2428 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2430 struct iv *iv;
2431 tree op, *lhs, *rhs;
2432 ssa_op_iter iter;
2433 use_operand_p use_p;
2434 enum tree_code code;
2436 find_invariants_stmt (data, stmt);
2438 if (gimple_code (stmt) == GIMPLE_COND)
2440 find_interesting_uses_cond (data, stmt);
2441 return;
2444 if (is_gimple_assign (stmt))
2446 lhs = gimple_assign_lhs_ptr (stmt);
2447 rhs = gimple_assign_rhs1_ptr (stmt);
2449 if (TREE_CODE (*lhs) == SSA_NAME)
2451 /* If the statement defines an induction variable, the uses are not
2452 interesting by themselves. */
2454 iv = get_iv (data, *lhs);
2456 if (iv && !integer_zerop (iv->step))
2457 return;
2460 code = gimple_assign_rhs_code (stmt);
2461 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2462 && (REFERENCE_CLASS_P (*rhs)
2463 || is_gimple_val (*rhs)))
2465 if (REFERENCE_CLASS_P (*rhs))
2466 find_interesting_uses_address (data, stmt, rhs);
2467 else
2468 find_interesting_uses_op (data, *rhs);
2470 if (REFERENCE_CLASS_P (*lhs))
2471 find_interesting_uses_address (data, stmt, lhs);
2472 return;
2474 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2476 find_interesting_uses_cond (data, stmt);
2477 return;
2480 /* TODO -- we should also handle address uses of type
2482 memory = call (whatever);
2484 and
2486 call (memory). */
2489 if (gimple_code (stmt) == GIMPLE_PHI
2490 && gimple_bb (stmt) == data->current_loop->header)
2492 iv = get_iv (data, PHI_RESULT (stmt));
2494 if (iv && !integer_zerop (iv->step))
2495 return;
2498 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2500 op = USE_FROM_PTR (use_p);
2502 if (TREE_CODE (op) != SSA_NAME)
2503 continue;
2505 iv = get_iv (data, op);
2506 if (!iv)
2507 continue;
2509 if (!find_address_like_use (data, stmt, use_p->use, iv))
2510 find_interesting_uses_op (data, op);
2514 /* Finds interesting uses of induction variables outside of loops
2515 on loop exit edge EXIT. */
2517 static void
2518 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2520 gphi *phi;
2521 gphi_iterator psi;
2522 tree def;
2524 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2526 phi = psi.phi ();
2527 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2528 if (!virtual_operand_p (def))
2529 find_interesting_uses_op (data, def);
2533 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2534 mode for memory reference represented by USE. */
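/* Illustrative example: on a target whose [base + offset] addressing
   accepts, say, only 12-bit signed offsets, this returns true for an
   offset of 2040 but false for 4096; the actual range comes from the
   backend via memory_address_addr_space_p.  */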
2536 static GTY (()) vec<rtx, va_gc> *addr_list;
2538 static bool
2539 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2541 rtx reg, addr;
2542 unsigned list_index;
2543 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2544 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2546 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2547 if (list_index >= vec_safe_length (addr_list))
2548 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2550 addr = (*addr_list)[list_index];
2551 if (!addr)
2553 addr_mode = targetm.addr_space.address_mode (as);
2554 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2555 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2556 (*addr_list)[list_index] = addr;
2558 else
2559 addr_mode = GET_MODE (addr);
2561 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2562 return (memory_address_addr_space_p (mem_mode, addr, as));
2565 /* Comparison function to sort a group's uses in ascending order of addr_offset. */
2567 static int
2568 group_compare_offset (const void *a, const void *b)
2570 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2571 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2573 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2576 /* Check if small groups should be split. Return true if no group
2577 contains more than two uses with distinct addr_offsets. Return
2578 false otherwise. We want to split such groups because:
2580 1) Small groups don't have much benefit and may interfere with
2581 general candidate selection.
2582 2) The problem size with only small groups is usually small and the
2583 general algorithm can handle it well.
2585 TODO -- The above claim may not hold when we want to merge memory
2586 accesses with consecutive addresses. */
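/* Illustrative example: a group whose uses have addr_offsets {0, 4, 4, 8}
   has three distinct offsets, so it is not considered small and this
   function returns false; if every group has at most two distinct
   offsets, it returns true and the groups are split.  */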
2588 static bool
2589 split_small_address_groups_p (struct ivopts_data *data)
2591 unsigned int i, j, distinct = 1;
2592 struct iv_use *pre;
2593 struct iv_group *group;
2595 for (i = 0; i < data->vgroups.length (); i++)
2597 group = data->vgroups[i];
2598 if (group->vuses.length () == 1)
2599 continue;
2601 gcc_assert (address_p (group->type));
2602 if (group->vuses.length () == 2)
2604 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2605 group->vuses[1]->addr_offset) > 0)
2606 std::swap (group->vuses[0], group->vuses[1]);
2608 else
2609 group->vuses.qsort (group_compare_offset);
2611 if (distinct > 2)
2612 continue;
2614 distinct = 1;
2615 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2617 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2619 pre = group->vuses[j];
2620 distinct++;
2623 if (distinct > 2)
2624 break;
2628 return (distinct <= 2);
2631 /* For each group of address type uses, this function further groups
2632 these uses according to the maximum offset supported by target's
2633 [base + offset] addressing mode. */
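/* Illustrative example: with uses at offsets 0, 8 and 5000 from the same
   base, and a target whose [base + offset] mode cannot encode 5000, the
   first two uses stay in one group and the third one is moved to a newly
   created group (assuming small groups are not being split anyway).  */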
2635 static void
2636 split_address_groups (struct ivopts_data *data)
2638 unsigned int i, j;
2639 /* Whether to split every group unconditionally. */
2640 bool split_p = split_small_address_groups_p (data);
2642 for (i = 0; i < data->vgroups.length (); i++)
2644 struct iv_group *new_group = NULL;
2645 struct iv_group *group = data->vgroups[i];
2646 struct iv_use *use = group->vuses[0];
2648 use->id = 0;
2649 use->group_id = group->id;
2650 if (group->vuses.length () == 1)
2651 continue;
2653 gcc_assert (address_p (use->type));
2655 for (j = 1; j < group->vuses.length ();)
2657 struct iv_use *next = group->vuses[j];
2658 poly_int64 offset = next->addr_offset - use->addr_offset;
2660 /* Split the group if asked to, or if the offset against the first
2661 use can't fit in the offset part of the addressing mode. IV uses
2662 having the same offset are still kept in one group. */
2663 if (maybe_ne (offset, 0)
2664 && (split_p || !addr_offset_valid_p (use, offset)))
2666 if (!new_group)
2667 new_group = record_group (data, group->type);
2668 group->vuses.ordered_remove (j);
2669 new_group->vuses.safe_push (next);
2670 continue;
2673 next->id = j;
2674 next->group_id = group->id;
2675 j++;
2680 /* Finds uses of the induction variables that are interesting. */
2682 static void
2683 find_interesting_uses (struct ivopts_data *data)
2685 basic_block bb;
2686 gimple_stmt_iterator bsi;
2687 basic_block *body = get_loop_body (data->current_loop);
2688 unsigned i;
2689 edge e;
2691 for (i = 0; i < data->current_loop->num_nodes; i++)
2693 edge_iterator ei;
2694 bb = body[i];
2696 FOR_EACH_EDGE (e, ei, bb->succs)
2697 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699 find_interesting_uses_outside (data, e);
2701 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2703 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2704 if (!is_gimple_debug (gsi_stmt (bsi)))
2705 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2707 free (body);
2709 split_address_groups (data);
2711 if (dump_file && (dump_flags & TDF_DETAILS))
2713 fprintf (dump_file, "\n<IV Groups>:\n");
2714 dump_groups (dump_file, data);
2715 fprintf (dump_file, "\n");
2719 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2720 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2721 we are at the top-level of the processed address. */
2723 static tree
2724 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2725 poly_int64 *offset)
2727 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2728 enum tree_code code;
2729 tree type, orig_type = TREE_TYPE (expr);
2730 poly_int64 off0, off1;
2731 HOST_WIDE_INT st;
2732 tree orig_expr = expr;
2734 STRIP_NOPS (expr);
2736 type = TREE_TYPE (expr);
2737 code = TREE_CODE (expr);
2738 *offset = 0;
2740 switch (code)
2742 case POINTER_PLUS_EXPR:
2743 case PLUS_EXPR:
2744 case MINUS_EXPR:
2745 op0 = TREE_OPERAND (expr, 0);
2746 op1 = TREE_OPERAND (expr, 1);
2748 op0 = strip_offset_1 (op0, false, false, &off0);
2749 op1 = strip_offset_1 (op1, false, false, &off1);
2751 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2752 if (op0 == TREE_OPERAND (expr, 0)
2753 && op1 == TREE_OPERAND (expr, 1))
2754 return orig_expr;
2756 if (integer_zerop (op1))
2757 expr = op0;
2758 else if (integer_zerop (op0))
2760 if (code == MINUS_EXPR)
2761 expr = fold_build1 (NEGATE_EXPR, type, op1);
2762 else
2763 expr = op1;
2765 else
2766 expr = fold_build2 (code, type, op0, op1);
2768 return fold_convert (orig_type, expr);
2770 case MULT_EXPR:
2771 op1 = TREE_OPERAND (expr, 1);
2772 if (!cst_and_fits_in_hwi (op1))
2773 return orig_expr;
2775 op0 = TREE_OPERAND (expr, 0);
2776 op0 = strip_offset_1 (op0, false, false, &off0);
2777 if (op0 == TREE_OPERAND (expr, 0))
2778 return orig_expr;
2780 *offset = off0 * int_cst_value (op1);
2781 if (integer_zerop (op0))
2782 expr = op0;
2783 else
2784 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2786 return fold_convert (orig_type, expr);
2788 case ARRAY_REF:
2789 case ARRAY_RANGE_REF:
2790 if (!inside_addr)
2791 return orig_expr;
2793 step = array_ref_element_size (expr);
2794 if (!cst_and_fits_in_hwi (step))
2795 break;
2797 st = int_cst_value (step);
2798 op1 = TREE_OPERAND (expr, 1);
2799 op1 = strip_offset_1 (op1, false, false, &off1);
2800 *offset = off1 * st;
2802 if (top_compref
2803 && integer_zerop (op1))
2805 /* Strip the component reference completely. */
2806 op0 = TREE_OPERAND (expr, 0);
2807 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2808 *offset += off0;
2809 return op0;
2811 break;
2813 case COMPONENT_REF:
2815 tree field;
2817 if (!inside_addr)
2818 return orig_expr;
2820 tmp = component_ref_field_offset (expr);
2821 field = TREE_OPERAND (expr, 1);
2822 if (top_compref
2823 && cst_and_fits_in_hwi (tmp)
2824 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2826 HOST_WIDE_INT boffset, abs_off;
2828 /* Strip the component reference completely. */
2829 op0 = TREE_OPERAND (expr, 0);
2830 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2831 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2832 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2833 if (boffset < 0)
2834 abs_off = -abs_off;
2836 *offset = off0 + int_cst_value (tmp) + abs_off;
2837 return op0;
2840 break;
2842 case ADDR_EXPR:
2843 op0 = TREE_OPERAND (expr, 0);
2844 op0 = strip_offset_1 (op0, true, true, &off0);
2845 *offset += off0;
2847 if (op0 == TREE_OPERAND (expr, 0))
2848 return orig_expr;
2850 expr = build_fold_addr_expr (op0);
2851 return fold_convert (orig_type, expr);
2853 case MEM_REF:
2854 /* ??? Offset operand? */
2855 inside_addr = false;
2856 break;
2858 default:
2859 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2860 return build_int_cst (orig_type, 0);
2861 return orig_expr;
2864 /* Default handling of expressions for which we want to recurse into
2865 the first operand. */
2866 op0 = TREE_OPERAND (expr, 0);
2867 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2868 *offset += off0;
2870 if (op0 == TREE_OPERAND (expr, 0)
2871 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2872 return orig_expr;
2874 expr = copy_node (expr);
2875 TREE_OPERAND (expr, 0) = op0;
2876 if (op1)
2877 TREE_OPERAND (expr, 1) = op1;
2879 /* Inside address, we might strip the top level component references,
2880 thus changing type of the expression. Handling of ADDR_EXPR
2881 will fix that. */
2882 expr = fold_convert (orig_type, expr);
2884 return expr;
2887 /* Strips constant offsets from EXPR and stores them to OFFSET. */
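/* Illustrative example: for EXPR = &a[i + 3] with 4-byte array elements
   this returns &a[i] and stores 12 to *OFFSET; for EXPR = p + 16 it
   returns p with *OFFSET set to 16.  */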
2889 tree
2890 strip_offset (tree expr, poly_uint64_pod *offset)
2892 poly_int64 off;
2893 tree core = strip_offset_1 (expr, false, false, &off);
2894 *offset = off;
2895 return core;
2898 /* Returns variant of TYPE that can be used as base for different uses.
2899 We return unsigned type with the same precision, which avoids problems
2900 with overflows. */
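/* Illustrative example: for "int" this returns "unsigned int"; for a
   pointer type it returns an unsigned integer type of the same precision,
   so candidate arithmetic never relies on undefined signed overflow.  */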
2902 static tree
2903 generic_type_for (tree type)
2905 if (POINTER_TYPE_P (type))
2906 return unsigned_type_for (type);
2908 if (TYPE_UNSIGNED (type))
2909 return type;
2911 return unsigned_type_for (type);
2914 /* Private data for walk_tree. */
2916 struct walk_tree_data
2918 bitmap *inv_vars;
2919 struct ivopts_data *idata;
2922 /* Callback function for walk_tree, it records invariants and symbol
2923 reference in *EXPR_P. DATA is the structure storing result info. */
2925 static tree
2926 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2928 tree op = *expr_p;
2929 struct version_info *info;
2930 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2932 if (TREE_CODE (op) != SSA_NAME)
2933 return NULL_TREE;
2935 info = name_info (wdata->idata, op);
2936 /* Because we expand simple operations when finding IVs, a loop invariant
2937 variable that isn't referred to by the original loop could be used now.
2938 Record such invariant variables here. */
2939 if (!info->iv)
2941 struct ivopts_data *idata = wdata->idata;
2942 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2944 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2946 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2947 record_invariant (idata, op, false);
2950 if (!info->inv_id || info->has_nonlin_use)
2951 return NULL_TREE;
2953 if (!*wdata->inv_vars)
2954 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2955 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2957 return NULL_TREE;
2960 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
2961 should store them. */
2963 static inline void
2964 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2966 struct walk_tree_data wdata;
2968 if (!inv_vars)
2969 return;
2971 wdata.idata = data;
2972 wdata.inv_vars = inv_vars;
2973 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2976 /* Get entry from invariant expr hash table for INV_EXPR. A new entry
2977 will be recorded if it doesn't exist yet. Given the two exprs below:
2978 inv_expr + cst1, inv_expr + cst2
2979 it's hard to decide whether the constant part should be stripped
2980 or not. We choose not to strip it, based on the facts below:
2981 1) We need to count the ADD cost for the constant part if it's stripped,
2982 which isn't always trivial where this function is called.
2983 2) Stripping the constant away may conflict with the following loop
2984 invariant hoisting pass.
2985 3) Not stripping the constant away results in more invariant exprs,
2986 which usually leads to decisions preferring lower register pressure. */
2988 static iv_inv_expr_ent *
2989 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2991 STRIP_NOPS (inv_expr);
2993 if (poly_int_tree_p (inv_expr)
2994 || TREE_CODE (inv_expr) == SSA_NAME)
2995 return NULL;
2997 /* Don't strip constant part away as we used to. */
2999 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3000 struct iv_inv_expr_ent ent;
3001 ent.expr = inv_expr;
3002 ent.hash = iterative_hash_expr (inv_expr, 0);
3003 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3005 if (!*slot)
3007 *slot = XNEW (struct iv_inv_expr_ent);
3008 (*slot)->expr = inv_expr;
3009 (*slot)->hash = ent.hash;
3010 (*slot)->id = ++data->max_inv_expr_id;
3013 return *slot;
3016 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3017 position to POS. If USE is not NULL, the candidate is set as related to
3018 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3019 replacement of the final value of the iv by a direct computation. */
3021 static struct iv_cand *
3022 add_candidate_1 (struct ivopts_data *data,
3023 tree base, tree step, bool important, enum iv_position pos,
3024 struct iv_use *use, gimple *incremented_at,
3025 struct iv *orig_iv = NULL)
3027 unsigned i;
3028 struct iv_cand *cand = NULL;
3029 tree type, orig_type;
3031 gcc_assert (base && step);
3033 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3034 live, but the ivopts code may replace a real pointer with one
3035 pointing before or after the memory block that is then adjusted
3036 into the memory block during the loop. FIXME: It would likely be
3037 better to actually force the pointer live and still use ivopts;
3038 for example, it would be enough to write the pointer into memory
3039 and keep it there until after the loop. */
3040 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3041 return NULL;
3043 /* For non-original variables, make sure their values are computed in a type
3044 that does not invoke undefined behavior on overflows (since in general,
3045 we cannot prove that these induction variables are non-wrapping). */
3046 if (pos != IP_ORIGINAL)
3048 orig_type = TREE_TYPE (base);
3049 type = generic_type_for (orig_type);
3050 if (type != orig_type)
3052 base = fold_convert (type, base);
3053 step = fold_convert (type, step);
3057 for (i = 0; i < data->vcands.length (); i++)
3059 cand = data->vcands[i];
3061 if (cand->pos != pos)
3062 continue;
3064 if (cand->incremented_at != incremented_at
3065 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3066 && cand->ainc_use != use))
3067 continue;
3069 if (operand_equal_p (base, cand->iv->base, 0)
3070 && operand_equal_p (step, cand->iv->step, 0)
3071 && (TYPE_PRECISION (TREE_TYPE (base))
3072 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3073 break;
3076 if (i == data->vcands.length ())
3078 cand = XCNEW (struct iv_cand);
3079 cand->id = i;
3080 cand->iv = alloc_iv (data, base, step);
3081 cand->pos = pos;
3082 if (pos != IP_ORIGINAL)
3084 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3085 cand->var_after = cand->var_before;
3087 cand->important = important;
3088 cand->incremented_at = incremented_at;
3089 data->vcands.safe_push (cand);
3091 if (!poly_int_tree_p (step))
3093 find_inv_vars (data, &step, &cand->inv_vars);
3095 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3096 /* Share bitmap between inv_vars and inv_exprs for cand. */
3097 if (inv_expr != NULL)
3099 cand->inv_exprs = cand->inv_vars;
3100 cand->inv_vars = NULL;
3101 if (cand->inv_exprs)
3102 bitmap_clear (cand->inv_exprs);
3103 else
3104 cand->inv_exprs = BITMAP_ALLOC (NULL);
3106 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3110 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3111 cand->ainc_use = use;
3112 else
3113 cand->ainc_use = NULL;
3115 cand->orig_iv = orig_iv;
3116 if (dump_file && (dump_flags & TDF_DETAILS))
3117 dump_cand (dump_file, cand);
3120 cand->important |= important;
3122 /* Relate candidate to the group for which it is added. */
3123 if (use)
3124 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3126 return cand;
3129 /* Returns true if incrementing the induction variable at the end of the LOOP
3130 is allowed.
3132 The purpose is to avoid splitting the latch edge with a biv increment, thus
3133 creating a jump, possibly confusing other optimization passes and leaving
3134 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3135 available (so we do not have a better alternative), or if the latch edge
3136 is already nonempty. */
3138 static bool
3139 allow_ip_end_pos_p (struct loop *loop)
3141 if (!ip_normal_pos (loop))
3142 return true;
3144 if (!empty_block_p (ip_end_pos (loop)))
3145 return true;
3147 return false;
3150 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3151 Important field is set to IMPORTANT. */
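/* Illustrative example: for a use *p whose step equals the access size, a
   target with post-increment addressing gets an IP_AFTER_USE candidate
   with base p, so the increment can later be folded into the memory
   access itself.  */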
3153 static void
3154 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3155 bool important, struct iv_use *use)
3157 basic_block use_bb = gimple_bb (use->stmt);
3158 machine_mode mem_mode;
3159 unsigned HOST_WIDE_INT cstepi;
3161 /* If we insert the increment in any position other than the standard
3162 ones, we must ensure that it is incremented once per iteration.
3163 It must not be in an inner nested loop, or one side of an if
3164 statement. */
3165 if (use_bb->loop_father != data->current_loop
3166 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3167 || stmt_can_throw_internal (cfun, use->stmt)
3168 || !cst_and_fits_in_hwi (step))
3169 return;
3171 cstepi = int_cst_value (step);
3173 mem_mode = TYPE_MODE (use->mem_type);
3174 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3175 || USE_STORE_PRE_INCREMENT (mem_mode))
3176 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3177 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3178 || USE_STORE_PRE_DECREMENT (mem_mode))
3179 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3181 enum tree_code code = MINUS_EXPR;
3182 tree new_base;
3183 tree new_step = step;
3185 if (POINTER_TYPE_P (TREE_TYPE (base)))
3187 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3188 code = POINTER_PLUS_EXPR;
3190 else
3191 new_step = fold_convert (TREE_TYPE (base), new_step);
3192 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3193 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3194 use->stmt);
3196 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3197 || USE_STORE_POST_INCREMENT (mem_mode))
3198 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3199 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3200 || USE_STORE_POST_DECREMENT (mem_mode))
3201 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3203 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3204 use->stmt);
3208 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3209 position to POS. If USE is not NULL, the candidate is set as related to
3210 it. The candidate computation is scheduled before the exit condition and at
3211 the end of the loop. */
3213 static void
3214 add_candidate (struct ivopts_data *data,
3215 tree base, tree step, bool important, struct iv_use *use,
3216 struct iv *orig_iv = NULL)
3218 if (ip_normal_pos (data->current_loop))
3219 add_candidate_1 (data, base, step, important,
3220 IP_NORMAL, use, NULL, orig_iv);
3221 if (ip_end_pos (data->current_loop)
3222 && allow_ip_end_pos_p (data->current_loop))
3223 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3226 /* Adds standard iv candidates. */
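/* Illustrative example: on a typical LP64 target this adds the "int"
   candidate {0, +, 1} and, since "long" is wider than "int" but still
   fits in a machine word, the "long" candidate {0, +, 1} as well; the
   "long long" variant is skipped because it is no wider than "long".  */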
3228 static void
3229 add_standard_iv_candidates (struct ivopts_data *data)
3231 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3233 /* The same for a double-integer type if it is still fast enough. */
3234 if (TYPE_PRECISION
3235 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3236 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3237 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3238 build_int_cst (long_integer_type_node, 1), true, NULL);
3240 /* The same for a double-integer type if it is still fast enough. */
3241 if (TYPE_PRECISION
3242 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3243 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3244 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3245 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3249 /* Adds candidates based on the old induction variable IV. */
3251 static void
3252 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3254 gimple *phi;
3255 tree def;
3256 struct iv_cand *cand;
3258 /* Check if this biv is used in an address type use. */
3259 if (iv->no_overflow && iv->have_address_use
3260 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3261 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3263 tree base = fold_convert (sizetype, iv->base);
3264 tree step = fold_convert (sizetype, iv->step);
3266 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3267 add_candidate (data, base, step, true, NULL, iv);
3268 /* Add iv cand of the original type only if it has nonlinear use. */
3269 if (iv->nonlin_use)
3270 add_candidate (data, iv->base, iv->step, true, NULL);
3272 else
3273 add_candidate (data, iv->base, iv->step, true, NULL);
3275 /* The same, but with initial value zero. */
3276 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3277 add_candidate (data, size_int (0), iv->step, true, NULL);
3278 else
3279 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3280 iv->step, true, NULL);
3282 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3283 if (gimple_code (phi) == GIMPLE_PHI)
3285 /* Additionally record the possibility of leaving the original iv
3286 untouched. */
3287 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3288 /* Don't add candidate if it's from another PHI node because
3289 it's an affine iv appearing in the form of PEELED_CHREC. */
3290 phi = SSA_NAME_DEF_STMT (def);
3291 if (gimple_code (phi) != GIMPLE_PHI)
3293 cand = add_candidate_1 (data,
3294 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3295 SSA_NAME_DEF_STMT (def));
3296 if (cand)
3298 cand->var_before = iv->ssa_name;
3299 cand->var_after = def;
3302 else
3303 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3307 /* Adds candidates based on the old induction variables. */
3309 static void
3310 add_iv_candidate_for_bivs (struct ivopts_data *data)
3312 unsigned i;
3313 struct iv *iv;
3314 bitmap_iterator bi;
3316 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3318 iv = ver_info (data, i)->iv;
3319 if (iv && iv->biv_p && !integer_zerop (iv->step))
3320 add_iv_candidate_for_biv (data, iv);
3324 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3326 static void
3327 record_common_cand (struct ivopts_data *data, tree base,
3328 tree step, struct iv_use *use)
3330 struct iv_common_cand ent;
3331 struct iv_common_cand **slot;
3333 ent.base = base;
3334 ent.step = step;
3335 ent.hash = iterative_hash_expr (base, 0);
3336 ent.hash = iterative_hash_expr (step, ent.hash);
3338 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3339 if (*slot == NULL)
3341 *slot = new iv_common_cand ();
3342 (*slot)->base = base;
3343 (*slot)->step = step;
3344 (*slot)->uses.create (8);
3345 (*slot)->hash = ent.hash;
3346 data->iv_common_cands.safe_push ((*slot));
3349 gcc_assert (use != NULL);
3350 (*slot)->uses.safe_push (use);
3351 return;
3354 /* Comparison function used to sort common candidates. */
3356 static int
3357 common_cand_cmp (const void *p1, const void *p2)
3359 unsigned n1, n2;
3360 const struct iv_common_cand *const *const ccand1
3361 = (const struct iv_common_cand *const *)p1;
3362 const struct iv_common_cand *const *const ccand2
3363 = (const struct iv_common_cand *const *)p2;
3365 n1 = (*ccand1)->uses.length ();
3366 n2 = (*ccand2)->uses.length ();
3367 return n2 - n1;
3370 /* Adds IV candidates based on the common candidates recorded. */
3372 static void
3373 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3375 unsigned i, j;
3376 struct iv_cand *cand_1, *cand_2;
3378 data->iv_common_cands.qsort (common_cand_cmp);
3379 for (i = 0; i < data->iv_common_cands.length (); i++)
3381 struct iv_common_cand *ptr = data->iv_common_cands[i];
3383 /* Only add IV candidate if it's derived from multiple uses. */
3384 if (ptr->uses.length () <= 1)
3385 break;
3387 cand_1 = NULL;
3388 cand_2 = NULL;
3389 if (ip_normal_pos (data->current_loop))
3390 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3391 false, IP_NORMAL, NULL, NULL);
3393 if (ip_end_pos (data->current_loop)
3394 && allow_ip_end_pos_p (data->current_loop))
3395 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3396 false, IP_END, NULL, NULL);
3398 /* Bind deriving uses and the new candidates. */
3399 for (j = 0; j < ptr->uses.length (); j++)
3401 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3402 if (cand_1)
3403 bitmap_set_bit (group->related_cands, cand_1->id);
3404 if (cand_2)
3405 bitmap_set_bit (group->related_cands, cand_2->id);
3409 /* Release data since it is useless from this point. */
3410 data->iv_common_cand_tab->empty ();
3411 data->iv_common_cands.truncate (0);
3414 /* Adds candidates based on the value of USE's iv. */
3416 static void
3417 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3419 poly_uint64 offset;
3420 tree base;
3421 tree basetype;
3422 struct iv *iv = use->iv;
3424 add_candidate (data, iv->base, iv->step, false, use);
3426 /* Record common candidate for use in case it can be shared by others. */
3427 record_common_cand (data, iv->base, iv->step, use);
3429 /* Record common candidate with initial value zero. */
3430 basetype = TREE_TYPE (iv->base);
3431 if (POINTER_TYPE_P (basetype))
3432 basetype = sizetype;
3433 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3435 /* Compare the cost of an address with an unscaled index with the cost of
3436 an address with a scaled index and add candidate if useful. */
3437 poly_int64 step;
3438 if (use != NULL
3439 && poly_int_tree_p (iv->step, &step)
3440 && address_p (use->type))
3442 poly_int64 new_step;
3443 unsigned int fact = preferred_mem_scale_factor
3444 (use->iv->base,
3445 TYPE_MODE (use->mem_type),
3446 optimize_loop_for_speed_p (data->current_loop));
3448 if (fact != 1
3449 && multiple_p (step, fact, &new_step))
3450 add_candidate (data, size_int (0),
3451 wide_int_to_tree (sizetype, new_step),
3452 true, NULL);
3455 /* Record common candidate with constant offset stripped in base.
3456 Like the use itself, we also add candidate directly for it. */
3457 base = strip_offset (iv->base, &offset);
3458 if (maybe_ne (offset, 0U) || base != iv->base)
3460 record_common_cand (data, base, iv->step, use);
3461 add_candidate (data, base, iv->step, false, use);
3464 /* Record common candidate with base_object removed in base. */
3465 base = iv->base;
3466 STRIP_NOPS (base);
3467 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3469 tree step = iv->step;
3471 STRIP_NOPS (step);
3472 base = TREE_OPERAND (base, 1);
3473 step = fold_convert (sizetype, step);
3474 record_common_cand (data, base, step, use);
3475 /* Also record common candidate with offset stripped. */
3476 base = strip_offset (base, &offset);
3477 if (maybe_ne (offset, 0U))
3478 record_common_cand (data, base, step, use);
3481 /* Finally, add auto-increment candidates. Make such variables
3482 important since other iv uses with the same base object may be based
3483 on them. */
3484 if (use != NULL && address_p (use->type))
3485 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3488 /* Adds candidates based on the uses. */
3490 static void
3491 add_iv_candidate_for_groups (struct ivopts_data *data)
3493 unsigned i;
3495 /* Only add candidate for the first use in group. */
3496 for (i = 0; i < data->vgroups.length (); i++)
3498 struct iv_group *group = data->vgroups[i];
3500 gcc_assert (group->vuses[0] != NULL);
3501 add_iv_candidate_for_use (data, group->vuses[0]);
3503 add_iv_candidate_derived_from_uses (data);
3506 /* Record important candidates and add them to related_cands bitmaps. */
3508 static void
3509 record_important_candidates (struct ivopts_data *data)
3511 unsigned i;
3512 struct iv_group *group;
3514 for (i = 0; i < data->vcands.length (); i++)
3516 struct iv_cand *cand = data->vcands[i];
3518 if (cand->important)
3519 bitmap_set_bit (data->important_candidates, i);
3522 data->consider_all_candidates = (data->vcands.length ()
3523 <= CONSIDER_ALL_CANDIDATES_BOUND);
3525 /* Add important candidates to groups' related_cands bitmaps. */
3526 for (i = 0; i < data->vgroups.length (); i++)
3528 group = data->vgroups[i];
3529 bitmap_ior_into (group->related_cands, data->important_candidates);
3533 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3534 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3535 we allocate a simple list to every use. */
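/* Illustrative example: a group related to 5 candidates gets its map
   rounded up to 8 entries, so "cand->id & 7" can later serve as a cheap
   hash index into the cost map.  */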
3537 static void
3538 alloc_use_cost_map (struct ivopts_data *data)
3540 unsigned i, size, s;
3542 for (i = 0; i < data->vgroups.length (); i++)
3544 struct iv_group *group = data->vgroups[i];
3546 if (data->consider_all_candidates)
3547 size = data->vcands.length ();
3548 else
3550 s = bitmap_count_bits (group->related_cands);
3552 /* Round up to a power of two, so that computing the modulo by it is fast. */
3553 size = s ? (1 << ceil_log2 (s)) : 1;
3556 group->n_map_members = size;
3557 group->cost_map = XCNEWVEC (struct cost_pair, size);
3561 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3562 on invariants INV_VARS and that the value used in expressing it is
3563 VALUE, and in case of iv elimination the comparison operator is COMP. */
3565 static void
3566 set_group_iv_cost (struct ivopts_data *data,
3567 struct iv_group *group, struct iv_cand *cand,
3568 comp_cost cost, bitmap inv_vars, tree value,
3569 enum tree_code comp, bitmap inv_exprs)
3571 unsigned i, s;
3573 if (cost.infinite_cost_p ())
3575 BITMAP_FREE (inv_vars);
3576 BITMAP_FREE (inv_exprs);
3577 return;
3580 if (data->consider_all_candidates)
3582 group->cost_map[cand->id].cand = cand;
3583 group->cost_map[cand->id].cost = cost;
3584 group->cost_map[cand->id].inv_vars = inv_vars;
3585 group->cost_map[cand->id].inv_exprs = inv_exprs;
3586 group->cost_map[cand->id].value = value;
3587 group->cost_map[cand->id].comp = comp;
3588 return;
3591 /* n_map_members is a power of two, so this computes modulo. */
3592 s = cand->id & (group->n_map_members - 1);
3593 for (i = s; i < group->n_map_members; i++)
3594 if (!group->cost_map[i].cand)
3595 goto found;
3596 for (i = 0; i < s; i++)
3597 if (!group->cost_map[i].cand)
3598 goto found;
3600 gcc_unreachable ();
3602 found:
3603 group->cost_map[i].cand = cand;
3604 group->cost_map[i].cost = cost;
3605 group->cost_map[i].inv_vars = inv_vars;
3606 group->cost_map[i].inv_exprs = inv_exprs;
3607 group->cost_map[i].value = value;
3608 group->cost_map[i].comp = comp;
3611 /* Gets cost of (GROUP, CAND) pair. */
3613 static struct cost_pair *
3614 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3615 struct iv_cand *cand)
3617 unsigned i, s;
3618 struct cost_pair *ret;
3620 if (!cand)
3621 return NULL;
3623 if (data->consider_all_candidates)
3625 ret = group->cost_map + cand->id;
3626 if (!ret->cand)
3627 return NULL;
3629 return ret;
3632 /* n_map_members is a power of two, so this computes modulo. */
3633 s = cand->id & (group->n_map_members - 1);
3634 for (i = s; i < group->n_map_members; i++)
3635 if (group->cost_map[i].cand == cand)
3636 return group->cost_map + i;
3637 else if (group->cost_map[i].cand == NULL)
3638 return NULL;
3639 for (i = 0; i < s; i++)
3640 if (group->cost_map[i].cand == cand)
3641 return group->cost_map + i;
3642 else if (group->cost_map[i].cand == NULL)
3643 return NULL;
3645 return NULL;
3648 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3649 static rtx
3650 produce_memory_decl_rtl (tree obj, int *regno)
3652 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3653 machine_mode address_mode = targetm.addr_space.address_mode (as);
3654 rtx x;
3656 gcc_assert (obj);
3657 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3659 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3660 x = gen_rtx_SYMBOL_REF (address_mode, name);
3661 SET_SYMBOL_REF_DECL (x, obj);
3662 x = gen_rtx_MEM (DECL_MODE (obj), x);
3663 set_mem_addr_space (x, as);
3664 targetm.encode_section_info (obj, x, true);
3666 else
3668 x = gen_raw_REG (address_mode, (*regno)++);
3669 x = gen_rtx_MEM (DECL_MODE (obj), x);
3670 set_mem_addr_space (x, as);
3673 return x;
3676 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3677 walk_tree. DATA contains the actual fake register number. */
3679 static tree
3680 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3682 tree obj = NULL_TREE;
3683 rtx x = NULL_RTX;
3684 int *regno = (int *) data;
3686 switch (TREE_CODE (*expr_p))
3688 case ADDR_EXPR:
3689 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3690 handled_component_p (*expr_p);
3691 expr_p = &TREE_OPERAND (*expr_p, 0))
3692 continue;
3693 obj = *expr_p;
3694 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3695 x = produce_memory_decl_rtl (obj, regno);
3696 break;
3698 case SSA_NAME:
3699 *ws = 0;
3700 obj = SSA_NAME_VAR (*expr_p);
3701 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3702 if (!obj)
3703 return NULL_TREE;
3704 if (!DECL_RTL_SET_P (obj))
3705 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3706 break;
3708 case VAR_DECL:
3709 case PARM_DECL:
3710 case RESULT_DECL:
3711 *ws = 0;
3712 obj = *expr_p;
3714 if (DECL_RTL_SET_P (obj))
3715 break;
3717 if (DECL_MODE (obj) == BLKmode)
3718 x = produce_memory_decl_rtl (obj, regno);
3719 else
3720 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3722 break;
3724 default:
3725 break;
3728 if (x)
3730 decl_rtl_to_reset.safe_push (obj);
3731 SET_DECL_RTL (obj, x);
3734 return NULL_TREE;
3737 /* Predict whether the given loop will be transformed in the RTL
3738 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3739 This is only for target-independent checks; see targetm.predict_doloop_p
3740 for the target-dependent ones.
3742 Note that according to some initial investigation, some checks like the costly
3743 niter check and invalid stmt scanning don't gain much in general
3744 cases, so keep this as simple as possible first.
3746 Some RTL-specific checks seem unable to be checked in gimple; if any new
3747 checks or easy checks _are_ missing here, please add them. */
3749 static bool ATTRIBUTE_UNUSED
3750 generic_predict_doloop_p (struct ivopts_data *data)
3752 struct loop *loop = data->current_loop;
3754 /* Call target hook for target dependent checks. */
3755 if (!targetm.predict_doloop_p (loop))
3757 if (dump_file && (dump_flags & TDF_DETAILS))
3758 fprintf (dump_file, "Predict doloop failure due to"
3759 " target specific checks.\n");
3760 return false;
3763 /* Similar to doloop_optimize, check the iteration description to know whether
3764 it's suitable or not. Keep it as simple as possible; feel free to extend it
3765 if you find that any multiple-exit cases matter. */
3766 edge exit = single_dom_exit (loop);
3767 struct tree_niter_desc *niter_desc;
3768 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3770 if (dump_file && (dump_flags & TDF_DETAILS))
3771 fprintf (dump_file, "Predict doloop failure due to"
3772 " unexpected niters.\n");
3773 return false;
3776 /* Similar to doloop_optimize, check whether the iteration count is too
3777 small to be profitable. */
3778 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3779 if (est_niter == -1)
3780 est_niter = get_likely_max_loop_iterations_int (loop);
3781 if (est_niter >= 0 && est_niter < 3)
3783 if (dump_file && (dump_flags & TDF_DETAILS))
3784 fprintf (dump_file,
3785 "Predict doloop failure due to"
3786 " too few iterations (%u).\n",
3787 (unsigned int) est_niter);
3788 return false;
3791 return true;
3794 /* Determines cost of the computation of EXPR. */
3796 static unsigned
3797 computation_cost (tree expr, bool speed)
3799 rtx_insn *seq;
3800 rtx rslt;
3801 tree type = TREE_TYPE (expr);
3802 unsigned cost;
3803 /* Avoid using hard regs in ways which may be unsupported. */
3804 int regno = LAST_VIRTUAL_REGISTER + 1;
3805 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3806 enum node_frequency real_frequency = node->frequency;
3808 node->frequency = NODE_FREQUENCY_NORMAL;
3809 crtl->maybe_hot_insn_p = speed;
3810 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3811 start_sequence ();
3812 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3813 seq = get_insns ();
3814 end_sequence ();
3815 default_rtl_profile ();
3816 node->frequency = real_frequency;
3818 cost = seq_cost (seq, speed);
3819 if (MEM_P (rslt))
3820 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3821 TYPE_ADDR_SPACE (type), speed);
3822 else if (!REG_P (rslt))
3823 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3825 return cost;
3828 /* Returns variable containing the value of candidate CAND at statement AT. */
3830 static tree
3831 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3833 if (stmt_after_increment (loop, cand, stmt))
3834 return cand->var_after;
3835 else
3836 return cand->var_before;
3839 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3840 same precision that is at least as wide as the precision of TYPE, stores
3841 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3842 type of A and B. */
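/* Illustrative example: if *A is (unsigned int) x and *B is (unsigned int) y
   with x and y both of type "long", this stores x and y back to *A and *B
   and returns "long", so their difference can be folded in the wider type.  */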
3844 static tree
3845 determine_common_wider_type (tree *a, tree *b)
3847 tree wider_type = NULL;
3848 tree suba, subb;
3849 tree atype = TREE_TYPE (*a);
3851 if (CONVERT_EXPR_P (*a))
3853 suba = TREE_OPERAND (*a, 0);
3854 wider_type = TREE_TYPE (suba);
3855 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3856 return atype;
3858 else
3859 return atype;
3861 if (CONVERT_EXPR_P (*b))
3863 subb = TREE_OPERAND (*b, 0);
3864 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3865 return atype;
3867 else
3868 return atype;
3870 *a = suba;
3871 *b = subb;
3872 return wider_type;
3875 /* Determines the expression by which USE is expressed from induction variable
3876 CAND at statement AT in LOOP. The expression is stored in two parts in a
3877 decomposed form. The invariant part is stored in AFF_INV; while variant
3878 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3879 non-null. Returns false if USE cannot be expressed using CAND. */
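/* Illustrative example: if USE has step 4 and CAND has step 2, then
   rat = 2 and the use value is reconstructed as
   ubase - 2 * cbase + 2 * var, with cbase additionally shifted by one
   candidate step when the computation point is after the increment.  */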
3881 static bool
3882 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3883 struct iv_cand *cand, struct aff_tree *aff_inv,
3884 struct aff_tree *aff_var, widest_int *prat = NULL)
3886 tree ubase = use->iv->base, ustep = use->iv->step;
3887 tree cbase = cand->iv->base, cstep = cand->iv->step;
3888 tree common_type, uutype, var, cstep_common;
3889 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3890 aff_tree aff_cbase;
3891 widest_int rat;
3893 /* We must have a precision to express the values of use. */
3894 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3895 return false;
3897 var = var_at_stmt (loop, cand, at);
3898 uutype = unsigned_type_for (utype);
3900 /* If the conversion is not noop, perform it. */
3901 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3903 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3904 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3906 tree inner_base, inner_step, inner_type;
3907 inner_base = TREE_OPERAND (cbase, 0);
3908 if (CONVERT_EXPR_P (cstep))
3909 inner_step = TREE_OPERAND (cstep, 0);
3910 else
3911 inner_step = cstep;
3913 inner_type = TREE_TYPE (inner_base);
3914 /* If candidate is added from a biv whose type is smaller than
3915 ctype, we know both candidate and the biv won't overflow.
3917 In this case, it's safe to skip the conversion in the candidate.
3918 As an example, (unsigned short)((unsigned long)A) equals
3919 (unsigned short)A, if A has a type no larger than short. */
3919 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3921 cbase = inner_base;
3922 cstep = inner_step;
3925 cbase = fold_convert (uutype, cbase);
3926 cstep = fold_convert (uutype, cstep);
3927 var = fold_convert (uutype, var);
3930 /* Ratio is 1 when computing the value of biv cand by itself.
3931 We can't rely on constant_multiple_of in this case because the
3932 use is created after the original biv is selected. The call
3933 could fail because of inconsistent fold behavior. See PR68021
3934 for more information. */
3935 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3937 gcc_assert (is_gimple_assign (use->stmt));
3938 gcc_assert (use->iv->ssa_name == cand->var_after);
3939 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3940 rat = 1;
3942 else if (!constant_multiple_of (ustep, cstep, &rat))
3943 return false;
3945 if (prat)
3946 *prat = rat;
3948 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3949 type, we achieve better folding by computing their difference in this
3950 wider type, and cast the result to UUTYPE. We do not need to worry about
3951 overflows, as all the arithmetic will in the end be performed in UUTYPE
3952 anyway. */
3953 common_type = determine_common_wider_type (&ubase, &cbase);
3955 /* use = ubase - ratio * cbase + ratio * var. */
3956 tree_to_aff_combination (ubase, common_type, aff_inv);
3957 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3958 tree_to_aff_combination (var, uutype, aff_var);
3960 /* We need to shift the value if we are after the increment. */
3961 if (stmt_after_increment (loop, cand, at))
3963 aff_tree cstep_aff;
3965 if (common_type != uutype)
3966 cstep_common = fold_convert (common_type, cstep);
3967 else
3968 cstep_common = cstep;
3970 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3971 aff_combination_add (&aff_cbase, &cstep_aff);
3974 aff_combination_scale (&aff_cbase, -rat);
3975 aff_combination_add (aff_inv, &aff_cbase);
3976 if (common_type != uutype)
3977 aff_combination_convert (aff_inv, uutype);
3979 aff_combination_scale (aff_var, rat);
3980 return true;
3983 /* Determines the expression by which USE is expressed from induction variable
3984 CAND at statement AT in LOOP. The expression is stored in a decomposed
3985 form into AFF. Returns false if USE cannot be expressed using CAND. */
3987 static bool
3988 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3989 struct iv_cand *cand, struct aff_tree *aff)
3991 aff_tree aff_var;
3993 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3994 return false;
3996 aff_combination_add (aff, &aff_var);
3997 return true;
4000 /* Return the type of USE. */
4002 static tree
4003 get_use_type (struct iv_use *use)
4005 tree base_type = TREE_TYPE (use->iv->base);
4006 tree type;
4008 if (use->type == USE_REF_ADDRESS)
4010 /* The base_type may be a void pointer. Create a pointer type based on
4011 the mem_ref instead. */
4012 type = build_pointer_type (TREE_TYPE (*use->op_p));
4013 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4014 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4016 else
4017 type = base_type;
4019 return type;
4022 /* Determines the expression by which USE is expressed from induction variable
4023 CAND at statement AT in LOOP. The computation is unshared. */
4025 static tree
4026 get_computation_at (struct loop *loop, gimple *at,
4027 struct iv_use *use, struct iv_cand *cand)
4029 aff_tree aff;
4030 tree type = get_use_type (use);
4032 if (!get_computation_aff (loop, at, use, cand, &aff))
4033 return NULL_TREE;
4034 unshare_aff_combination (&aff);
4035 return fold_convert (type, aff_combination_to_tree (&aff));
4038 /* Adjust the cost COST for being in loop setup rather than loop body.
4039 If we're optimizing for space, the loop setup overhead is constant;
4040 if we're optimizing for speed, amortize it over the average iteration count.
4041 If ROUND_UP_P is true, the result is rounded up rather than truncated toward
4042 zero when optimizing for speed. */
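/* Illustrative example: with an average of 10 iterations, a setup cost of
   25 becomes 25/10 = 2 when optimizing for speed (3 if ROUND_UP_P), while
   when optimizing for size it is returned unchanged.  */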
4043 static int64_t
4044 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4045 bool round_up_p = false)
4047 if (cost == INFTY)
4048 return cost;
4049 else if (optimize_loop_for_speed_p (data->current_loop))
4051 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4052 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4054 else
4055 return cost;
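/* Illustrative example (editorial addition): with an average of 10 loop
   iterations, a setup cost of 25 amortizes to 25 / 10 = 2 when truncating,
   or (25 + 9) / 10 = 3 when ROUND_UP_P is true; when optimizing for size
   the full cost of 25 is returned unchanged.  */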
4058 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4059 EXPR operand holding the shift. COST0 and COST1 are the costs for
4060 calculating the operands of EXPR. Returns true if successful, and returns
4061 the cost in COST. */
4063 static bool
4064 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4065 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4067 comp_cost res;
4068 tree op1 = TREE_OPERAND (expr, 1);
4069 tree cst = TREE_OPERAND (mult, 1);
4070 tree multop = TREE_OPERAND (mult, 0);
4071 int m = exact_log2 (int_cst_value (cst));
4072 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4073 int as_cost, sa_cost;
4074 bool mult_in_op1;
4076 if (!(m >= 0 && m < maxm))
4077 return false;
4079 STRIP_NOPS (op1);
4080 mult_in_op1 = operand_equal_p (op1, mult, 0);
4082 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4084 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4085 use that in preference to a shift insn followed by an add insn. */
4086 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4087 ? shiftadd_cost (speed, mode, m)
4088 : (mult_in_op1
4089 ? shiftsub1_cost (speed, mode, m)
4090 : shiftsub0_cost (speed, mode, m)));
4092 res = comp_cost (MIN (as_cost, sa_cost), 0);
4093 res += (mult_in_op1 ? cost0 : cost1);
4095 STRIP_NOPS (multop);
4096 if (!is_gimple_val (multop))
4097 res += force_expr_to_var_cost (multop, speed);
4099 *cost = res;
4100 return true;
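/* Illustrative example (editorial addition): for EXPR = a + b * 8 we get
   m = 3, so the cost of an add plus a shift by 3 (AS_COST) is compared
   against a single shift-and-add instruction (SA_COST); the cheaper one is
   taken and the cost of computing the non-multiplied operand a is added,
   plus the cost of forcing b into a variable if it is not a simple value.  */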
4103 /* Estimates cost of forcing expression EXPR into a variable. */
4105 static comp_cost
4106 force_expr_to_var_cost (tree expr, bool speed)
4108 static bool costs_initialized = false;
4109 static unsigned integer_cost [2];
4110 static unsigned symbol_cost [2];
4111 static unsigned address_cost [2];
4112 tree op0, op1;
4113 comp_cost cost0, cost1, cost;
4114 machine_mode mode;
4115 scalar_int_mode int_mode;
4117 if (!costs_initialized)
4119 tree type = build_pointer_type (integer_type_node);
4120 tree var, addr;
4121 rtx x;
4122 int i;
4124 var = create_tmp_var_raw (integer_type_node, "test_var");
4125 TREE_STATIC (var) = 1;
4126 x = produce_memory_decl_rtl (var, NULL);
4127 SET_DECL_RTL (var, x);
4129 addr = build1 (ADDR_EXPR, type, var);
4132 for (i = 0; i < 2; i++)
4134 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4135 2000), i);
4137 symbol_cost[i] = computation_cost (addr, i) + 1;
4139 address_cost[i]
4140 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4141 if (dump_file && (dump_flags & TDF_DETAILS))
4143 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4144 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4145 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4146 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4147 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4148 fprintf (dump_file, "\n");
4152 costs_initialized = true;
4155 STRIP_NOPS (expr);
4157 if (SSA_VAR_P (expr))
4158 return no_cost;
4160 if (is_gimple_min_invariant (expr))
4162 if (poly_int_tree_p (expr))
4163 return comp_cost (integer_cost [speed], 0);
4165 if (TREE_CODE (expr) == ADDR_EXPR)
4167 tree obj = TREE_OPERAND (expr, 0);
4169 if (VAR_P (obj)
4170 || TREE_CODE (obj) == PARM_DECL
4171 || TREE_CODE (obj) == RESULT_DECL)
4172 return comp_cost (symbol_cost [speed], 0);
4175 return comp_cost (address_cost [speed], 0);
4178 switch (TREE_CODE (expr))
4180 case POINTER_PLUS_EXPR:
4181 case PLUS_EXPR:
4182 case MINUS_EXPR:
4183 case MULT_EXPR:
4184 case TRUNC_DIV_EXPR:
4185 case BIT_AND_EXPR:
4186 case BIT_IOR_EXPR:
4187 case LSHIFT_EXPR:
4188 case RSHIFT_EXPR:
4189 op0 = TREE_OPERAND (expr, 0);
4190 op1 = TREE_OPERAND (expr, 1);
4191 STRIP_NOPS (op0);
4192 STRIP_NOPS (op1);
4193 break;
4195 CASE_CONVERT:
4196 case NEGATE_EXPR:
4197 case BIT_NOT_EXPR:
4198 op0 = TREE_OPERAND (expr, 0);
4199 STRIP_NOPS (op0);
4200 op1 = NULL_TREE;
4201 break;
4203 default:
4204 /* Just an arbitrary value, FIXME. */
4205 return comp_cost (target_spill_cost[speed], 0);
4208 if (op0 == NULL_TREE
4209 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4210 cost0 = no_cost;
4211 else
4212 cost0 = force_expr_to_var_cost (op0, speed);
4214 if (op1 == NULL_TREE
4215 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4216 cost1 = no_cost;
4217 else
4218 cost1 = force_expr_to_var_cost (op1, speed);
4220 mode = TYPE_MODE (TREE_TYPE (expr));
4221 switch (TREE_CODE (expr))
4223 case POINTER_PLUS_EXPR:
4224 case PLUS_EXPR:
4225 case MINUS_EXPR:
4226 case NEGATE_EXPR:
4227 cost = comp_cost (add_cost (speed, mode), 0);
4228 if (TREE_CODE (expr) != NEGATE_EXPR)
4230 tree mult = NULL_TREE;
4231 comp_cost sa_cost;
4232 if (TREE_CODE (op1) == MULT_EXPR)
4233 mult = op1;
4234 else if (TREE_CODE (op0) == MULT_EXPR)
4235 mult = op0;
4237 if (mult != NULL_TREE
4238 && is_a <scalar_int_mode> (mode, &int_mode)
4239 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4240 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4241 speed, &sa_cost))
4242 return sa_cost;
4244 break;
4246 CASE_CONVERT:
4248 tree inner_mode, outer_mode;
4249 outer_mode = TREE_TYPE (expr);
4250 inner_mode = TREE_TYPE (op0);
4251 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4252 TYPE_MODE (inner_mode), speed), 0);
4254 break;
4256 case MULT_EXPR:
4257 if (cst_and_fits_in_hwi (op0))
4258 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4259 mode, speed), 0);
4260 else if (cst_and_fits_in_hwi (op1))
4261 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4262 mode, speed), 0);
4263 else
4264 return comp_cost (target_spill_cost [speed], 0);
4265 break;
4267 case TRUNC_DIV_EXPR:
4268 /* Division by power of two is usually cheap, so we allow it. Forbid
4269 anything else. */
4270 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4271 cost = comp_cost (add_cost (speed, mode), 0);
4272 else
4273 cost = comp_cost (target_spill_cost[speed], 0);
4274 break;
4276 case BIT_AND_EXPR:
4277 case BIT_IOR_EXPR:
4278 case BIT_NOT_EXPR:
4279 case LSHIFT_EXPR:
4280 case RSHIFT_EXPR:
4281 cost = comp_cost (add_cost (speed, mode), 0);
4282 break;
4284 default:
4285 gcc_unreachable ();
4288 cost += cost0;
4289 cost += cost1;
4290 return cost;
4293 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4294 invariants the computation depends on. */
4296 static comp_cost
4297 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4299 if (!expr)
4300 return no_cost;
4302 find_inv_vars (data, &expr, inv_vars);
4303 return force_expr_to_var_cost (expr, data->speed);
4306 /* Returns the cost of an auto-modifying address expression of the form
4307 base + offset.  AINC_STEP is the step size of the address IV and
4308 AINC_OFFSET is the offset of the address expression.  The address
4309 expression has ADDR_MODE in address space AS, and the memory access has
4310 MEM_MODE.  SPEED indicates whether we are optimizing for speed or size. */
4312 enum ainc_type
4314 AINC_PRE_INC, /* Pre increment. */
4315 AINC_PRE_DEC, /* Pre decrement. */
4316 AINC_POST_INC, /* Post increment. */
4317 AINC_POST_DEC, /* Post decrement. */
4318 AINC_NONE /* Also the number of auto increment types. */
4321 struct ainc_cost_data
4323 int64_t costs[AINC_NONE];
4326 static comp_cost
4327 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4328 machine_mode addr_mode, machine_mode mem_mode,
4329 addr_space_t as, bool speed)
4331 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4332 && !USE_STORE_PRE_DECREMENT (mem_mode)
4333 && !USE_LOAD_POST_DECREMENT (mem_mode)
4334 && !USE_STORE_POST_DECREMENT (mem_mode)
4335 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4336 && !USE_STORE_PRE_INCREMENT (mem_mode)
4337 && !USE_LOAD_POST_INCREMENT (mem_mode)
4338 && !USE_STORE_POST_INCREMENT (mem_mode))
4339 return infinite_cost;
4341 static vec<ainc_cost_data *> ainc_cost_data_list;
4342 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4343 if (idx >= ainc_cost_data_list.length ())
4345 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4347 gcc_assert (nsize > idx);
4348 ainc_cost_data_list.safe_grow_cleared (nsize);
4351 ainc_cost_data *data = ainc_cost_data_list[idx];
4352 if (data == NULL)
4354 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4356 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4357 data->costs[AINC_PRE_DEC] = INFTY;
4358 data->costs[AINC_POST_DEC] = INFTY;
4359 data->costs[AINC_PRE_INC] = INFTY;
4360 data->costs[AINC_POST_INC] = INFTY;
4361 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4362 || USE_STORE_PRE_DECREMENT (mem_mode))
4364 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4366 if (memory_address_addr_space_p (mem_mode, addr, as))
4367 data->costs[AINC_PRE_DEC]
4368 = address_cost (addr, mem_mode, as, speed);
4370 if (USE_LOAD_POST_DECREMENT (mem_mode)
4371 || USE_STORE_POST_DECREMENT (mem_mode))
4373 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4375 if (memory_address_addr_space_p (mem_mode, addr, as))
4376 data->costs[AINC_POST_DEC]
4377 = address_cost (addr, mem_mode, as, speed);
4379 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4380 || USE_STORE_PRE_INCREMENT (mem_mode))
4382 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4384 if (memory_address_addr_space_p (mem_mode, addr, as))
4385 data->costs[AINC_PRE_INC]
4386 = address_cost (addr, mem_mode, as, speed);
4388 if (USE_LOAD_POST_INCREMENT (mem_mode)
4389 || USE_STORE_POST_INCREMENT (mem_mode))
4391 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4393 if (memory_address_addr_space_p (mem_mode, addr, as))
4394 data->costs[AINC_POST_INC]
4395 = address_cost (addr, mem_mode, as, speed);
4397 ainc_cost_data_list[idx] = data;
4400 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4401 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4402 return comp_cost (data->costs[AINC_POST_INC], 0);
4403 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4404 return comp_cost (data->costs[AINC_POST_DEC], 0);
4405 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4406 return comp_cost (data->costs[AINC_PRE_INC], 0);
4407 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4408 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4410 return infinite_cost;
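/* Illustrative mapping (editorial addition): for a 4-byte access the checks
   above correspond to step 4 / offset 0 -> post-increment, step 4 /
   offset 4 -> pre-increment, step -4 / offset 0 -> post-decrement, and
   step -4 / offset -4 -> pre-decrement.  */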
4413 /* Return the cost of computing USE's address expression by using CAND.
4414 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4415 address expression, respectively.  If AFF_INV is simple, store the
4416 loop invariant variables on which it depends in INV_VARS; if AFF_INV
4417 is complicated, handle it as a new invariant expression and record it
4418 in INV_EXPR.  RATIO is the ratio of USE's step to CAND's step.  If
4419 CAN_AUTOINC is non-NULL, store a boolean value in it indicating
4420 whether this is an auto-increment address. */
4422 static comp_cost
4423 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4424 struct iv_cand *cand, aff_tree *aff_inv,
4425 aff_tree *aff_var, HOST_WIDE_INT ratio,
4426 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4427 bool *can_autoinc, bool speed)
4429 rtx addr;
4430 bool simple_inv = true;
4431 tree comp_inv = NULL_TREE, type = aff_var->type;
4432 comp_cost var_cost = no_cost, cost = no_cost;
4433 struct mem_address parts = {NULL_TREE, integer_one_node,
4434 NULL_TREE, NULL_TREE, NULL_TREE};
4435 machine_mode addr_mode = TYPE_MODE (type);
4436 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4437 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4438 /* Only true if ratio != 1. */
4439 bool ok_with_ratio_p = false;
4440 bool ok_without_ratio_p = false;
4442 if (!aff_combination_const_p (aff_inv))
4444 parts.index = integer_one_node;
4445 /* Addressing mode "base + index". */
4446 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4447 if (ratio != 1)
4449 parts.step = wide_int_to_tree (type, ratio);
4450 /* Addressing mode "base + index << scale". */
4451 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4452 if (!ok_with_ratio_p)
4453 parts.step = NULL_TREE;
4455 if (ok_with_ratio_p || ok_without_ratio_p)
4457 if (maybe_ne (aff_inv->offset, 0))
4459 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4460 /* Addressing mode "base + index [<< scale] + offset". */
4461 if (!valid_mem_ref_p (mem_mode, as, &parts))
4462 parts.offset = NULL_TREE;
4463 else
4464 aff_inv->offset = 0;
4467 move_fixed_address_to_symbol (&parts, aff_inv);
4468 /* Base is fixed address and is moved to symbol part. */
4469 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4470 parts.base = NULL_TREE;
4472 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4473 if (parts.symbol != NULL_TREE
4474 && !valid_mem_ref_p (mem_mode, as, &parts))
4476 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4477 parts.symbol = NULL_TREE;
4478 /* Reset SIMPLE_INV since symbol address needs to be computed
4479 outside of address expression in this case. */
4480 simple_inv = false;
4481 /* The symbol part is moved back to the base part; it can't be NULL. */
4482 parts.base = integer_one_node;
4485 else
4486 parts.index = NULL_TREE;
4488 else
4490 poly_int64 ainc_step;
4491 if (can_autoinc
4492 && ratio == 1
4493 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4495 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4497 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4498 ainc_offset += ainc_step;
4499 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4500 addr_mode, mem_mode, as, speed);
4501 if (!cost.infinite_cost_p ())
4503 *can_autoinc = true;
4504 return cost;
4506 cost = no_cost;
4508 if (!aff_combination_zero_p (aff_inv))
4510 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4511 /* Addressing mode "base + offset". */
4512 if (!valid_mem_ref_p (mem_mode, as, &parts))
4513 parts.offset = NULL_TREE;
4514 else
4515 aff_inv->offset = 0;
4519 if (simple_inv)
4520 simple_inv = (aff_inv == NULL
4521 || aff_combination_const_p (aff_inv)
4522 || aff_combination_singleton_var_p (aff_inv));
4523 if (!aff_combination_zero_p (aff_inv))
4524 comp_inv = aff_combination_to_tree (aff_inv);
4525 if (comp_inv != NULL_TREE)
4526 cost = force_var_cost (data, comp_inv, inv_vars);
4527 if (ratio != 1 && parts.step == NULL_TREE)
4528 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4529 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4530 var_cost += add_cost (speed, addr_mode);
4532 if (comp_inv && inv_expr && !simple_inv)
4534 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4535 /* Clear the inv_vars dependences; they are subsumed by INV_EXPR. */
4536 if (*inv_expr != NULL && inv_vars && *inv_vars)
4537 bitmap_clear (*inv_vars);
4539 /* The cost of a small invariant expression adjusted against loop niters
4540 is usually zero, which makes it hard to distinguish from a candidate
4541 based on loop invariant variables.  Moreover, the generated invariant
4542 expression may not be hoisted out of the loop by a following pass.
4543 We penalize the cost by rounding up in order to neutralize such
4544 effects. */
4545 cost.cost = adjust_setup_cost (data, cost.cost, true);
4546 cost.scratch = cost.cost;
4549 cost += var_cost;
4550 addr = addr_for_mem_ref (&parts, as, false);
4551 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4552 cost += address_cost (addr, mem_mode, as, speed);
4554 if (parts.symbol != NULL_TREE)
4555 cost.complexity += 1;
4556 /* Don't increase the complexity of adding a scaled index if it's
4557 the only kind of index that the target allows. */
4558 if (parts.step != NULL_TREE && ok_without_ratio_p)
4559 cost.complexity += 1;
4560 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4561 cost.complexity += 1;
4562 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4563 cost.complexity += 1;
4565 return cost;
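/* Illustrative note (editorial addition): the complexity counts one unit per
   extra addressing component, so an address like symbol + base + index * 4
   + 16 gets complexity 4, assuming the target also accepts an unscaled
   index (otherwise the scaled index does not add to the complexity).  */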
4568 /* Scale (multiply) the computed COST (except the scratch part, which should
4569 be hoisted out of the loop) by header->frequency / AT->frequency, which
4570 makes the expected cost more accurate. */
4572 static comp_cost
4573 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4575 if (data->speed
4576 && data->current_loop->header->count.to_frequency (cfun) > 0)
4578 basic_block bb = gimple_bb (at);
4579 gcc_assert (cost.scratch <= cost.cost);
4580 int scale_factor = (int)(intptr_t) bb->aux;
4581 if (scale_factor == 1)
4582 return cost;
4584 int64_t scaled_cost
4585 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4587 if (dump_file && (dump_flags & TDF_DETAILS))
4588 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4589 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4590 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4592 cost.cost = scaled_cost;
4595 return cost;
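/* Illustrative example (editorial addition): with cost = 10, scratch = 2 and
   a basic-block scale factor of 3, the scaled cost is 2 + (10 - 2) * 3 = 26;
   the scratch (setup) part is not scaled because it is expected to be
   hoisted out of the loop.  */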
4598 /* Determines the cost of the computation by which USE is expressed
4599 from induction variable CAND.  If ADDRESS_P is true, we just need
4600 to create an address from it, otherwise we want to get it into a
4601 register.  A set of invariants we depend on is stored in INV_VARS.
4602 If CAN_AUTOINC is non-NULL, use it to record whether autoinc
4603 addressing is likely.  If INV_EXPR is non-NULL, record the invariant
4604 expression entry in it. */
4606 static comp_cost
4607 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4608 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4609 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4611 gimple *at = use->stmt;
4612 tree ubase = use->iv->base, cbase = cand->iv->base;
4613 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4614 tree comp_inv = NULL_TREE;
4615 HOST_WIDE_INT ratio, aratio;
4616 comp_cost cost;
4617 widest_int rat;
4618 aff_tree aff_inv, aff_var;
4619 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4621 if (inv_vars)
4622 *inv_vars = NULL;
4623 if (can_autoinc)
4624 *can_autoinc = false;
4625 if (inv_expr)
4626 *inv_expr = NULL;
4628 /* Check if we have enough precision to express the values of use. */
4629 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4630 return infinite_cost;
4632 if (address_p
4633 || (use->iv->base_object
4634 && cand->iv->base_object
4635 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4636 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4638 /* Do not try to express address of an object with computation based
4639 on address of a different object. This may cause problems in rtl
4640 level alias analysis (that does not expect this to be happening,
4641 as this is illegal in C), and would be unlikely to be useful
4642 anyway. */
4643 if (use->iv->base_object
4644 && cand->iv->base_object
4645 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4646 return infinite_cost;
4649 if (!get_computation_aff_1 (data->current_loop, at, use,
4650 cand, &aff_inv, &aff_var, &rat)
4651 || !wi::fits_shwi_p (rat))
4652 return infinite_cost;
4654 ratio = rat.to_shwi ();
4655 if (address_p)
4657 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4658 inv_vars, inv_expr, can_autoinc, speed);
4659 return get_scaled_computation_cost_at (data, at, cost);
4662 bool simple_inv = (aff_combination_const_p (&aff_inv)
4663 || aff_combination_singleton_var_p (&aff_inv));
4664 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4665 aff_combination_convert (&aff_inv, signed_type);
4666 if (!aff_combination_zero_p (&aff_inv))
4667 comp_inv = aff_combination_to_tree (&aff_inv);
4669 cost = force_var_cost (data, comp_inv, inv_vars);
4670 if (comp_inv && inv_expr && !simple_inv)
4672 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4673 /* Clear the inv_vars dependences; they are subsumed by INV_EXPR. */
4674 if (*inv_expr != NULL && inv_vars && *inv_vars)
4675 bitmap_clear (*inv_vars);
4677 cost.cost = adjust_setup_cost (data, cost.cost);
4678 /* Record setup cost in scratch field. */
4679 cost.scratch = cost.cost;
4681 /* Cost of constant integer can be covered when adding invariant part to
4682 variant part. */
4683 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4684 cost = no_cost;
4686 /* Need type narrowing to represent use with cand. */
4687 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4689 machine_mode outer_mode = TYPE_MODE (utype);
4690 machine_mode inner_mode = TYPE_MODE (ctype);
4691 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4694 /* Turn a + i * (-c) into a - i * c. */
4695 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4696 aratio = -ratio;
4697 else
4698 aratio = ratio;
4700 if (ratio != 1)
4701 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4703 /* TODO: We may also need to check if we can compute a + i * 4 in one
4704 instruction. */
4705 /* Need to add up the invariant and variant parts. */
4706 if (comp_inv && !integer_zerop (comp_inv))
4707 cost += add_cost (speed, TYPE_MODE (utype));
4709 return get_scaled_computation_cost_at (data, at, cost);
4712 /* Determines cost of computing the use in GROUP with CAND in a generic
4713 expression. */
4715 static bool
4716 determine_group_iv_cost_generic (struct ivopts_data *data,
4717 struct iv_group *group, struct iv_cand *cand)
4719 comp_cost cost;
4720 iv_inv_expr_ent *inv_expr = NULL;
4721 bitmap inv_vars = NULL, inv_exprs = NULL;
4722 struct iv_use *use = group->vuses[0];
4724 /* The simple case first -- if we need to express value of the preserved
4725 original biv, the cost is 0. This also prevents us from counting the
4726 cost of increment twice -- once at this use and once in the cost of
4727 the candidate. */
4728 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4729 cost = no_cost;
4730 else
4731 cost = get_computation_cost (data, use, cand, false,
4732 &inv_vars, NULL, &inv_expr);
4734 if (inv_expr)
4736 inv_exprs = BITMAP_ALLOC (NULL);
4737 bitmap_set_bit (inv_exprs, inv_expr->id);
4739 set_group_iv_cost (data, group, cand, cost, inv_vars,
4740 NULL_TREE, ERROR_MARK, inv_exprs);
4741 return !cost.infinite_cost_p ();
4744 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4746 static bool
4747 determine_group_iv_cost_address (struct ivopts_data *data,
4748 struct iv_group *group, struct iv_cand *cand)
4750 unsigned i;
4751 bitmap inv_vars = NULL, inv_exprs = NULL;
4752 bool can_autoinc;
4753 iv_inv_expr_ent *inv_expr = NULL;
4754 struct iv_use *use = group->vuses[0];
4755 comp_cost sum_cost = no_cost, cost;
4757 cost = get_computation_cost (data, use, cand, true,
4758 &inv_vars, &can_autoinc, &inv_expr);
4760 if (inv_expr)
4762 inv_exprs = BITMAP_ALLOC (NULL);
4763 bitmap_set_bit (inv_exprs, inv_expr->id);
4765 sum_cost = cost;
4766 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4768 if (can_autoinc)
4769 sum_cost -= cand->cost_step;
4770 /* If we generated the candidate solely for exploiting autoincrement
4771 opportunities, and it turns out it can't be used, set the cost to
4772 infinity to make sure we ignore it. */
4773 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4774 sum_cost = infinite_cost;
4777 /* Uses in a group can share setup code, so only add setup cost once. */
4778 cost -= cost.scratch;
4779 /* Compute and add the costs for the rest of the uses in this group. */
4780 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4782 struct iv_use *next = group->vuses[i];
4784 /* TODO: We could skip computing cost for sub iv_use when it has the
4785 same cost as the first iv_use, but the cost really depends on the
4786 offset and where the iv_use is. */
4787 cost = get_computation_cost (data, next, cand, true,
4788 NULL, &can_autoinc, &inv_expr);
4789 if (inv_expr)
4791 if (!inv_exprs)
4792 inv_exprs = BITMAP_ALLOC (NULL);
4794 bitmap_set_bit (inv_exprs, inv_expr->id);
4796 sum_cost += cost;
4798 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4799 NULL_TREE, ERROR_MARK, inv_exprs);
4801 return !sum_cost.infinite_cost_p ();
4804 /* Computes value of candidate CAND at position AT in iteration NITER, and
4805 stores it to VAL. */
4807 static void
4808 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4809 aff_tree *val)
4811 aff_tree step, delta, nit;
4812 struct iv *iv = cand->iv;
4813 tree type = TREE_TYPE (iv->base);
4814 tree steptype;
4815 if (POINTER_TYPE_P (type))
4816 steptype = sizetype;
4817 else
4818 steptype = unsigned_type_for (type);
4820 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4821 aff_combination_convert (&step, steptype);
4822 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4823 aff_combination_convert (&nit, steptype);
4824 aff_combination_mult (&nit, &step, &delta);
4825 if (stmt_after_increment (loop, cand, at))
4826 aff_combination_add (&delta, &step);
4828 tree_to_aff_combination (iv->base, type, val);
4829 if (!POINTER_TYPE_P (type))
4830 aff_combination_convert (val, steptype);
4831 aff_combination_add (val, &delta);
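/* Illustrative note (editorial addition): the value computed above is
   base + step * niter, with one extra step added when AT is after the
   candidate's increment; e.g. base 100, step 4 and niter n give
   100 + 4 * n, or 100 + 4 * (n + 1) after the increment.  */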
4834 /* Returns period of induction variable iv. */
4836 static tree
4837 iv_period (struct iv *iv)
4839 tree step = iv->step, period, type;
4840 tree pow2div;
4842 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4844 type = unsigned_type_for (TREE_TYPE (step));
4845 /* The period of the iv is lcm (step, type_range)/step - 1,
4846 i.e., N*type_range/step - 1.  Since the type range is a power
4847 of two, N == step >> num_of_ending_zeros_binary (step),
4848 so the final result is
4850 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4853 pow2div = num_ending_zeros (step);
4855 period = build_low_bits_mask (type,
4856 (TYPE_PRECISION (type)
4857 - tree_to_uhwi (pow2div)));
4859 return period;
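/* Illustrative example (editorial addition): for a 32-bit unsigned step type
   and step 12 (two trailing zero bits), pow2div = 2 and the period is a mask
   of the low 32 - 2 = 30 bits, i.e. 2^30 - 1.  */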
4862 /* Returns the comparison operator used when eliminating the iv USE. */
4864 static enum tree_code
4865 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4867 struct loop *loop = data->current_loop;
4868 basic_block ex_bb;
4869 edge exit;
4871 ex_bb = gimple_bb (use->stmt);
4872 exit = EDGE_SUCC (ex_bb, 0);
4873 if (flow_bb_inside_loop_p (loop, exit->dest))
4874 exit = EDGE_SUCC (ex_bb, 1);
4876 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4879 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4880 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4881 calculation is performed in non-wrapping type.
4883 TODO: More generally, we could test for the situation that
4884 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4885 This would require knowing the sign of OFFSET. */
4887 static bool
4888 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4890 enum tree_code code;
4891 tree e1, e2;
4892 aff_tree aff_e1, aff_e2, aff_offset;
4894 if (!nowrap_type_p (TREE_TYPE (base)))
4895 return false;
4897 base = expand_simple_operations (base);
4899 if (TREE_CODE (base) == SSA_NAME)
4901 gimple *stmt = SSA_NAME_DEF_STMT (base);
4903 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4904 return false;
4906 code = gimple_assign_rhs_code (stmt);
4907 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4908 return false;
4910 e1 = gimple_assign_rhs1 (stmt);
4911 e2 = gimple_assign_rhs2 (stmt);
4913 else
4915 code = TREE_CODE (base);
4916 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4917 return false;
4918 e1 = TREE_OPERAND (base, 0);
4919 e2 = TREE_OPERAND (base, 1);
4922 /* Use affine expansion as deeper inspection to prove the equality. */
4923 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4924 &aff_e2, &data->name_expansion_cache);
4925 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4926 &aff_offset, &data->name_expansion_cache);
4927 aff_combination_scale (&aff_offset, -1);
4928 switch (code)
4930 case PLUS_EXPR:
4931 aff_combination_add (&aff_e2, &aff_offset);
4932 if (aff_combination_zero_p (&aff_e2))
4933 return true;
4935 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4936 &aff_e1, &data->name_expansion_cache);
4937 aff_combination_add (&aff_e1, &aff_offset);
4938 return aff_combination_zero_p (&aff_e1);
4940 case POINTER_PLUS_EXPR:
4941 aff_combination_add (&aff_e2, &aff_offset);
4942 return aff_combination_zero_p (&aff_e2);
4944 default:
4945 return false;
4949 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4950 comparison with CAND. NITER describes the number of iterations of
4951 the loops. If successful, the comparison in COMP_P is altered accordingly.
4953 We aim to handle the following situation:
4955 sometype *base, *p;
4956 int a, b, i;
4958 i = a;
4959 p = p_0 = base + a;
4961 do
4962 {
4963 bla (*p);
4964 p++;
4965 i++;
4966 }
4967 while (i < b);
4969 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4970 We aim to optimize this to
4972 p = p_0 = base + a;
4973 do
4974 {
4975 bla (*p);
4976 p++;
4977 }
4978 while (p < p_0 - a + b);
4980 This preserves the correctness, since the pointer arithmetic does not
4981 overflow. More precisely:
4983 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4984 overflow in computing it or the values of p.
4985 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4986 overflow. To prove this, we use the fact that p_0 = base + a. */
4988 static bool
4989 iv_elimination_compare_lt (struct ivopts_data *data,
4990 struct iv_cand *cand, enum tree_code *comp_p,
4991 struct tree_niter_desc *niter)
4993 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4994 struct aff_tree nit, tmpa, tmpb;
4995 enum tree_code comp;
4996 HOST_WIDE_INT step;
4998 /* We need to know that the candidate induction variable does not overflow.
4999 While more complex analysis may be used to prove this, for now just
5000 check that the variable appears in the original program and that it
5001 is computed in a type that guarantees no overflows. */
5002 cand_type = TREE_TYPE (cand->iv->base);
5003 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5004 return false;
5006 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5007 the calculation of the BOUND could overflow, making the comparison
5008 invalid. */
5009 if (!data->loop_single_exit_p)
5010 return false;
5012 /* We need to be able to decide whether candidate is increasing or decreasing
5013 in order to choose the right comparison operator. */
5014 if (!cst_and_fits_in_hwi (cand->iv->step))
5015 return false;
5016 step = int_cst_value (cand->iv->step);
5018 /* Check that the number of iterations matches the expected pattern:
5019 a + 1 > b ? 0 : b - a - 1. */
5020 mbz = niter->may_be_zero;
5021 if (TREE_CODE (mbz) == GT_EXPR)
5023 /* Handle a + 1 > b. */
5024 tree op0 = TREE_OPERAND (mbz, 0);
5025 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5027 a = TREE_OPERAND (op0, 0);
5028 b = TREE_OPERAND (mbz, 1);
5030 else
5031 return false;
5033 else if (TREE_CODE (mbz) == LT_EXPR)
5035 tree op1 = TREE_OPERAND (mbz, 1);
5037 /* Handle b < a + 1. */
5038 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5040 a = TREE_OPERAND (op1, 0);
5041 b = TREE_OPERAND (mbz, 0);
5043 else
5044 return false;
5046 else
5047 return false;
5049 /* Expected number of iterations is B - A - 1. Check that it matches
5050 the actual number, i.e., that B - A - NITER = 1. */
5051 tree_to_aff_combination (niter->niter, nit_type, &nit);
5052 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5053 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5054 aff_combination_scale (&nit, -1);
5055 aff_combination_scale (&tmpa, -1);
5056 aff_combination_add (&tmpb, &tmpa);
5057 aff_combination_add (&tmpb, &nit);
5058 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5059 return false;
5061 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5062 overflow. */
5063 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5064 cand->iv->step,
5065 fold_convert (TREE_TYPE (cand->iv->step), a));
5066 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5067 return false;
5069 /* Determine the new comparison operator. */
5070 comp = step < 0 ? GT_EXPR : LT_EXPR;
5071 if (*comp_p == NE_EXPR)
5072 *comp_p = comp;
5073 else if (*comp_p == EQ_EXPR)
5074 *comp_p = invert_tree_comparison (comp, false);
5075 else
5076 gcc_unreachable ();
5078 return true;
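/* Illustrative example (editorial addition): with a = 2 and b = 7, the
   pattern above gives may_be_zero = (2 + 1 > 7) and niter = b - a - 1 = 4;
   the check B - A - NITER = 7 - 2 - 4 = 1 succeeds, and for a positive step
   the exit condition is rewritten from != to <.  */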
5081 /* Check whether it is possible to express the condition in USE by comparison
5082 of candidate CAND. If so, store the value compared with to BOUND, and the
5083 comparison operator to COMP. */
5085 static bool
5086 may_eliminate_iv (struct ivopts_data *data,
5087 struct iv_use *use, struct iv_cand *cand, tree *bound,
5088 enum tree_code *comp)
5090 basic_block ex_bb;
5091 edge exit;
5092 tree period;
5093 struct loop *loop = data->current_loop;
5094 aff_tree bnd;
5095 struct tree_niter_desc *desc = NULL;
5097 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5098 return false;
5100 /* For now this works only for exits that dominate the loop latch.
5101 TODO: extend to other conditions inside loop body. */
5102 ex_bb = gimple_bb (use->stmt);
5103 if (use->stmt != last_stmt (ex_bb)
5104 || gimple_code (use->stmt) != GIMPLE_COND
5105 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5106 return false;
5108 exit = EDGE_SUCC (ex_bb, 0);
5109 if (flow_bb_inside_loop_p (loop, exit->dest))
5110 exit = EDGE_SUCC (ex_bb, 1);
5111 if (flow_bb_inside_loop_p (loop, exit->dest))
5112 return false;
5114 desc = niter_for_exit (data, exit);
5115 if (!desc)
5116 return false;
5118 /* Determine whether we can use the variable to test the exit condition.
5119 This is the case iff the period of the induction variable is greater
5120 than the number of iterations for which the exit condition is true. */
5121 period = iv_period (cand->iv);
5123 /* If the number of iterations is constant, compare against it directly. */
5124 if (TREE_CODE (desc->niter) == INTEGER_CST)
5126 /* See cand_value_at. */
5127 if (stmt_after_increment (loop, cand, use->stmt))
5129 if (!tree_int_cst_lt (desc->niter, period))
5130 return false;
5132 else
5134 if (tree_int_cst_lt (period, desc->niter))
5135 return false;
5139 /* If not, and if this is the only possible exit of the loop, see whether
5140 we can get a conservative estimate on the number of iterations of the
5141 entire loop and compare against that instead. */
5142 else
5144 widest_int period_value, max_niter;
5146 max_niter = desc->max;
5147 if (stmt_after_increment (loop, cand, use->stmt))
5148 max_niter += 1;
5149 period_value = wi::to_widest (period);
5150 if (wi::gtu_p (max_niter, period_value))
5152 /* See if we can take advantage of inferred loop bound
5153 information. */
5154 if (data->loop_single_exit_p)
5156 if (!max_loop_iterations (loop, &max_niter))
5157 return false;
5158 /* The loop bound is already adjusted by adding 1. */
5159 if (wi::gtu_p (max_niter, period_value))
5160 return false;
5162 else
5163 return false;
5167 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5169 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5170 aff_combination_to_tree (&bnd));
5171 *comp = iv_elimination_compare (data, use);
5173 /* It is unlikely that computing the number of iterations using division
5174 would be more profitable than keeping the original induction variable. */
5175 if (expression_expensive_p (*bound))
5176 return false;
5178 /* Sometimes it is possible to handle the situation that the number of
5179 iterations may be zero (unless additional assumptions hold) by using <
5180 instead of != in the exit condition.
5182 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5183 base the exit condition on it. However, that is often too
5184 expensive. */
5185 if (!integer_zerop (desc->may_be_zero))
5186 return iv_elimination_compare_lt (data, cand, comp, desc);
5188 return true;
5191 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5192 be copied, if it is used in the loop body and DATA->body_includes_call. */
5194 static int
5195 parm_decl_cost (struct ivopts_data *data, tree bound)
5197 tree sbound = bound;
5198 STRIP_NOPS (sbound);
5200 if (TREE_CODE (sbound) == SSA_NAME
5201 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5202 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5203 && data->body_includes_call)
5204 return COSTS_N_INSNS (1);
5206 return 0;
5209 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5211 static bool
5212 determine_group_iv_cost_cond (struct ivopts_data *data,
5213 struct iv_group *group, struct iv_cand *cand)
5215 tree bound = NULL_TREE;
5216 struct iv *cmp_iv;
5217 bitmap inv_exprs = NULL;
5218 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5219 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5220 enum comp_iv_rewrite rewrite_type;
5221 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5222 tree *control_var, *bound_cst;
5223 enum tree_code comp = ERROR_MARK;
5224 struct iv_use *use = group->vuses[0];
5226 /* Extract condition operands. */
5227 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5228 &bound_cst, NULL, &cmp_iv);
5229 gcc_assert (rewrite_type != COMP_IV_NA);
5231 /* Try iv elimination. */
5232 if (rewrite_type == COMP_IV_ELIM
5233 && may_eliminate_iv (data, use, cand, &bound, &comp))
5235 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5236 if (elim_cost.cost == 0)
5237 elim_cost.cost = parm_decl_cost (data, bound);
5238 else if (TREE_CODE (bound) == INTEGER_CST)
5239 elim_cost.cost = 0;
5240 /* If we replace a loop condition 'i < n' with 'p < base + n',
5241 inv_vars_elim will have 'base' and 'n' set, which implies that both
5242 'base' and 'n' will be live during the loop. More likely,
5243 'base + n' will be loop invariant, resulting in only one live value
5244 during the loop. So in that case we clear inv_vars_elim and set
5245 inv_expr_elim instead. */
5246 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5248 inv_expr_elim = get_loop_invariant_expr (data, bound);
5249 bitmap_clear (inv_vars_elim);
5251 /* The bound is a loop invariant, so it will be only computed
5252 once. */
5253 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5256 /* When the condition is a comparison of the candidate IV against
5257 zero, prefer this IV.
5259 TODO: The constant that we're subtracting from the cost should
5260 be target-dependent. This information should be added to the
5261 target costs for each backend. */
5262 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5263 && integer_zerop (*bound_cst)
5264 && (operand_equal_p (*control_var, cand->var_after, 0)
5265 || operand_equal_p (*control_var, cand->var_before, 0)))
5266 elim_cost -= 1;
5268 express_cost = get_computation_cost (data, use, cand, false,
5269 &inv_vars_express, NULL,
5270 &inv_expr_express);
5271 if (cmp_iv != NULL)
5272 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5274 /* Count the cost of the original bound as well. */
5275 bound_cost = force_var_cost (data, *bound_cst, NULL);
5276 if (bound_cost.cost == 0)
5277 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5278 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5279 bound_cost.cost = 0;
5280 express_cost += bound_cost;
5282 /* Choose the better approach, preferring the eliminated IV. */
5283 if (elim_cost <= express_cost)
5285 cost = elim_cost;
5286 inv_vars = inv_vars_elim;
5287 inv_vars_elim = NULL;
5288 inv_expr = inv_expr_elim;
5290 else
5292 cost = express_cost;
5293 inv_vars = inv_vars_express;
5294 inv_vars_express = NULL;
5295 bound = NULL_TREE;
5296 comp = ERROR_MARK;
5297 inv_expr = inv_expr_express;
5300 if (inv_expr)
5302 inv_exprs = BITMAP_ALLOC (NULL);
5303 bitmap_set_bit (inv_exprs, inv_expr->id);
5305 set_group_iv_cost (data, group, cand, cost,
5306 inv_vars, bound, comp, inv_exprs);
5308 if (inv_vars_elim)
5309 BITMAP_FREE (inv_vars_elim);
5310 if (inv_vars_express)
5311 BITMAP_FREE (inv_vars_express);
5313 return !cost.infinite_cost_p ();
5316 /* Determines the cost of computing the uses in GROUP with CAND.  Returns
5317 false if the group cannot be represented with CAND. */
5319 static bool
5320 determine_group_iv_cost (struct ivopts_data *data,
5321 struct iv_group *group, struct iv_cand *cand)
5323 switch (group->type)
5325 case USE_NONLINEAR_EXPR:
5326 return determine_group_iv_cost_generic (data, group, cand);
5328 case USE_REF_ADDRESS:
5329 case USE_PTR_ADDRESS:
5330 return determine_group_iv_cost_address (data, group, cand);
5332 case USE_COMPARE:
5333 return determine_group_iv_cost_cond (data, group, cand);
5335 default:
5336 gcc_unreachable ();
5340 /* Return true if get_computation_cost indicates that autoincrement is
5341 a possibility for the pair of USE and CAND, false otherwise. */
5343 static bool
5344 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5345 struct iv_cand *cand)
5347 if (!address_p (use->type))
5348 return false;
5350 bool can_autoinc = false;
5351 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5352 return can_autoinc;
5355 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5356 use that allows autoincrement, and set their AINC_USE if possible. */
5358 static void
5359 set_autoinc_for_original_candidates (struct ivopts_data *data)
5361 unsigned i, j;
5363 for (i = 0; i < data->vcands.length (); i++)
5365 struct iv_cand *cand = data->vcands[i];
5366 struct iv_use *closest_before = NULL;
5367 struct iv_use *closest_after = NULL;
5368 if (cand->pos != IP_ORIGINAL)
5369 continue;
5371 for (j = 0; j < data->vgroups.length (); j++)
5373 struct iv_group *group = data->vgroups[j];
5374 struct iv_use *use = group->vuses[0];
5375 unsigned uid = gimple_uid (use->stmt);
5377 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5378 continue;
5380 if (uid < gimple_uid (cand->incremented_at)
5381 && (closest_before == NULL
5382 || uid > gimple_uid (closest_before->stmt)))
5383 closest_before = use;
5385 if (uid > gimple_uid (cand->incremented_at)
5386 && (closest_after == NULL
5387 || uid < gimple_uid (closest_after->stmt)))
5388 closest_after = use;
5391 if (closest_before != NULL
5392 && autoinc_possible_for_pair (data, closest_before, cand))
5393 cand->ainc_use = closest_before;
5394 else if (closest_after != NULL
5395 && autoinc_possible_for_pair (data, closest_after, cand))
5396 cand->ainc_use = closest_after;
5400 /* Relate compare use with all candidates. */
5402 static void
5403 relate_compare_use_with_all_cands (struct ivopts_data *data)
5405 unsigned i, count = data->vcands.length ();
5406 for (i = 0; i < data->vgroups.length (); i++)
5408 struct iv_group *group = data->vgroups[i];
5410 if (group->type == USE_COMPARE)
5411 bitmap_set_range (group->related_cands, 0, count);
5415 /* Finds the candidates for the induction variables. */
5417 static void
5418 find_iv_candidates (struct ivopts_data *data)
5420 /* Add commonly used ivs. */
5421 add_standard_iv_candidates (data);
5423 /* Add old induction variables. */
5424 add_iv_candidate_for_bivs (data);
5426 /* Add induction variables derived from uses. */
5427 add_iv_candidate_for_groups (data);
5429 set_autoinc_for_original_candidates (data);
5431 /* Record the important candidates. */
5432 record_important_candidates (data);
5434 /* Relate compare iv_use with all candidates. */
5435 if (!data->consider_all_candidates)
5436 relate_compare_use_with_all_cands (data);
5438 if (dump_file && (dump_flags & TDF_DETAILS))
5440 unsigned i;
5442 fprintf (dump_file, "\n<Important Candidates>:\t");
5443 for (i = 0; i < data->vcands.length (); i++)
5444 if (data->vcands[i]->important)
5445 fprintf (dump_file, " %d,", data->vcands[i]->id);
5446 fprintf (dump_file, "\n");
5448 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5449 for (i = 0; i < data->vgroups.length (); i++)
5451 struct iv_group *group = data->vgroups[i];
5453 if (group->related_cands)
5455 fprintf (dump_file, " Group %d:\t", group->id);
5456 dump_bitmap (dump_file, group->related_cands);
5459 fprintf (dump_file, "\n");
5463 /* Determines the costs of computing each group of uses with each iv candidate. */
5465 static void
5466 determine_group_iv_costs (struct ivopts_data *data)
5468 unsigned i, j;
5469 struct iv_cand *cand;
5470 struct iv_group *group;
5471 bitmap to_clear = BITMAP_ALLOC (NULL);
5473 alloc_use_cost_map (data);
5475 for (i = 0; i < data->vgroups.length (); i++)
5477 group = data->vgroups[i];
5479 if (data->consider_all_candidates)
5481 for (j = 0; j < data->vcands.length (); j++)
5483 cand = data->vcands[j];
5484 determine_group_iv_cost (data, group, cand);
5487 else
5489 bitmap_iterator bi;
5491 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5493 cand = data->vcands[j];
5494 if (!determine_group_iv_cost (data, group, cand))
5495 bitmap_set_bit (to_clear, j);
5498 /* Remove the candidates for which the cost is infinite from
5499 the list of related candidates. */
5500 bitmap_and_compl_into (group->related_cands, to_clear);
5501 bitmap_clear (to_clear);
5505 BITMAP_FREE (to_clear);
5507 if (dump_file && (dump_flags & TDF_DETAILS))
5509 bitmap_iterator bi;
5511 /* Dump invariant variables. */
5512 fprintf (dump_file, "\n<Invariant Vars>:\n");
5513 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5515 struct version_info *info = ver_info (data, i);
5516 if (info->inv_id)
5518 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5519 print_generic_expr (dump_file, info->name, TDF_SLIM);
5520 fprintf (dump_file, "%s\n",
5521 info->has_nonlin_use ? "" : "\t(eliminable)");
5525 /* Dump invariant expressions. */
5526 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5527 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5529 for (hash_table<iv_inv_expr_hasher>::iterator it
5530 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5531 ++it)
5532 list.safe_push (*it);
5534 list.qsort (sort_iv_inv_expr_ent);
5536 for (i = 0; i < list.length (); ++i)
5538 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5539 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5540 fprintf (dump_file, "\n");
5543 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5545 for (i = 0; i < data->vgroups.length (); i++)
5547 group = data->vgroups[i];
5549 fprintf (dump_file, "Group %d:\n", i);
5550 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5551 for (j = 0; j < group->n_map_members; j++)
5553 if (!group->cost_map[j].cand
5554 || group->cost_map[j].cost.infinite_cost_p ())
5555 continue;
5557 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5558 group->cost_map[j].cand->id,
5559 group->cost_map[j].cost.cost,
5560 group->cost_map[j].cost.complexity);
5561 if (!group->cost_map[j].inv_exprs
5562 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5563 fprintf (dump_file, "NIL;\t");
5564 else
5565 bitmap_print (dump_file,
5566 group->cost_map[j].inv_exprs, "", ";\t");
5567 if (!group->cost_map[j].inv_vars
5568 || bitmap_empty_p (group->cost_map[j].inv_vars))
5569 fprintf (dump_file, "NIL;\n");
5570 else
5571 bitmap_print (dump_file,
5572 group->cost_map[j].inv_vars, "", "\n");
5575 fprintf (dump_file, "\n");
5577 fprintf (dump_file, "\n");
5581 /* Determines cost of the candidate CAND. */
5583 static void
5584 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5586 comp_cost cost_base;
5587 int64_t cost, cost_step;
5588 tree base;
5590 gcc_assert (cand->iv != NULL);
5592 /* There are two costs associated with the candidate -- its increment
5593 and its initialization.  The second is almost negligible for any loop
5594 that rolls enough, so we give it only a small weight. */
5596 base = cand->iv->base;
5597 cost_base = force_var_cost (data, base, NULL);
5598 /* It is exceptional for the iv register to happen to be initialized with
5599 the proper value at no cost.  In general, there will at least be a regcopy
5600 or a const set. */
5601 if (cost_base.cost == 0)
5602 cost_base.cost = COSTS_N_INSNS (1);
5603 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5605 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5607 /* Prefer the original ivs unless we may gain something by replacing them.
5608 The reason is to make debugging simpler; so this is not relevant for
5609 artificial ivs created by other optimization passes. */
5610 if (cand->pos != IP_ORIGINAL
5611 || !SSA_NAME_VAR (cand->var_before)
5612 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5613 cost++;
5615 /* Prefer not to insert statements into latch unless there are some
5616 already (so that we do not create unnecessary jumps). */
5617 if (cand->pos == IP_END
5618 && empty_block_p (ip_end_pos (data->current_loop)))
5619 cost++;
5621 cand->cost = cost;
5622 cand->cost_step = cost_step;
5625 /* Determines costs of computation of the candidates. */
5627 static void
5628 determine_iv_costs (struct ivopts_data *data)
5630 unsigned i;
5632 if (dump_file && (dump_flags & TDF_DETAILS))
5634 fprintf (dump_file, "<Candidate Costs>:\n");
5635 fprintf (dump_file, " cand\tcost\n");
5638 for (i = 0; i < data->vcands.length (); i++)
5640 struct iv_cand *cand = data->vcands[i];
5642 determine_iv_cost (data, cand);
5644 if (dump_file && (dump_flags & TDF_DETAILS))
5645 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5648 if (dump_file && (dump_flags & TDF_DETAILS))
5649 fprintf (dump_file, "\n");
5652 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5653 induction variables. Note N_INVS includes both invariant variables and
5654 invariant expressions. */
5656 static unsigned
5657 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5658 unsigned n_cands)
5660 unsigned cost;
5661 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5662 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5663 bool speed = data->speed;
5665 /* If there is a call in the loop body, the call-clobbered registers
5666 are not available for loop invariants. */
5667 if (data->body_includes_call)
5668 available_regs = available_regs - target_clobbered_regs;
5670 /* If we have enough registers. */
5671 if (regs_needed + target_res_regs < available_regs)
5672 cost = n_new;
5673 /* If close to running out of registers, try to preserve them. */
5674 else if (regs_needed <= available_regs)
5675 cost = target_reg_cost [speed] * regs_needed;
5676 /* If the total register need exceeds the available registers but the number
5677 of candidates does not, we penalize the extra registers using target_spill_cost. */
5678 else if (n_cands <= available_regs)
5679 cost = target_reg_cost [speed] * available_regs
5680 + target_spill_cost [speed] * (regs_needed - available_regs);
5681 /* If the number of candidates itself exceeds the available registers, we
5682 penalize the extra candidate registers using target_spill_cost * 2, because
5683 it is more expensive to spill an induction variable than an invariant. */
5684 else
5685 cost = target_reg_cost [speed] * available_regs
5686 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5687 + target_spill_cost [speed] * (regs_needed - n_cands);
5689 /* Finally, add the number of candidates, so that we prefer eliminating
5690 induction variables if possible. */
5691 return cost + n_cands;
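/* Illustrative example (editorial addition, numbers assumed): with
   available_regs = 14, target_res_regs = 3, n_old = 6, n_invs = 3 and
   n_cands = 2, regs_needed = 11; since 11 + 3 is not < 14, the second branch
   applies and cost = target_reg_cost[speed] * 11, to which the final return
   adds n_cands = 2.  */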
5694 /* For each size of the induction variable set determine the penalty. */
5696 static void
5697 determine_set_costs (struct ivopts_data *data)
5699 unsigned j, n;
5700 gphi *phi;
5701 gphi_iterator psi;
5702 tree op;
5703 struct loop *loop = data->current_loop;
5704 bitmap_iterator bi;
5706 if (dump_file && (dump_flags & TDF_DETAILS))
5708 fprintf (dump_file, "<Global Costs>:\n");
5709 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5710 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5711 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5712 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5715 n = 0;
5716 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5718 phi = psi.phi ();
5719 op = PHI_RESULT (phi);
5721 if (virtual_operand_p (op))
5722 continue;
5724 if (get_iv (data, op))
5725 continue;
5727 if (!POINTER_TYPE_P (TREE_TYPE (op))
5728 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5729 continue;
5731 n++;
5734 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5736 struct version_info *info = ver_info (data, j);
5738 if (info->inv_id && info->has_nonlin_use)
5739 n++;
5742 data->regs_used = n;
5743 if (dump_file && (dump_flags & TDF_DETAILS))
5744 fprintf (dump_file, " regs_used %d\n", n);
5746 if (dump_file && (dump_flags & TDF_DETAILS))
5748 fprintf (dump_file, " cost for size:\n");
5749 fprintf (dump_file, " ivs\tcost\n");
5750 for (j = 0; j <= 2 * target_avail_regs; j++)
5751 fprintf (dump_file, " %d\t%d\n", j,
5752 ivopts_estimate_reg_pressure (data, 0, j));
5753 fprintf (dump_file, "\n");
5757 /* Returns true if A is a cheaper cost pair than B. */
5759 static bool
5760 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5762 if (!a)
5763 return false;
5765 if (!b)
5766 return true;
5768 if (a->cost < b->cost)
5769 return true;
5771 if (b->cost < a->cost)
5772 return false;
5774 /* In case the costs are the same, prefer the cheaper candidate. */
5775 if (a->cand->cost < b->cand->cost)
5776 return true;
5778 return false;
5781 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5782 for more expensive, equal and cheaper respectively. */
5784 static int
5785 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5787 if (cheaper_cost_pair (a, b))
5788 return -1;
5789 if (cheaper_cost_pair (b, a))
5790 return 1;
5792 return 0;
5795 /* Returns the candidate by which GROUP is expressed in IVS. */
5797 static struct cost_pair *
5798 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5800 return ivs->cand_for_group[group->id];
5803 /* Computes the cost field of IVS structure. */
5805 static void
5806 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5808 comp_cost cost = ivs->cand_use_cost;
5810 cost += ivs->cand_cost;
5811 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5812 ivs->cost = cost;
5815 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5816 and IVS. */
5818 static void
5819 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5821 bitmap_iterator bi;
5822 unsigned iid;
5824 if (!invs)
5825 return;
5827 gcc_assert (n_inv_uses != NULL);
5828 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5830 n_inv_uses[iid]--;
5831 if (n_inv_uses[iid] == 0)
5832 ivs->n_invs--;
5836 /* Set GROUP not to be expressed by any candidate in IVS. */
5838 static void
5839 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5840 struct iv_group *group)
5842 unsigned gid = group->id, cid;
5843 struct cost_pair *cp;
5845 cp = ivs->cand_for_group[gid];
5846 if (!cp)
5847 return;
5848 cid = cp->cand->id;
5850 ivs->bad_groups++;
5851 ivs->cand_for_group[gid] = NULL;
5852 ivs->n_cand_uses[cid]--;
5854 if (ivs->n_cand_uses[cid] == 0)
5856 bitmap_clear_bit (ivs->cands, cid);
5857 ivs->n_cands--;
5858 ivs->cand_cost -= cp->cand->cost;
5859 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5860 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5863 ivs->cand_use_cost -= cp->cost;
5864 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5865 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5866 iv_ca_recount_cost (data, ivs);
5869 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5870 IVS. */
5872 static void
5873 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5875 bitmap_iterator bi;
5876 unsigned iid;
5878 if (!invs)
5879 return;
5881 gcc_assert (n_inv_uses != NULL);
5882 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5884 n_inv_uses[iid]++;
5885 if (n_inv_uses[iid] == 1)
5886 ivs->n_invs++;
5890 /* Set cost pair for GROUP in set IVS to CP. */
5892 static void
5893 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5894 struct iv_group *group, struct cost_pair *cp)
5896 unsigned gid = group->id, cid;
5898 if (ivs->cand_for_group[gid] == cp)
5899 return;
5901 if (ivs->cand_for_group[gid])
5902 iv_ca_set_no_cp (data, ivs, group);
5904 if (cp)
5906 cid = cp->cand->id;
5908 ivs->bad_groups--;
5909 ivs->cand_for_group[gid] = cp;
5910 ivs->n_cand_uses[cid]++;
5911 if (ivs->n_cand_uses[cid] == 1)
5913 bitmap_set_bit (ivs->cands, cid);
5914 ivs->n_cands++;
5915 ivs->cand_cost += cp->cand->cost;
5916 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5917 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5920 ivs->cand_use_cost += cp->cost;
5921 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5922 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5923 iv_ca_recount_cost (data, ivs);
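/* Note the bookkeeping above: the first group assigned to a candidate
   sets the candidate's bit in IVS->cands, bumps n_cands and adds the
   candidate's own cost; subsequent groups mapped to the same candidate
   only add their per-group cost.  For instance (hypothetical groups g1
   and g2 sharing one candidate):

     iv_ca_set_cp (data, ivs, g1, cp_g1_c);  -- n_cands becomes 1
     iv_ca_set_cp (data, ivs, g2, cp_g2_c);  -- n_cands stays 1

   Removing the assignments via iv_ca_set_no_cp undoes this symmetrically.  */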
5927 /* Extend set IVS by expressing GROUP by some of the candidates in it
5928 if possible. Consider all important candidates if the candidates in
5929 set IVS don't give any result. */
5931 static void
5932 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5933 struct iv_group *group)
5935 struct cost_pair *best_cp = NULL, *cp;
5936 bitmap_iterator bi;
5937 unsigned i;
5938 struct iv_cand *cand;
5940 gcc_assert (ivs->upto >= group->id);
5941 ivs->upto++;
5942 ivs->bad_groups++;
5944 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5946 cand = data->vcands[i];
5947 cp = get_group_iv_cost (data, group, cand);
5948 if (cheaper_cost_pair (cp, best_cp))
5949 best_cp = cp;
5952 if (best_cp == NULL)
5954 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5956 cand = data->vcands[i];
5957 cp = get_group_iv_cost (data, group, cand);
5958 if (cheaper_cost_pair (cp, best_cp))
5959 best_cp = cp;
5963 iv_ca_set_cp (data, ivs, group, best_cp);
5966 /* Get cost for assignment IVS. */
5968 static comp_cost
5969 iv_ca_cost (struct iv_ca *ivs)
5971 /* This was a conditional expression but it triggered a bug in
5972 Sun C 5.5. */
5973 if (ivs->bad_groups)
5974 return infinite_cost;
5975 else
5976 return ivs->cost;
5979 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5980 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5981 respectively. */
5983 static int
5984 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5985 struct iv_group *group, struct cost_pair *old_cp,
5986 struct cost_pair *new_cp)
5988 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5989 unsigned old_n_invs = ivs->n_invs;
5990 iv_ca_set_cp (data, ivs, group, new_cp);
5991 unsigned new_n_invs = ivs->n_invs;
5992 iv_ca_set_cp (data, ivs, group, old_cp);
5994 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5997 /* Creates a change record expressing GROUP by NEW_CP instead of OLD_CP and
5998 chains it before NEXT. */
6000 static struct iv_ca_delta *
6001 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6002 struct cost_pair *new_cp, struct iv_ca_delta *next)
6004 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6006 change->group = group;
6007 change->old_cp = old_cp;
6008 change->new_cp = new_cp;
6009 change->next = next;
6011 return change;
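/* A sketch of how deltas accumulate (g1, g2 and the cost pairs are
   hypothetical): since each call prepends to the list,

     delta = iv_ca_delta_add (g1, old1, new1, NULL);
     delta = iv_ca_delta_add (g2, old2, new2, delta);

   yields g2's change at the head, followed by g1's, so iv_ca_delta_commit
   below applies the most recently recorded change first.  */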
6014 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6015 are rewritten. */
6017 static struct iv_ca_delta *
6018 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6020 struct iv_ca_delta *last;
6022 if (!l2)
6023 return l1;
6025 if (!l1)
6026 return l2;
6028 for (last = l1; last->next; last = last->next)
6029 continue;
6030 last->next = l2;
6032 return l1;
6035 /* Reverse the list of changes DELTA, forming the inverse to it. */
6037 static struct iv_ca_delta *
6038 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6040 struct iv_ca_delta *act, *next, *prev = NULL;
6042 for (act = delta; act; act = next)
6044 next = act->next;
6045 act->next = prev;
6046 prev = act;
6048 std::swap (act->old_cp, act->new_cp);
6051 return prev;
6054 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6055 reverted instead. */
6057 static void
6058 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6059 struct iv_ca_delta *delta, bool forward)
6061 struct cost_pair *from, *to;
6062 struct iv_ca_delta *act;
6064 if (!forward)
6065 delta = iv_ca_delta_reverse (delta);
6067 for (act = delta; act; act = act->next)
6069 from = act->old_cp;
6070 to = act->new_cp;
6071 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6072 iv_ca_set_cp (data, ivs, act->group, to);
6075 if (!forward)
6076 iv_ca_delta_reverse (delta);
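/* The delta machinery enables the usual "apply, measure, undo" idiom
   used throughout this file (see e.g. iv_ca_extend and iv_ca_narrow):

     iv_ca_delta_commit (data, ivs, delta, true);    -- apply changes
     cost = iv_ca_cost (ivs);                        -- evaluate the set
     iv_ca_delta_commit (data, ivs, delta, false);   -- revert changes

   so alternative candidate assignments can be costed without copying
   the whole iv_ca structure.  */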
6079 /* Returns true if CAND is used in IVS. */
6081 static bool
6082 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6084 return ivs->n_cand_uses[cand->id] > 0;
6087 /* Returns number of induction variable candidates in the set IVS. */
6089 static unsigned
6090 iv_ca_n_cands (struct iv_ca *ivs)
6092 return ivs->n_cands;
6095 /* Free the list of changes DELTA. */
6097 static void
6098 iv_ca_delta_free (struct iv_ca_delta **delta)
6100 struct iv_ca_delta *act, *next;
6102 for (act = *delta; act; act = next)
6104 next = act->next;
6105 free (act);
6108 *delta = NULL;
6111 /* Allocates a new iv candidate assignment. */
6113 static struct iv_ca *
6114 iv_ca_new (struct ivopts_data *data)
6116 struct iv_ca *nw = XNEW (struct iv_ca);
6118 nw->upto = 0;
6119 nw->bad_groups = 0;
6120 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6121 data->vgroups.length ());
6122 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6123 nw->cands = BITMAP_ALLOC (NULL);
6124 nw->n_cands = 0;
6125 nw->n_invs = 0;
6126 nw->cand_use_cost = no_cost;
6127 nw->cand_cost = 0;
6128 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6129 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6130 nw->cost = no_cost;
6132 return nw;
6135 /* Free memory occupied by the set IVS. */
6137 static void
6138 iv_ca_free (struct iv_ca **ivs)
6140 free ((*ivs)->cand_for_group);
6141 free ((*ivs)->n_cand_uses);
6142 BITMAP_FREE ((*ivs)->cands);
6143 free ((*ivs)->n_inv_var_uses);
6144 free ((*ivs)->n_inv_expr_uses);
6145 free (*ivs);
6146 *ivs = NULL;
6149 /* Dumps IVS to FILE. */
6151 static void
6152 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6154 unsigned i;
6155 comp_cost cost = iv_ca_cost (ivs);
6157 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6158 cost.complexity);
6159 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6160 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6161 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6162 bitmap_print (file, ivs->cands, " candidates: ","\n");
6164 for (i = 0; i < ivs->upto; i++)
6166 struct iv_group *group = data->vgroups[i];
6167 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6168 if (cp)
6169 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6170 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6171 cp->cost.cost, cp->cost.complexity);
6172 else
6173 fprintf (file, " group:%d --> ??\n", group->id);
6176 const char *pref = "";
6177 fprintf (file, " invariant variables: ");
6178 for (i = 1; i <= data->max_inv_var_id; i++)
6179 if (ivs->n_inv_var_uses[i])
6181 fprintf (file, "%s%d", pref, i);
6182 pref = ", ";
6185 pref = "";
6186 fprintf (file, "\n invariant expressions: ");
6187 for (i = 1; i <= data->max_inv_expr_id; i++)
6188 if (ivs->n_inv_expr_uses[i])
6190 fprintf (file, "%s%d", pref, i);
6191 pref = ", ";
6194 fprintf (file, "\n\n");
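/* The dump produced above looks roughly like this (illustrative values;
   the exact layout of the candidate bitmap comes from bitmap_print):

     cost: 23 (complexity 1)
     cand_cost: 8
     cand_group_cost: 11 (complexity 1)
     candidates: 1, 5
      group:0 --> iv_cand:1, cost=(3,0)
      group:1 --> iv_cand:5, cost=(4,1)
     invariant variables: 2
     invariant expressions: 1, 3  */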
6197 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6198 of the new set, and store the differences in DELTA. The number of induction
6199 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6200 is true, the function tries to find a solution with minimal iv candidates. */
6202 static comp_cost
6203 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6204 struct iv_cand *cand, struct iv_ca_delta **delta,
6205 unsigned *n_ivs, bool min_ncand)
6207 unsigned i;
6208 comp_cost cost;
6209 struct iv_group *group;
6210 struct cost_pair *old_cp, *new_cp;
6212 *delta = NULL;
6213 for (i = 0; i < ivs->upto; i++)
6215 group = data->vgroups[i];
6216 old_cp = iv_ca_cand_for_group (ivs, group);
6218 if (old_cp
6219 && old_cp->cand == cand)
6220 continue;
6222 new_cp = get_group_iv_cost (data, group, cand);
6223 if (!new_cp)
6224 continue;
6226 if (!min_ncand)
6228 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6229 /* Skip if new_cp depends on more invariants. */
6230 if (cmp_invs > 0)
6231 continue;
6233 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6234 /* Skip if new_cp is not cheaper. */
6235 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6236 continue;
6239 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6242 iv_ca_delta_commit (data, ivs, *delta, true);
6243 cost = iv_ca_cost (ivs);
6244 if (n_ivs)
6245 *n_ivs = iv_ca_n_cands (ivs);
6246 iv_ca_delta_commit (data, ivs, *delta, false);
6248 return cost;
6251 /* Try narrowing set IVS by removing CAND. Return the cost of
6252 the new set and store the differences in DELTA. START is
6253 the candidate with which we start narrowing. */
6255 static comp_cost
6256 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6257 struct iv_cand *cand, struct iv_cand *start,
6258 struct iv_ca_delta **delta)
6260 unsigned i, ci;
6261 struct iv_group *group;
6262 struct cost_pair *old_cp, *new_cp, *cp;
6263 bitmap_iterator bi;
6264 struct iv_cand *cnd;
6265 comp_cost cost, best_cost, acost;
6267 *delta = NULL;
6268 for (i = 0; i < data->vgroups.length (); i++)
6270 group = data->vgroups[i];
6272 old_cp = iv_ca_cand_for_group (ivs, group);
6273 if (old_cp->cand != cand)
6274 continue;
6276 best_cost = iv_ca_cost (ivs);
6277 /* Start narrowing with START. */
6278 new_cp = get_group_iv_cost (data, group, start);
6280 if (data->consider_all_candidates)
6282 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6284 if (ci == cand->id || (start && ci == start->id))
6285 continue;
6287 cnd = data->vcands[ci];
6289 cp = get_group_iv_cost (data, group, cnd);
6290 if (!cp)
6291 continue;
6293 iv_ca_set_cp (data, ivs, group, cp);
6294 acost = iv_ca_cost (ivs);
6296 if (acost < best_cost)
6298 best_cost = acost;
6299 new_cp = cp;
6303 else
6305 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6307 if (ci == cand->id || (start && ci == start->id))
6308 continue;
6310 cnd = data->vcands[ci];
6312 cp = get_group_iv_cost (data, group, cnd);
6313 if (!cp)
6314 continue;
6316 iv_ca_set_cp (data, ivs, group, cp);
6317 acost = iv_ca_cost (ivs);
6319 if (acost < best_cost)
6321 best_cost = acost;
6322 new_cp = cp;
6326 /* Restore the old cp for this group. */
6327 iv_ca_set_cp (data, ivs, group, old_cp);
6329 if (!new_cp)
6331 iv_ca_delta_free (delta);
6332 return infinite_cost;
6335 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6338 iv_ca_delta_commit (data, ivs, *delta, true);
6339 cost = iv_ca_cost (ivs);
6340 iv_ca_delta_commit (data, ivs, *delta, false);
6342 return cost;
6345 /* Try optimizing the set of candidates IVS by removing candidates other
6346 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6347 differences in DELTA. */
6349 static comp_cost
6350 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6351 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6353 bitmap_iterator bi;
6354 struct iv_ca_delta *act_delta, *best_delta;
6355 unsigned i;
6356 comp_cost best_cost, acost;
6357 struct iv_cand *cand;
6359 best_delta = NULL;
6360 best_cost = iv_ca_cost (ivs);
6362 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6364 cand = data->vcands[i];
6366 if (cand == except_cand)
6367 continue;
6369 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6371 if (acost < best_cost)
6373 best_cost = acost;
6374 iv_ca_delta_free (&best_delta);
6375 best_delta = act_delta;
6377 else
6378 iv_ca_delta_free (&act_delta);
6381 if (!best_delta)
6383 *delta = NULL;
6384 return best_cost;
6387 /* Recurse to possibly remove other unnecessary ivs. */
6388 iv_ca_delta_commit (data, ivs, best_delta, true);
6389 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6390 iv_ca_delta_commit (data, ivs, best_delta, false);
6391 *delta = iv_ca_delta_join (best_delta, *delta);
6392 return best_cost;
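/* Note on termination: each committed narrowing retargets every group away
   from the removed candidate, so its use count drops to zero and it leaves
   IVS->cands; the recursion above is therefore bounded by the number of
   candidates currently in the set.  */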
6395 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6396 cheaper local cost for GROUP than BEST_CP. Return pointer to
6397 the corresponding cost_pair, otherwise just return BEST_CP. */
6399 static struct cost_pair*
6400 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6401 unsigned int cand_idx, struct iv_cand *old_cand,
6402 struct cost_pair *best_cp)
6404 struct iv_cand *cand;
6405 struct cost_pair *cp;
6407 gcc_assert (old_cand != NULL && best_cp != NULL);
6408 if (cand_idx == old_cand->id)
6409 return best_cp;
6411 cand = data->vcands[cand_idx];
6412 cp = get_group_iv_cost (data, group, cand);
6413 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6414 return cp;
6416 return best_cp;
6419 /* Try breaking the local-optimal fixed point for IVS by replacing candidates
6420 which are used by more than one iv use. For each such candidate, this
6421 function tries to represent the iv uses under that candidate using other
6422 candidates with lower local cost, then tries to prune the new set.
6423 If the new set has a lower cost, it returns the new cost after recording
6424 the candidate replacement in the list DELTA. */
6426 static comp_cost
6427 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6428 struct iv_ca_delta **delta)
6430 bitmap_iterator bi, bj;
6431 unsigned int i, j, k;
6432 struct iv_cand *cand;
6433 comp_cost orig_cost, acost;
6434 struct iv_ca_delta *act_delta, *tmp_delta;
6435 struct cost_pair *old_cp, *best_cp = NULL;
6437 *delta = NULL;
6438 orig_cost = iv_ca_cost (ivs);
6440 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6442 if (ivs->n_cand_uses[i] == 1
6443 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6444 continue;
6446 cand = data->vcands[i];
6448 act_delta = NULL;
6449 /* Represent uses under current candidate using other ones with
6450 lower local cost. */
6451 for (j = 0; j < ivs->upto; j++)
6453 struct iv_group *group = data->vgroups[j];
6454 old_cp = iv_ca_cand_for_group (ivs, group);
6456 if (old_cp->cand != cand)
6457 continue;
6459 best_cp = old_cp;
6460 if (data->consider_all_candidates)
6461 for (k = 0; k < data->vcands.length (); k++)
6462 best_cp = cheaper_cost_with_cand (data, group, k,
6463 old_cp->cand, best_cp);
6464 else
6465 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6466 best_cp = cheaper_cost_with_cand (data, group, k,
6467 old_cp->cand, best_cp);
6469 if (best_cp == old_cp)
6470 continue;
6472 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6474 /* No need for further pruning. */
6475 if (!act_delta)
6476 continue;
6478 /* Prune the new candidate set. */
6479 iv_ca_delta_commit (data, ivs, act_delta, true);
6480 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6481 iv_ca_delta_commit (data, ivs, act_delta, false);
6482 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6484 if (acost < orig_cost)
6486 *delta = act_delta;
6487 return acost;
6489 else
6490 iv_ca_delta_free (&act_delta);
6493 return orig_cost;
6496 /* Tries to extend the set IVS in the best possible way in order to
6497 express GROUP. If ORIGINALP is true, prefer candidates from
6498 the original set of IVs, otherwise favor important candidates not
6499 based on any memory object. */
6501 static bool
6502 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6503 struct iv_group *group, bool originalp)
6505 comp_cost best_cost, act_cost;
6506 unsigned i;
6507 bitmap_iterator bi;
6508 struct iv_cand *cand;
6509 struct iv_ca_delta *best_delta = NULL, *act_delta;
6510 struct cost_pair *cp;
6512 iv_ca_add_group (data, ivs, group);
6513 best_cost = iv_ca_cost (ivs);
6514 cp = iv_ca_cand_for_group (ivs, group);
6515 if (cp)
6517 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6518 iv_ca_set_no_cp (data, ivs, group);
6521 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6522 first try important candidates not based on any memory object. Only if
6523 this fails, try the specific ones. Rationale -- in loops with many
6524 variables the best choice often is to use just one generic biv. If we
6525 added here many ivs specific to the uses, the optimization algorithm later
6526 would be likely to get stuck in a local minimum, thus causing us to create
6527 too many ivs. The approach from few ivs to more seems more likely to be
6528 successful -- starting from few ivs, replacing an expensive use by a
6529 specific iv should always be a win. */
6530 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6532 cand = data->vcands[i];
6534 if (originalp && cand->pos != IP_ORIGINAL)
6535 continue;
6537 if (!originalp && cand->iv->base_object != NULL_TREE)
6538 continue;
6540 if (iv_ca_cand_used_p (ivs, cand))
6541 continue;
6543 cp = get_group_iv_cost (data, group, cand);
6544 if (!cp)
6545 continue;
6547 iv_ca_set_cp (data, ivs, group, cp);
6548 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6549 true);
6550 iv_ca_set_no_cp (data, ivs, group);
6551 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6553 if (act_cost < best_cost)
6555 best_cost = act_cost;
6557 iv_ca_delta_free (&best_delta);
6558 best_delta = act_delta;
6560 else
6561 iv_ca_delta_free (&act_delta);
6564 if (best_cost.infinite_cost_p ())
6566 for (i = 0; i < group->n_map_members; i++)
6568 cp = group->cost_map + i;
6569 cand = cp->cand;
6570 if (!cand)
6571 continue;
6573 /* Already tried this. */
6574 if (cand->important)
6576 if (originalp && cand->pos == IP_ORIGINAL)
6577 continue;
6578 if (!originalp && cand->iv->base_object == NULL_TREE)
6579 continue;
6582 if (iv_ca_cand_used_p (ivs, cand))
6583 continue;
6585 act_delta = NULL;
6586 iv_ca_set_cp (data, ivs, group, cp);
6587 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6588 iv_ca_set_no_cp (data, ivs, group);
6589 act_delta = iv_ca_delta_add (group,
6590 iv_ca_cand_for_group (ivs, group),
6591 cp, act_delta);
6593 if (act_cost < best_cost)
6595 best_cost = act_cost;
6597 if (best_delta)
6598 iv_ca_delta_free (&best_delta);
6599 best_delta = act_delta;
6601 else
6602 iv_ca_delta_free (&act_delta);
6606 iv_ca_delta_commit (data, ivs, best_delta, true);
6607 iv_ca_delta_free (&best_delta);
6609 return !best_cost.infinite_cost_p ();
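/* In other words, a group is first tried against its related candidates
   filtered by ORIGINALP; only when every such attempt yields an infinite
   cost does the loop above fall back to the group's full cost map.  The
   function reports failure only if even that fallback cannot express the
   group at finite cost.  */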
6612 /* Finds an initial assignment of candidates to uses. */
6614 static struct iv_ca *
6615 get_initial_solution (struct ivopts_data *data, bool originalp)
6617 unsigned i;
6618 struct iv_ca *ivs = iv_ca_new (data);
6620 for (i = 0; i < data->vgroups.length (); i++)
6621 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6623 iv_ca_free (&ivs);
6624 return NULL;
6627 return ivs;
6630 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6631 points to a bool variable, this function tries to break local
6632 optimal fixed-point by replacing candidates in IVS if it's true. */
6634 static bool
6635 try_improve_iv_set (struct ivopts_data *data,
6636 struct iv_ca *ivs, bool *try_replace_p)
6638 unsigned i, n_ivs;
6639 comp_cost acost, best_cost = iv_ca_cost (ivs);
6640 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6641 struct iv_cand *cand;
6643 /* Try extending the set of induction variables by one. */
6644 for (i = 0; i < data->vcands.length (); i++)
6646 cand = data->vcands[i];
6648 if (iv_ca_cand_used_p (ivs, cand))
6649 continue;
6651 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6652 if (!act_delta)
6653 continue;
6655 /* If we successfully added the candidate and the set is small enough,
6656 try optimizing it by removing other candidates. */
6657 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6659 iv_ca_delta_commit (data, ivs, act_delta, true);
6660 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6661 iv_ca_delta_commit (data, ivs, act_delta, false);
6662 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6665 if (acost < best_cost)
6667 best_cost = acost;
6668 iv_ca_delta_free (&best_delta);
6669 best_delta = act_delta;
6671 else
6672 iv_ca_delta_free (&act_delta);
6675 if (!best_delta)
6677 /* Try removing the candidates from the set instead. */
6678 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6680 if (!best_delta && *try_replace_p)
6682 *try_replace_p = false;
6683 /* So far the candidate-selection algorithm tends to choose fewer IVs,
6684 which handles cases in which loops have many variables but the best
6685 choice is often to use only one general biv. One weakness is that it
6686 can't handle the opposite case, in which different candidates should
6687 be chosen with respect to each use. To solve this problem, we replace
6688 candidates in the manner described by the comment on iv_ca_replace,
6689 thus giving the general algorithm a chance to break out of the
6690 local-optimal fixed point in these cases. */
6691 best_cost = iv_ca_replace (data, ivs, &best_delta);
6694 if (!best_delta)
6695 return false;
6698 iv_ca_delta_commit (data, ivs, best_delta, true);
6699 iv_ca_delta_free (&best_delta);
6700 return best_cost == iv_ca_cost (ivs);
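/* The return value above compares the predicted BEST_COST with the cost
   actually obtained after committing the delta; presumably this acts as a
   consistency check -- find_optimal_iv_set_1 keeps iterating only while
   the committed improvement matches the prediction.  */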
6703 /* Attempts to find the optimal set of induction variables. We use a simple
6704 greedy heuristic -- we try to replace at most one candidate in the selected
6705 solution and remove the unused ivs while this improves the cost. */
6707 static struct iv_ca *
6708 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6710 struct iv_ca *set;
6711 bool try_replace_p = true;
6713 /* Get the initial solution. */
6714 set = get_initial_solution (data, originalp);
6715 if (!set)
6717 if (dump_file && (dump_flags & TDF_DETAILS))
6718 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6719 return NULL;
6722 if (dump_file && (dump_flags & TDF_DETAILS))
6724 fprintf (dump_file, "Initial set of candidates:\n");
6725 iv_ca_dump (data, dump_file, set);
6728 while (try_improve_iv_set (data, set, &try_replace_p))
6730 if (dump_file && (dump_flags & TDF_DETAILS))
6732 fprintf (dump_file, "Improved to:\n");
6733 iv_ca_dump (data, dump_file, set);
6737 /* If the set has infinite_cost, it can't be optimal. */
6738 if (iv_ca_cost (set).infinite_cost_p ())
6740 if (dump_file && (dump_flags & TDF_DETAILS))
6741 fprintf (dump_file,
6742 "Overflow to infinite cost in try_improve_iv_set.\n");
6743 iv_ca_free (&set);
6745 return set;
6748 static struct iv_ca *
6749 find_optimal_iv_set (struct ivopts_data *data)
6751 unsigned i;
6752 comp_cost cost, origcost;
6753 struct iv_ca *set, *origset;
6755 /* Determine the cost based on a strategy that starts with the original
6756 IVs, then try again using a strategy that prefers candidates not based
6757 on any memory object. */
6758 origset = find_optimal_iv_set_1 (data, true);
6759 set = find_optimal_iv_set_1 (data, false);
6761 if (!origset && !set)
6762 return NULL;
6764 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6765 cost = set ? iv_ca_cost (set) : infinite_cost;
6767 if (dump_file && (dump_flags & TDF_DETAILS))
6769 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
6770 origcost.cost, origcost.complexity);
6771 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
6772 cost.cost, cost.complexity);
6775 /* Choose the one with the best cost. */
6776 if (origcost <= cost)
6778 if (set)
6779 iv_ca_free (&set);
6780 set = origset;
6782 else if (origset)
6783 iv_ca_free (&origset);
6785 for (i = 0; i < data->vgroups.length (); i++)
6787 struct iv_group *group = data->vgroups[i];
6788 group->selected = iv_ca_cand_for_group (set, group)->cand;
6791 return set;
6794 /* Creates a new induction variable corresponding to CAND. */
6796 static void
6797 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6799 gimple_stmt_iterator incr_pos;
6800 tree base;
6801 struct iv_use *use;
6802 struct iv_group *group;
6803 bool after = false;
6805 gcc_assert (cand->iv != NULL);
6807 switch (cand->pos)
6809 case IP_NORMAL:
6810 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6811 break;
6813 case IP_END:
6814 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6815 after = true;
6816 break;
6818 case IP_AFTER_USE:
6819 after = true;
6820 /* fall through */
6821 case IP_BEFORE_USE:
6822 incr_pos = gsi_for_stmt (cand->incremented_at);
6823 break;
6825 case IP_ORIGINAL:
6826 /* Mark that the iv is preserved. */
6827 name_info (data, cand->var_before)->preserve_biv = true;
6828 name_info (data, cand->var_after)->preserve_biv = true;
6830 /* Rewrite the increment so that it uses var_before directly. */
6831 use = find_interesting_uses_op (data, cand->var_after);
6832 group = data->vgroups[use->group_id];
6833 group->selected = cand;
6834 return;
6837 gimple_add_tmp_var (cand->var_before);
6839 base = unshare_expr (cand->iv->base);
6841 create_iv (base, unshare_expr (cand->iv->step),
6842 cand->var_before, data->current_loop,
6843 &incr_pos, after, &cand->var_before, &cand->var_after);
6846 /* Creates new induction variables described in SET. */
6848 static void
6849 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6851 unsigned i;
6852 struct iv_cand *cand;
6853 bitmap_iterator bi;
6855 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6857 cand = data->vcands[i];
6858 create_new_iv (data, cand);
6861 if (dump_file && (dump_flags & TDF_DETAILS))
6863 fprintf (dump_file, "Selected IV set for loop %d",
6864 data->current_loop->num);
6865 if (data->loop_loc != UNKNOWN_LOCATION)
6866 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6867 LOCATION_LINE (data->loop_loc));
6868 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6869 avg_loop_niter (data->current_loop));
6870 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6871 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6873 cand = data->vcands[i];
6874 dump_cand (dump_file, cand);
6876 fprintf (dump_file, "\n");
6880 /* Rewrites USE (definition of iv used in a nonlinear expression)
6881 using candidate CAND. */
6883 static void
6884 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6885 struct iv_use *use, struct iv_cand *cand)
6887 gassign *ass;
6888 gimple_stmt_iterator bsi;
6889 tree comp, type = get_use_type (use), tgt;
6891 /* An important special case -- if we are asked to express the value of
6892 the original iv by itself, just exit; there is no need to
6893 introduce a new computation (that might also need casting the
6894 variable to unsigned and back). */
6895 if (cand->pos == IP_ORIGINAL
6896 && cand->incremented_at == use->stmt)
6898 tree op = NULL_TREE;
6899 enum tree_code stmt_code;
6901 gcc_assert (is_gimple_assign (use->stmt));
6902 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6904 /* Check whether we may leave the computation unchanged.
6905 This is the case only if it does not rely on other
6906 computations in the loop -- otherwise, the computation
6907 we rely upon may be removed in remove_unused_ivs,
6908 thus leading to ICE. */
6909 stmt_code = gimple_assign_rhs_code (use->stmt);
6910 if (stmt_code == PLUS_EXPR
6911 || stmt_code == MINUS_EXPR
6912 || stmt_code == POINTER_PLUS_EXPR)
6914 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6915 op = gimple_assign_rhs2 (use->stmt);
6916 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6917 op = gimple_assign_rhs1 (use->stmt);
6920 if (op != NULL_TREE)
6922 if (expr_invariant_in_loop_p (data->current_loop, op))
6923 return;
6924 if (TREE_CODE (op) == SSA_NAME)
6926 struct iv *iv = get_iv (data, op);
6927 if (iv != NULL && integer_zerop (iv->step))
6928 return;
6933 switch (gimple_code (use->stmt))
6935 case GIMPLE_PHI:
6936 tgt = PHI_RESULT (use->stmt);
6938 /* If we should keep the biv, do not replace it. */
6939 if (name_info (data, tgt)->preserve_biv)
6940 return;
6942 bsi = gsi_after_labels (gimple_bb (use->stmt));
6943 break;
6945 case GIMPLE_ASSIGN:
6946 tgt = gimple_assign_lhs (use->stmt);
6947 bsi = gsi_for_stmt (use->stmt);
6948 break;
6950 default:
6951 gcc_unreachable ();
6954 aff_tree aff_inv, aff_var;
6955 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6956 use, cand, &aff_inv, &aff_var))
6957 gcc_unreachable ();
6959 unshare_aff_combination (&aff_inv);
6960 unshare_aff_combination (&aff_var);
6961 /* Prefer a CSE opportunity over a loop invariant by adding the offset
6962 last, so that iv_uses which differ only in offset can be CSEed. */
6963 poly_widest_int offset = aff_inv.offset;
6964 aff_inv.offset = 0;
6966 gimple_seq stmt_list = NULL, seq = NULL;
6967 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6968 tree comp_op2 = aff_combination_to_tree (&aff_var);
6969 gcc_assert (comp_op1 && comp_op2);
6971 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6972 gimple_seq_add_seq (&stmt_list, seq);
6973 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6974 gimple_seq_add_seq (&stmt_list, seq);
6976 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6977 std::swap (comp_op1, comp_op2);
6979 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6981 comp = fold_build_pointer_plus (comp_op1,
6982 fold_convert (sizetype, comp_op2));
6983 comp = fold_build_pointer_plus (comp,
6984 wide_int_to_tree (sizetype, offset));
6986 else
6988 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6989 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6990 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6991 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6994 comp = fold_convert (type, comp);
6995 if (!valid_gimple_rhs_p (comp)
6996 || (gimple_code (use->stmt) != GIMPLE_PHI
6997 /* We can't allow re-allocating the stmt as it might be pointed
6998 to still. */
6999 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7000 >= gimple_num_ops (gsi_stmt (bsi)))))
7002 comp = force_gimple_operand (comp, &seq, true, NULL);
7003 gimple_seq_add_seq (&stmt_list, seq);
7004 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7006 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7007 /* As this isn't a plain copy we have to reset alignment
7008 information. */
7009 if (SSA_NAME_PTR_INFO (comp))
7010 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7014 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7015 if (gimple_code (use->stmt) == GIMPLE_PHI)
7017 ass = gimple_build_assign (tgt, comp);
7018 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7020 bsi = gsi_for_stmt (use->stmt);
7021 remove_phi_node (&bsi, false);
7023 else
7025 gimple_assign_set_rhs_from_tree (&bsi, comp);
7026 use->stmt = gsi_stmt (bsi);
7030 /* Performs a peephole optimization to reorder the iv update statement with
7031 a mem ref to enable instruction combining in later phases. The mem ref uses
7032 the iv value before the update, so the reordering transformation requires
7033 adjustment of the offset. CAND is the selected IV_CAND.
7035 Example:
7037 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7038 iv2 = iv1 + 1;
7040 if (t < val) (1)
7041 goto L;
7042 goto Head;
7045 directly propagating t over to (1) will introduce an overlapping live range
7046 and thus increase register pressure. This peephole transforms it into:
7049 iv2 = iv1 + 1;
7050 t = MEM_REF (base, iv2, 8, 8);
7051 if (t < val)
7052 goto L;
7053 goto Head;
7056 static void
7057 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7059 tree var_after;
7060 gimple *iv_update, *stmt;
7061 basic_block bb;
7062 gimple_stmt_iterator gsi, gsi_iv;
7064 if (cand->pos != IP_NORMAL)
7065 return;
7067 var_after = cand->var_after;
7068 iv_update = SSA_NAME_DEF_STMT (var_after);
7070 bb = gimple_bb (iv_update);
7071 gsi = gsi_last_nondebug_bb (bb);
7072 stmt = gsi_stmt (gsi);
7074 /* Only handle conditional statement for now. */
7075 if (gimple_code (stmt) != GIMPLE_COND)
7076 return;
7078 gsi_prev_nondebug (&gsi);
7079 stmt = gsi_stmt (gsi);
7080 if (stmt != iv_update)
7081 return;
7083 gsi_prev_nondebug (&gsi);
7084 if (gsi_end_p (gsi))
7085 return;
7087 stmt = gsi_stmt (gsi);
7088 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7089 return;
7091 if (stmt != use->stmt)
7092 return;
7094 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7095 return;
7097 if (dump_file && (dump_flags & TDF_DETAILS))
7099 fprintf (dump_file, "Reordering \n");
7100 print_gimple_stmt (dump_file, iv_update, 0);
7101 print_gimple_stmt (dump_file, use->stmt, 0);
7102 fprintf (dump_file, "\n");
7105 gsi = gsi_for_stmt (use->stmt);
7106 gsi_iv = gsi_for_stmt (iv_update);
7107 gsi_move_before (&gsi_iv, &gsi);
7109 cand->pos = IP_BEFORE_USE;
7110 cand->incremented_at = use->stmt;
7113 /* Return the alias pointer type that should be used for a MEM_REF
7114 associated with USE, which has type USE_PTR_ADDRESS. */
7116 static tree
7117 get_alias_ptr_type_for_ptr_address (iv_use *use)
7119 gcall *call = as_a <gcall *> (use->stmt);
7120 switch (gimple_call_internal_fn (call))
7122 case IFN_MASK_LOAD:
7123 case IFN_MASK_STORE:
7124 case IFN_MASK_LOAD_LANES:
7125 case IFN_MASK_STORE_LANES:
7126 /* The second argument contains the correct alias type. */
7127 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7128 return TREE_TYPE (gimple_call_arg (call, 1));
7130 default:
7131 gcc_unreachable ();
7136 /* Rewrites USE (address that is an iv) using candidate CAND. */
7138 static void
7139 rewrite_use_address (struct ivopts_data *data,
7140 struct iv_use *use, struct iv_cand *cand)
7142 aff_tree aff;
7143 bool ok;
7145 adjust_iv_update_pos (cand, use);
7146 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7147 gcc_assert (ok);
7148 unshare_aff_combination (&aff);
7150 /* To avoid undefined overflow problems, all IV candidates use unsigned
7151 integer types. The drawback is that this makes it impossible for
7152 create_mem_ref to distinguish an IV that is based on a memory object
7153 from one that represents simply an offset.
7155 To work around this problem, we pass a hint to create_mem_ref that
7156 indicates which variable (if any) in aff is an IV based on a memory
7157 object. Note that we only consider the candidate. If this is not
7158 based on an object, the base of the reference is in some subexpression
7159 of the use -- but these will use pointer types, so they are recognized
7160 by the create_mem_ref heuristics anyway. */
7161 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7162 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7163 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7164 tree type = use->mem_type;
7165 tree alias_ptr_type;
7166 if (use->type == USE_PTR_ADDRESS)
7167 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7168 else
7170 gcc_assert (type == TREE_TYPE (*use->op_p));
7171 unsigned int align = get_object_alignment (*use->op_p);
7172 if (align != TYPE_ALIGN (type))
7173 type = build_aligned_type (type, align);
7174 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7176 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7177 iv, base_hint, data->speed);
7179 if (use->type == USE_PTR_ADDRESS)
7181 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7182 ref = fold_convert (get_use_type (use), ref);
7183 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7184 true, GSI_SAME_STMT);
7186 else
7187 copy_ref_info (ref, *use->op_p);
7189 *use->op_p = ref;
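/* As an illustration (names hypothetical): an address use such as a[i_10]
   inside the loop typically ends up rewritten into a TARGET_MEM_REF like
   MEM[symbol: a, index: ivtmp_17, step: 4], with the exact shape chosen by
   create_mem_ref to match the target's addressing modes.  */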
7192 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7193 candidate CAND. */
7195 static void
7196 rewrite_use_compare (struct ivopts_data *data,
7197 struct iv_use *use, struct iv_cand *cand)
7199 tree comp, op, bound;
7200 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7201 enum tree_code compare;
7202 struct iv_group *group = data->vgroups[use->group_id];
7203 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7205 bound = cp->value;
7206 if (bound)
7208 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7209 tree var_type = TREE_TYPE (var);
7210 gimple_seq stmts;
7212 if (dump_file && (dump_flags & TDF_DETAILS))
7214 fprintf (dump_file, "Replacing exit test: ");
7215 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7217 compare = cp->comp;
7218 bound = unshare_expr (fold_convert (var_type, bound));
7219 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7220 if (stmts)
7221 gsi_insert_seq_on_edge_immediate (
7222 loop_preheader_edge (data->current_loop),
7223 stmts);
7225 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7226 gimple_cond_set_lhs (cond_stmt, var);
7227 gimple_cond_set_code (cond_stmt, compare);
7228 gimple_cond_set_rhs (cond_stmt, op);
7229 return;
7232 /* The induction variable elimination failed; just express the original
7233 giv. */
7234 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7235 gcc_assert (comp != NULL_TREE);
7236 gcc_assert (use->op_p != NULL);
7237 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7238 SSA_NAME_VAR (*use->op_p),
7239 true, GSI_SAME_STMT);
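/* For example (SSA names hypothetical): when iv elimination succeeds, an
   exit test such as "if (i_12 < n_9)" may become "if (ivtmp_17 != _23)",
   where _23 is the bound computed once in the loop preheader; otherwise the
   original comparison operand is simply re-expressed in terms of the
   candidate.  */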
7242 /* Rewrite the groups using the selected induction variables. */
7244 static void
7245 rewrite_groups (struct ivopts_data *data)
7247 unsigned i, j;
7249 for (i = 0; i < data->vgroups.length (); i++)
7251 struct iv_group *group = data->vgroups[i];
7252 struct iv_cand *cand = group->selected;
7254 gcc_assert (cand);
7256 if (group->type == USE_NONLINEAR_EXPR)
7258 for (j = 0; j < group->vuses.length (); j++)
7260 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7261 update_stmt (group->vuses[j]->stmt);
7264 else if (address_p (group->type))
7266 for (j = 0; j < group->vuses.length (); j++)
7268 rewrite_use_address (data, group->vuses[j], cand);
7269 update_stmt (group->vuses[j]->stmt);
7272 else
7274 gcc_assert (group->type == USE_COMPARE);
7276 for (j = 0; j < group->vuses.length (); j++)
7278 rewrite_use_compare (data, group->vuses[j], cand);
7279 update_stmt (group->vuses[j]->stmt);
7285 /* Removes the ivs that are not used after rewriting. */
7287 static void
7288 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7290 unsigned j;
7291 bitmap_iterator bi;
7293 /* Figure out an order in which to release SSA DEFs so that we don't
7294 release something that we'd have to propagate into a debug stmt
7295 afterwards. */
7296 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7298 struct version_info *info;
7300 info = ver_info (data, j);
7301 if (info->iv
7302 && !integer_zerop (info->iv->step)
7303 && !info->inv_id
7304 && !info->iv->nonlin_use
7305 && !info->preserve_biv)
7307 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7309 tree def = info->iv->ssa_name;
7311 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7313 imm_use_iterator imm_iter;
7314 use_operand_p use_p;
7315 gimple *stmt;
7316 int count = 0;
7318 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7320 if (!gimple_debug_bind_p (stmt))
7321 continue;
7323 /* We just want to determine whether to do nothing
7324 (count == 0), to substitute the computed
7325 expression into a single use of the SSA DEF by
7326 itself (count == 1), or to use a debug temp
7327 because the SSA DEF is used multiple times or as
7328 part of a larger expression (count > 1). */
7329 count++;
7330 if (gimple_debug_bind_get_value (stmt) != def)
7331 count++;
7333 if (count > 1)
7334 BREAK_FROM_IMM_USE_STMT (imm_iter);
7337 if (!count)
7338 continue;
7340 struct iv_use dummy_use;
7341 struct iv_cand *best_cand = NULL, *cand;
7342 unsigned i, best_pref = 0, cand_pref;
7344 memset (&dummy_use, 0, sizeof (dummy_use));
7345 dummy_use.iv = info->iv;
7346 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7348 cand = data->vgroups[i]->selected;
7349 if (cand == best_cand)
7350 continue;
7351 cand_pref = operand_equal_p (cand->iv->step,
7352 info->iv->step, 0)
7353 ? 4 : 0;
7354 cand_pref
7355 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7356 == TYPE_MODE (TREE_TYPE (info->iv->base))
7357 ? 2 : 0;
7358 cand_pref
7359 += TREE_CODE (cand->iv->base) == INTEGER_CST
7360 ? 1 : 0;
7361 if (best_cand == NULL || best_pref < cand_pref)
7363 best_cand = cand;
7364 best_pref = cand_pref;
7368 if (!best_cand)
7369 continue;
7371 tree comp = get_computation_at (data->current_loop,
7372 SSA_NAME_DEF_STMT (def),
7373 &dummy_use, best_cand);
7374 if (!comp)
7375 continue;
7377 if (count > 1)
7379 tree vexpr = make_node (DEBUG_EXPR_DECL);
7380 DECL_ARTIFICIAL (vexpr) = 1;
7381 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7382 if (SSA_NAME_VAR (def))
7383 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7384 else
7385 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7386 gdebug *def_temp
7387 = gimple_build_debug_bind (vexpr, comp, NULL);
7388 gimple_stmt_iterator gsi;
7390 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7391 gsi = gsi_after_labels (gimple_bb
7392 (SSA_NAME_DEF_STMT (def)));
7393 else
7394 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7396 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7397 comp = vexpr;
7400 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7402 if (!gimple_debug_bind_p (stmt))
7403 continue;
7405 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7406 SET_USE (use_p, comp);
7408 update_stmt (stmt);
7415 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7416 for hash_map::traverse. */
7418 bool
7419 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7421 free (value);
7422 return true;
7425 /* Frees data allocated by the optimization of a single loop. */
7427 static void
7428 free_loop_data (struct ivopts_data *data)
7430 unsigned i, j;
7431 bitmap_iterator bi;
7432 tree obj;
7434 if (data->niters)
7436 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7437 delete data->niters;
7438 data->niters = NULL;
7441 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7443 struct version_info *info;
7445 info = ver_info (data, i);
7446 info->iv = NULL;
7447 info->has_nonlin_use = false;
7448 info->preserve_biv = false;
7449 info->inv_id = 0;
7451 bitmap_clear (data->relevant);
7452 bitmap_clear (data->important_candidates);
7454 for (i = 0; i < data->vgroups.length (); i++)
7456 struct iv_group *group = data->vgroups[i];
7458 for (j = 0; j < group->vuses.length (); j++)
7459 free (group->vuses[j]);
7460 group->vuses.release ();
7462 BITMAP_FREE (group->related_cands);
7463 for (j = 0; j < group->n_map_members; j++)
7465 if (group->cost_map[j].inv_vars)
7466 BITMAP_FREE (group->cost_map[j].inv_vars);
7467 if (group->cost_map[j].inv_exprs)
7468 BITMAP_FREE (group->cost_map[j].inv_exprs);
7471 free (group->cost_map);
7472 free (group);
7474 data->vgroups.truncate (0);
7476 for (i = 0; i < data->vcands.length (); i++)
7478 struct iv_cand *cand = data->vcands[i];
7480 if (cand->inv_vars)
7481 BITMAP_FREE (cand->inv_vars);
7482 if (cand->inv_exprs)
7483 BITMAP_FREE (cand->inv_exprs);
7484 free (cand);
7486 data->vcands.truncate (0);
7488 if (data->version_info_size < num_ssa_names)
7490 data->version_info_size = 2 * num_ssa_names;
7491 free (data->version_info);
7492 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7495 data->max_inv_var_id = 0;
7496 data->max_inv_expr_id = 0;
7498 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7499 SET_DECL_RTL (obj, NULL_RTX);
7501 decl_rtl_to_reset.truncate (0);
7503 data->inv_expr_tab->empty ();
7505 data->iv_common_cand_tab->empty ();
7506 data->iv_common_cands.truncate (0);
7509 /* Finalizes the data structures in DATA used by the iv optimization
7510 pass. */
7512 static void
7513 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7515 free_loop_data (data);
7516 free (data->version_info);
7517 BITMAP_FREE (data->relevant);
7518 BITMAP_FREE (data->important_candidates);
7520 decl_rtl_to_reset.release ();
7521 data->vgroups.release ();
7522 data->vcands.release ();
7523 delete data->inv_expr_tab;
7524 data->inv_expr_tab = NULL;
7525 free_affine_expand_cache (&data->name_expansion_cache);
7526 delete data->iv_common_cand_tab;
7527 data->iv_common_cand_tab = NULL;
7528 data->iv_common_cands.release ();
7529 obstack_free (&data->iv_obstack, NULL);
7532 /* Returns true if the loop body BODY includes any function calls. */
7534 static bool
7535 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7537 gimple_stmt_iterator gsi;
7538 unsigned i;
7540 for (i = 0; i < num_nodes; i++)
7541 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7543 gimple *stmt = gsi_stmt (gsi);
7544 if (is_gimple_call (stmt)
7545 && !gimple_call_internal_p (stmt)
7546 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7547 return true;
7549 return false;
7552 /* Determine cost scaling factor for basic blocks in loop. */
7553 #define COST_SCALING_FACTOR_BOUND (20)
7555 static void
7556 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7558 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7559 if (!data->speed || lfreq <= 0)
7560 return;
7562 int max_freq = lfreq;
7563 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7565 body[i]->aux = (void *)(intptr_t) 1;
7566 if (max_freq < body[i]->count.to_frequency (cfun))
7567 max_freq = body[i]->count.to_frequency (cfun);
7569 if (max_freq > lfreq)
7571 int divisor, factor;
7572 /* Check if scaling factor itself needs to be scaled by the bound. This
7573 is to avoid overflow when scaling cost according to profile info. */
7574 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7576 divisor = max_freq;
7577 factor = COST_SCALING_FACTOR_BOUND;
7579 else
7581 divisor = lfreq;
7582 factor = 1;
7584 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7586 int bfreq = body[i]->count.to_frequency (cfun);
7587 if (bfreq <= lfreq)
7588 continue;
7590 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
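/* A worked example with made-up frequencies: lfreq == 100 and
   max_freq == 4000 give max_freq / lfreq == 40 > COST_SCALING_FACTOR_BOUND,
   so divisor = 4000 and factor = 20; a block with bfreq == 2000 then gets
   aux = 20 * 2000 / 4000 = 10 instead of the unbounded ratio 20, and the
   hottest block is scaled by exactly the bound.  */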
7595 /* Optimizes the LOOP. Returns true if anything changed. */
7597 static bool
7598 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7599 bitmap toremove)
7601 bool changed = false;
7602 struct iv_ca *iv_ca;
7603 edge exit = single_dom_exit (loop);
7604 basic_block *body;
7606 gcc_assert (!data->niters);
7607 data->current_loop = loop;
7608 data->loop_loc = find_loop_location (loop).get_location_t ();
7609 data->speed = optimize_loop_for_speed_p (loop);
7611 if (dump_file && (dump_flags & TDF_DETAILS))
7613 fprintf (dump_file, "Processing loop %d", loop->num);
7614 if (data->loop_loc != UNKNOWN_LOCATION)
7615 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7616 LOCATION_LINE (data->loop_loc));
7617 fprintf (dump_file, "\n");
7619 if (exit)
7621 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7622 exit->src->index, exit->dest->index);
7623 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7624 fprintf (dump_file, "\n");
7627 fprintf (dump_file, "\n");
7630 body = get_loop_body (loop);
7631 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7632 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7634 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7636 /* For each ssa name determines whether it behaves as an induction variable
7637 in some loop. */
7638 if (!find_induction_variables (data))
7639 goto finish;
7641 /* Finds interesting uses (item 1). */
7642 find_interesting_uses (data);
7643 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7644 goto finish;
7646 /* Determine cost scaling factor for basic blocks in loop. */
7647 determine_scaling_factor (data, body);
7649 /* Finds candidates for the induction variables (item 2). */
7650 find_iv_candidates (data);
7652 /* Calculates the costs (item 3, part 1). */
7653 determine_iv_costs (data);
7654 determine_group_iv_costs (data);
7655 determine_set_costs (data);
7657 /* Find the optimal set of induction variables (item 3, part 2). */
7658 iv_ca = find_optimal_iv_set (data);
7659 /* Cleanup basic block aux field. */
7660 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7661 body[i]->aux = NULL;
7662 if (!iv_ca)
7663 goto finish;
7664 changed = true;
7666 /* Create the new induction variables (item 4, part 1). */
7667 create_new_ivs (data, iv_ca);
7668 iv_ca_free (&iv_ca);
7670 /* Rewrite the uses (item 4, part 2). */
7671 rewrite_groups (data);
7673 /* Remove the ivs that are unused after rewriting. */
7674 remove_unused_ivs (data, toremove);
7676 finish:
7677 free (body);
7678 free_loop_data (data);
7680 return changed;
7683 /* Main entry point. Optimizes induction variables in loops. */
7685 void
7686 tree_ssa_iv_optimize (void)
7688 struct loop *loop;
7689 struct ivopts_data data;
7690 auto_bitmap toremove;
7692 tree_ssa_iv_optimize_init (&data);
7694 /* Optimize the loops starting with the innermost ones. */
7695 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7697 if (dump_file && (dump_flags & TDF_DETAILS))
7698 flow_loop_dump (loop, dump_file, NULL, 1);
7700 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7703 /* Remove eliminated IV defs. */
7704 release_defs_bitset (toremove);
7706 /* We have changed the structure of induction variables; it might happen
7707 that definitions in the scev database refer to some of them that were
7708 eliminated. */
7709 scev_reset_htab ();
7710 /* Likewise niter and control-IV information. */
7711 free_numbers_of_iterations_estimates (cfun);
7713 tree_ssa_iv_optimize_finalize (&data);
7716 #include "gt-tree-ssa-loop-ivopts.h"