/* Induction variable optimizations.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variable's
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give a better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
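
/* As an illustrative sketch (not taken from any particular testcase), the
   transformation described above typically turns a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the interesting uses are the address &a[i] and the compare i < n,
   into something equivalent to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   with a single pointer candidate P: the multiplication hidden in the
   address &a[i] is strength-reduced to the increment of P, and the
   original counter I is eliminated by comparing P against a bound
   computed once before the loop.  */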
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists. */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }

  return niter;
}
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The nonlinear use of this iv, if there is one.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
	 && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
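
/* For example (illustrative values only): a cost of {4, 0} compares
   smaller than {5, 2} because the runtime cost field dominates the
   ordering, while {4, 1} compares smaller than {4, 2} because
   complexity only breaks ties between equal runtime costs.  */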
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  tree mem_type;	/* The memory type to use when testing whether an
			   address is legitimate, and what the address's
			   cost is.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  poly_uint64_pod addr_offset;
			/* Const offset stripped from base address.  */
};
/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;/* For IP_{BEFORE,AFTER}_USE candidates, the place
			     where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Return true if uses of type TYPE represent some form of address.  */

inline bool
address_p (use_type type)
{
  return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
}

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  location_t loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
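  /* Each indent level adds two spaces: point P into the tail of SPACES
     so that printing it emits exactly 2 * INDENT_LEVEL spaces.  */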
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness w.r.t. loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_REF_ADDRESS)
	fprintf (file, "  Type:\tREFERENCE ADDRESS\n");
      else if (group->type == USE_PTR_ADDRESS)
	fprintf (file, "  Type:\tPOINTER ARGUMENT ADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, "  Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == CALL_EXPR)
    {
      tree arg;
      call_expr_arg_iterator iter;
      FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
	if (contains_abnormal_ssa_name_p (arg))
	  return true;
      return false;
    }

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      if (POLY_INT_CST_P (expr))
	return NULL_TREE;
      return fold_convert (ptr_type_node, expr);
    }
}
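
/* For instance (an illustrative case, not from any testcase): for
   EXPR == &a[i] where A is a declaration, the ADDR_EXPR case finds the
   base address A and returns &a converted to void *; for an integer
   constant EXPR, NULL_TREE is returned since it points to no object.  */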
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, but that's expensive to
	 check and unusual to happen, so we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, but that's expensive to
     check and unusual to happen, so we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (cfun, stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}
/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}
/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base and MEM_TYPE is the type
   of the memory being addressed.  For uses of other types, ADDR_BASE
   and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree mem_type,
	    tree addr_base, poly_uint64 addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->mem_type = mem_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);

  data->vgroups.safe_push (group);
  return group;
}

/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   New group will be created if there is no existing group for the use.
   MEM_TYPE is the type of memory being addressed, or NULL if this
   isn't an address reference.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type,
		  tree mem_type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  poly_uint64 addr_offset = 0;

  /* A non address type use is always recorded in a new group; for an
     address type use we first look for an existing group with the same
     stripped base and step.  */
  if (address_p (type))
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (!address_p (use->type))
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, mem_type,
		     addr_base, addr_offset);
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
  iv->nonlin_use = use;
  return use;
}
/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
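
/* For example (an illustrative sketch only), in

     for (i = 0; i < n; i++)
       a[i] = 0;

   the compare i < n is a COMP_IV_ELIM opportunity: instead of expressing
   the value of I in terms of a chosen pointer candidate P, the whole test
   can be replaced by comparing P against a bound such as &a[n], removing
   the last remaining use of I.  */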
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and the way the condition can be
   rewritten is returned.  If this is not the case, CONTROL_VAR and
   BOUND are set to the arguments of the condition and COMP_IV_NA is
   returned.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of the comparison are IVs, we can express ivs on
     both ends.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If neither side of the comparison is an IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is an IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is an IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *bound_iv;
  enum comp_iv_rewrite ret;

  ret = extract_cond_operands (data, stmt,
			       &var_p, &bound_p, &var_iv, &bound_iv);
  if (ret == COMP_IV_NA)
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
  /* Record compare type iv_use for iv on the other side of comparison.  */
  if (ret == COMP_IV_EXPR_2)
    record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Given expression EXPR which computes inductive values with respect
   to loop recorded in DATA, this function returns biv from which EXPR
   is derived by tracing definition chains of ssa variables in EXPR.  */

static struct iv*
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not ssa name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}
/* Record that BIV, together with its predecessor and successor ivs, is
   used in address type uses.  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}
2027 /* Cumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
2031 struct ifs_ivopts_data
2033 struct ivopts_data *ivopts_data;
2034 gimple *stmt;
2035 tree step;
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2041 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042 struct iv *iv;
2043 bool use_overflow_semantics = false;
2044 tree step, iv_base, iv_step, lbound, off;
2045 struct loop *loop = dta->ivopts_data->current_loop;
2047 /* If base is a component ref, require that the offset of the reference
2048 be invariant. */
2049 if (TREE_CODE (base) == COMPONENT_REF)
2051 off = component_ref_field_offset (base);
2052 return expr_invariant_in_loop_p (loop, off);
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 return false;
2066 step = array_ref_element_size (base);
2067 lbound = array_ref_low_bound (base);
2069 if (!expr_invariant_in_loop_p (loop, step)
2070 || !expr_invariant_in_loop_p (loop, lbound))
2071 return false;
2074 if (TREE_CODE (*idx) != SSA_NAME)
2075 return true;
2077 iv = get_iv (dta->ivopts_data, *idx);
2078 if (!iv)
2079 return false;
2081 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2084 *idx = iv->base;
2086 if (integer_zerop (iv->step))
2087 return true;
2089 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2091 step = array_ref_element_size (base);
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step) != INTEGER_CST)
2095 return false;
2097 else
2098 /* The step for pointer arithmetic is already 1 byte. */
2099 step = size_one_node;
2101 iv_base = iv->base;
2102 iv_step = iv->step;
2103 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104 use_overflow_semantics = true;
2106 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 sizetype, &iv_base, &iv_step, dta->stmt,
2108 use_overflow_semantics))
2110 /* The index might wrap. */
2111 return false;
2114 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2117 if (dta->ivopts_data->bivs_not_used_in_addr)
2119 if (!iv->biv_p)
2120 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2122 record_biv_for_address_use (dta->ivopts_data, iv);
2124 return true;
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 void *vdata)
2134 struct ivopts_data *data = (struct ivopts_data *) vdata;
2135 find_interesting_uses_op (data, *idx);
2136 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2138 find_interesting_uses_op (data, array_ref_element_size (base));
2139 find_interesting_uses_op (data, array_ref_low_bound (base));
2141 return true;
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
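/* For instance, constant_multiple_of (i * 6, i, &mul) stores 6 to MUL via
   the MULT_EXPR case, and constant_multiple_of (24, 8, &mul) stores 3 via
   the INTEGER_CST case; i * 6 + 1 is not a constant multiple of i, so
   false would be returned for it.  */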
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2152 tree mby;
2153 enum tree_code code;
2154 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155 widest_int res, p0, p1;
2157 STRIP_NOPS (top);
2158 STRIP_NOPS (bot);
2160 if (operand_equal_p (top, bot, 0))
2162 *mul = 1;
2163 return true;
2166 code = TREE_CODE (top);
2167 switch (code)
2169 case MULT_EXPR:
2170 mby = TREE_OPERAND (top, 1);
2171 if (TREE_CODE (mby) != INTEGER_CST)
2172 return false;
2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 return false;
2177 *mul = wi::sext (res * wi::to_widest (mby), precision);
2178 return true;
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 return false;
2186 if (code == MINUS_EXPR)
2187 p1 = -p1;
2188 *mul = wi::sext (p0 + p1, precision);
2189 return true;
2191 case INTEGER_CST:
2192 if (TREE_CODE (bot) != INTEGER_CST)
2193 return false;
2195 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197 if (p1 == 0)
2198 return false;
2199 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200 return res == 0;
2202 default:
2203 if (POLY_INT_CST_P (top)
2204 && POLY_INT_CST_P (bot)
2205 && constant_multiple_p (wi::to_poly_widest (top),
2206 wi::to_poly_widest (bot), mul))
2207 return true;
2209 return false;
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
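/* Note that both checks below are needed: even if the first access is
   sufficiently aligned, subsequent accesses become misaligned when the
   step is not a multiple of the required alignment.  */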
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref) == TARGET_MEM_REF)
2221 return false;
2223 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2227 unsigned HOST_WIDE_INT bitpos;
2228 unsigned int ref_align;
2229 get_object_alignment_1 (ref, &ref_align, &bitpos);
2230 if (ref_align < align
2231 || (bitpos % align) != 0
2232 || (bitpos % BITS_PER_UNIT) != 0)
2233 return true;
2235 unsigned int trailing_zeros = tree_ctz (step);
2236 if (trailing_zeros < HOST_BITS_PER_INT
2237 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238 return true;
2240 return false;
2243 /* Return true if EXPR may be non-addressable. */
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2248 switch (TREE_CODE (expr))
2250 case VAR_DECL:
2251 /* Check if it's a register variable. */
2252 return DECL_HARD_REGISTER (expr);
2254 case TARGET_MEM_REF:
2255 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2256 target, thus they are always addressable. */
2257 return false;
2259 case MEM_REF:
2260 /* Likewise for MEM_REFs, modulo the storage order. */
2261 return REF_REVERSE_STORAGE_ORDER (expr);
2263 case BIT_FIELD_REF:
2264 if (REF_REVERSE_STORAGE_ORDER (expr))
2265 return true;
2266 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2268 case COMPONENT_REF:
2269 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2270 return true;
2271 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2272 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2274 case ARRAY_REF:
2275 case ARRAY_RANGE_REF:
2276 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2277 return true;
2278 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2280 case VIEW_CONVERT_EXPR:
2281 /* This kind of view-conversion may wrap non-addressable objects
2282 and make them look addressable. After some processing the
2283 non-addressability may be uncovered again, causing ADDR_EXPRs
2284 of inappropriate objects to be built. */
2285 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2286 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2287 return true;
2288 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2290 CASE_CONVERT:
2291 return true;
2293 default:
2294 break;
2297 return false;
2300 /* Finds addresses in *OP_P inside STMT. */
2302 static void
2303 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2304 tree *op_p)
2306 tree base = *op_p, step = size_zero_node;
2307 struct iv *civ;
2308 struct ifs_ivopts_data ifs_ivopts_data;
2310 /* Do not play with volatile memory references. A bit too conservative,
2311 perhaps, but safe. */
2312 if (gimple_has_volatile_ops (stmt))
2313 goto fail;
2315 /* Ignore bitfields for now. Not really something terribly complicated
2316 to handle. TODO. */
2317 if (TREE_CODE (base) == BIT_FIELD_REF)
2318 goto fail;
2320 base = unshare_expr (base);
2322 if (TREE_CODE (base) == TARGET_MEM_REF)
2324 tree type = build_pointer_type (TREE_TYPE (base));
2325 tree astep;
2327 if (TMR_BASE (base)
2328 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2330 civ = get_iv (data, TMR_BASE (base));
2331 if (!civ)
2332 goto fail;
2334 TMR_BASE (base) = civ->base;
2335 step = civ->step;
2337 if (TMR_INDEX2 (base)
2338 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2340 civ = get_iv (data, TMR_INDEX2 (base));
2341 if (!civ)
2342 goto fail;
2344 TMR_INDEX2 (base) = civ->base;
2345 step = civ->step;
2347 if (TMR_INDEX (base)
2348 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2350 civ = get_iv (data, TMR_INDEX (base));
2351 if (!civ)
2352 goto fail;
2354 TMR_INDEX (base) = civ->base;
2355 astep = civ->step;
2357 if (astep)
2359 if (TMR_STEP (base))
2360 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2362 step = fold_build2 (PLUS_EXPR, type, step, astep);
2366 if (integer_zerop (step))
2367 goto fail;
2368 base = tree_mem_ref_addr (type, base);
2370 else
2372 ifs_ivopts_data.ivopts_data = data;
2373 ifs_ivopts_data.stmt = stmt;
2374 ifs_ivopts_data.step = size_zero_node;
2375 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2376 || integer_zerop (ifs_ivopts_data.step))
2377 goto fail;
2378 step = ifs_ivopts_data.step;
2380 /* Check that the base expression is addressable. This needs
2381 to be done after substituting bases of IVs into it. */
2382 if (may_be_nonaddressable_p (base))
2383 goto fail;
2385 /* Moreover, on strict alignment platforms, check that it is
2386 sufficiently aligned. */
2387 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2388 goto fail;
2390 base = build_fold_addr_expr (base);
2392 /* Substituting bases of IVs into the base expression might
2393 have caused folding opportunities. */
2394 if (TREE_CODE (base) == ADDR_EXPR)
2396 tree *ref = &TREE_OPERAND (base, 0);
2397 while (handled_component_p (*ref))
2398 ref = &TREE_OPERAND (*ref, 0);
2399 if (TREE_CODE (*ref) == MEM_REF)
2401 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2402 TREE_OPERAND (*ref, 0),
2403 TREE_OPERAND (*ref, 1));
2404 if (tem)
2405 *ref = tem;
2410 civ = alloc_iv (data, base, step);
2411 /* Fail if base object of this memory reference is unknown. */
2412 if (civ->base_object == NULL_TREE)
2413 goto fail;
2415 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2416 return;
2418 fail:
2419 for_each_index (op_p, idx_record_use, data);
2422 /* Finds and records invariants used in STMT. */
2424 static void
2425 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2427 ssa_op_iter iter;
2428 use_operand_p use_p;
2429 tree op;
2431 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2433 op = USE_FROM_PTR (use_p);
2434 record_invariant (data, op, false);
2438 /* CALL calls an internal function. If operand *OP_P will become an
2439 address when the call is expanded, return the type of the memory
2440 being addressed, otherwise return null. */
2442 static tree
2443 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2445 switch (gimple_call_internal_fn (call))
2447 case IFN_MASK_LOAD:
2448 if (op_p == gimple_call_arg_ptr (call, 0))
2449 return TREE_TYPE (gimple_call_lhs (call));
2450 return NULL_TREE;
2452 case IFN_MASK_STORE:
2453 if (op_p == gimple_call_arg_ptr (call, 0))
2454 return TREE_TYPE (gimple_call_arg (call, 3));
2455 return NULL_TREE;
2457 default:
2458 return NULL_TREE;
2462 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2463 Return true if the operand will become an address when STMT
2464 is expanded and record the associated address use if so. */
2466 static bool
2467 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2468 struct iv *iv)
2470 /* Fail if base object of this memory reference is unknown. */
2471 if (iv->base_object == NULL_TREE)
2472 return false;
2474 tree mem_type = NULL_TREE;
2475 if (gcall *call = dyn_cast <gcall *> (stmt))
2476 if (gimple_call_internal_p (call))
2477 mem_type = get_mem_type_for_internal_fn (call, op_p);
2478 if (mem_type)
2480 iv = alloc_iv (data, iv->base, iv->step);
2481 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2482 return true;
2484 return false;
2487 /* Finds interesting uses of induction variables in the statement STMT. */
2489 static void
2490 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2492 struct iv *iv;
2493 tree op, *lhs, *rhs;
2494 ssa_op_iter iter;
2495 use_operand_p use_p;
2496 enum tree_code code;
2498 find_invariants_stmt (data, stmt);
2500 if (gimple_code (stmt) == GIMPLE_COND)
2502 find_interesting_uses_cond (data, stmt);
2503 return;
2506 if (is_gimple_assign (stmt))
2508 lhs = gimple_assign_lhs_ptr (stmt);
2509 rhs = gimple_assign_rhs1_ptr (stmt);
2511 if (TREE_CODE (*lhs) == SSA_NAME)
2513 /* If the statement defines an induction variable, the uses are not
2514 interesting by themselves. */
2516 iv = get_iv (data, *lhs);
2518 if (iv && !integer_zerop (iv->step))
2519 return;
2522 code = gimple_assign_rhs_code (stmt);
2523 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2524 && (REFERENCE_CLASS_P (*rhs)
2525 || is_gimple_val (*rhs)))
2527 if (REFERENCE_CLASS_P (*rhs))
2528 find_interesting_uses_address (data, stmt, rhs);
2529 else
2530 find_interesting_uses_op (data, *rhs);
2532 if (REFERENCE_CLASS_P (*lhs))
2533 find_interesting_uses_address (data, stmt, lhs);
2534 return;
2536 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2538 find_interesting_uses_cond (data, stmt);
2539 return;
2542 /* TODO -- we should also handle address uses of type
2544 memory = call (whatever);
2548 call (memory). */
2551 if (gimple_code (stmt) == GIMPLE_PHI
2552 && gimple_bb (stmt) == data->current_loop->header)
2554 iv = get_iv (data, PHI_RESULT (stmt));
2556 if (iv && !integer_zerop (iv->step))
2557 return;
2560 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2562 op = USE_FROM_PTR (use_p);
2564 if (TREE_CODE (op) != SSA_NAME)
2565 continue;
2567 iv = get_iv (data, op);
2568 if (!iv)
2569 continue;
2571 if (!find_address_like_use (data, stmt, use_p->use, iv))
2572 find_interesting_uses_op (data, op);
2576 /* Finds interesting uses of induction variables outside of loops
2577 on loop exit edge EXIT. */
2579 static void
2580 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2582 gphi *phi;
2583 gphi_iterator psi;
2584 tree def;
2586 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2588 phi = psi.phi ();
2589 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2590 if (!virtual_operand_p (def))
2591 find_interesting_uses_op (data, def);
2595 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2596 mode for the memory reference represented by USE. */
2598 static GTY (()) vec<rtx, va_gc> *addr_list;
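/* ADDR_LIST caches one scratch (reg + offset) address per (address space,
   memory mode) pair; the function below patches the offset being tested
   into the cached rtx and asks the backend whether the result is a valid
   address.  */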
2600 static bool
2601 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2603 rtx reg, addr;
2604 unsigned list_index;
2605 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2606 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2608 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2609 if (list_index >= vec_safe_length (addr_list))
2610 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2612 addr = (*addr_list)[list_index];
2613 if (!addr)
2615 addr_mode = targetm.addr_space.address_mode (as);
2616 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2617 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2618 (*addr_list)[list_index] = addr;
2620 else
2621 addr_mode = GET_MODE (addr);
2623 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2624 return (memory_address_addr_space_p (mem_mode, addr, as));
2627 /* Comparison function to sort group in ascending order of addr_offset. */
2629 static int
2630 group_compare_offset (const void *a, const void *b)
2632 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2633 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2635 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2638 /* Check if small groups should be split. Return true if no group
2639 contains more than two uses with distinct addr_offsets. Return
2640 false otherwise. We want to split such groups because:
2642 1) Small groups don't have much benefit and may interfere with
2643 general candidate selection.
2644 2) The problem size with only small groups is usually small, and the
2645 general algorithm can handle it well.
2647 TODO -- The above claim may not hold when we want to merge memory
2648 accesses with consecutive addresses. */
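/* For instance, a group whose uses sit at offsets {0, 8, 8} has only two
   distinct offsets and thus permits splitting; a single group with uses
   at {0, 4, 8} has three and disables splitting for all groups.  */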
2650 static bool
2651 split_small_address_groups_p (struct ivopts_data *data)
2653 unsigned int i, j, distinct = 1;
2654 struct iv_use *pre;
2655 struct iv_group *group;
2657 for (i = 0; i < data->vgroups.length (); i++)
2659 group = data->vgroups[i];
2660 if (group->vuses.length () == 1)
2661 continue;
2663 gcc_assert (address_p (group->type));
2664 if (group->vuses.length () == 2)
2666 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2667 group->vuses[1]->addr_offset) > 0)
2668 std::swap (group->vuses[0], group->vuses[1]);
2670 else
2671 group->vuses.qsort (group_compare_offset);
2673 if (distinct > 2)
2674 continue;
2676 distinct = 1;
2677 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2679 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2681 pre = group->vuses[j];
2682 distinct++;
2685 if (distinct > 2)
2686 break;
2690 return (distinct <= 2);
2693 /* For each group of address type uses, this function further groups
2694 these uses according to the maximum offset supported by the target's
2695 [base + offset] addressing mode. */
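/* As an illustrative sketch: on a hypothetical target whose [base + offset]
   mode only accepts offsets in [-256, 255], uses at offsets 0, 16 and 1024
   from the same base would be split into two groups, {0, 16} and {1024}.  */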
2697 static void
2698 split_address_groups (struct ivopts_data *data)
2700 unsigned int i, j;
2701 /* Whether small groups should always be split. */
2702 bool split_p = split_small_address_groups_p (data);
2704 for (i = 0; i < data->vgroups.length (); i++)
2706 struct iv_group *new_group = NULL;
2707 struct iv_group *group = data->vgroups[i];
2708 struct iv_use *use = group->vuses[0];
2710 use->id = 0;
2711 use->group_id = group->id;
2712 if (group->vuses.length () == 1)
2713 continue;
2715 gcc_assert (address_p (use->type));
2717 for (j = 1; j < group->vuses.length ();)
2719 struct iv_use *next = group->vuses[j];
2720 poly_int64 offset = next->addr_offset - use->addr_offset;
2722 /* Split the group if asked to, or if the offset against the first
2723 use can't fit in the offset part of the addressing mode. IV uses
2724 having the same offset are still kept in one group. */
2725 if (maybe_ne (offset, 0)
2726 && (split_p || !addr_offset_valid_p (use, offset)))
2728 if (!new_group)
2729 new_group = record_group (data, group->type);
2730 group->vuses.ordered_remove (j);
2731 new_group->vuses.safe_push (next);
2732 continue;
2735 next->id = j;
2736 next->group_id = group->id;
2737 j++;
2742 /* Finds uses of the induction variables that are interesting. */
2744 static void
2745 find_interesting_uses (struct ivopts_data *data)
2747 basic_block bb;
2748 gimple_stmt_iterator bsi;
2749 basic_block *body = get_loop_body (data->current_loop);
2750 unsigned i;
2751 edge e;
2753 for (i = 0; i < data->current_loop->num_nodes; i++)
2755 edge_iterator ei;
2756 bb = body[i];
2758 FOR_EACH_EDGE (e, ei, bb->succs)
2759 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2760 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2761 find_interesting_uses_outside (data, e);
2763 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2764 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2766 if (!is_gimple_debug (gsi_stmt (bsi)))
2767 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2769 free (body);
2771 split_address_groups (data);
2773 if (dump_file && (dump_flags & TDF_DETAILS))
2775 fprintf (dump_file, "\n<IV Groups>:\n");
2776 dump_groups (dump_file, data);
2777 fprintf (dump_file, "\n");
2781 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2782 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2783 we are at the top-level of the processed address. */
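/* For example, a plain p + 24 yields p with *OFFSET == 24; given
   &a[i + 2] with 4-byte elements, the ARRAY_REF and ADDR_EXPR cases
   below combine to yield &a[i] with *OFFSET == 8.  */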
2785 static tree
2786 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2787 poly_int64 *offset)
2789 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2790 enum tree_code code;
2791 tree type, orig_type = TREE_TYPE (expr);
2792 poly_int64 off0, off1;
2793 HOST_WIDE_INT st;
2794 tree orig_expr = expr;
2796 STRIP_NOPS (expr);
2798 type = TREE_TYPE (expr);
2799 code = TREE_CODE (expr);
2800 *offset = 0;
2802 switch (code)
2804 case POINTER_PLUS_EXPR:
2805 case PLUS_EXPR:
2806 case MINUS_EXPR:
2807 op0 = TREE_OPERAND (expr, 0);
2808 op1 = TREE_OPERAND (expr, 1);
2810 op0 = strip_offset_1 (op0, false, false, &off0);
2811 op1 = strip_offset_1 (op1, false, false, &off1);
2813 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2814 if (op0 == TREE_OPERAND (expr, 0)
2815 && op1 == TREE_OPERAND (expr, 1))
2816 return orig_expr;
2818 if (integer_zerop (op1))
2819 expr = op0;
2820 else if (integer_zerop (op0))
2822 if (code == MINUS_EXPR)
2823 expr = fold_build1 (NEGATE_EXPR, type, op1);
2824 else
2825 expr = op1;
2827 else
2828 expr = fold_build2 (code, type, op0, op1);
2830 return fold_convert (orig_type, expr);
2832 case MULT_EXPR:
2833 op1 = TREE_OPERAND (expr, 1);
2834 if (!cst_and_fits_in_hwi (op1))
2835 return orig_expr;
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, false, false, &off0);
2839 if (op0 == TREE_OPERAND (expr, 0))
2840 return orig_expr;
2842 *offset = off0 * int_cst_value (op1);
2843 if (integer_zerop (op0))
2844 expr = op0;
2845 else
2846 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2848 return fold_convert (orig_type, expr);
2850 case ARRAY_REF:
2851 case ARRAY_RANGE_REF:
2852 if (!inside_addr)
2853 return orig_expr;
2855 step = array_ref_element_size (expr);
2856 if (!cst_and_fits_in_hwi (step))
2857 break;
2859 st = int_cst_value (step);
2860 op1 = TREE_OPERAND (expr, 1);
2861 op1 = strip_offset_1 (op1, false, false, &off1);
2862 *offset = off1 * st;
2864 if (top_compref
2865 && integer_zerop (op1))
2867 /* Strip the component reference completely. */
2868 op0 = TREE_OPERAND (expr, 0);
2869 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2870 *offset += off0;
2871 return op0;
2873 break;
2875 case COMPONENT_REF:
2877 tree field;
2879 if (!inside_addr)
2880 return orig_expr;
2882 tmp = component_ref_field_offset (expr);
2883 field = TREE_OPERAND (expr, 1);
2884 if (top_compref
2885 && cst_and_fits_in_hwi (tmp)
2886 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2888 HOST_WIDE_INT boffset, abs_off;
2890 /* Strip the component reference completely. */
2891 op0 = TREE_OPERAND (expr, 0);
2892 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2893 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2894 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2895 if (boffset < 0)
2896 abs_off = -abs_off;
2898 *offset = off0 + int_cst_value (tmp) + abs_off;
2899 return op0;
2902 break;
2904 case ADDR_EXPR:
2905 op0 = TREE_OPERAND (expr, 0);
2906 op0 = strip_offset_1 (op0, true, true, &off0);
2907 *offset += off0;
2909 if (op0 == TREE_OPERAND (expr, 0))
2910 return orig_expr;
2912 expr = build_fold_addr_expr (op0);
2913 return fold_convert (orig_type, expr);
2915 case MEM_REF:
2916 /* ??? Offset operand? */
2917 inside_addr = false;
2918 break;
2920 default:
2921 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2922 return build_int_cst (orig_type, 0);
2923 return orig_expr;
2926 /* Default handling of expressions for which we want to recurse into
2927 the first operand. */
2928 op0 = TREE_OPERAND (expr, 0);
2929 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2930 *offset += off0;
2932 if (op0 == TREE_OPERAND (expr, 0)
2933 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2934 return orig_expr;
2936 expr = copy_node (expr);
2937 TREE_OPERAND (expr, 0) = op0;
2938 if (op1)
2939 TREE_OPERAND (expr, 1) = op1;
2941 /* Inside an address, we might strip the top-level component references,
2942 thus changing type of the expression. Handling of ADDR_EXPR
2943 will fix that. */
2944 expr = fold_convert (orig_type, expr);
2946 return expr;
2949 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2951 tree
2952 strip_offset (tree expr, poly_uint64_pod *offset)
2954 poly_int64 off;
2955 tree core = strip_offset_1 (expr, false, false, &off);
2956 *offset = off;
2957 return core;
2960 /* Returns a variant of TYPE that can be used as a base for different uses.
2961 We return an unsigned type with the same precision, which avoids problems
2962 with overflows. */
2964 static tree
2965 generic_type_for (tree type)
2967 if (POINTER_TYPE_P (type))
2968 return unsigned_type_for (type);
2970 if (TYPE_UNSIGNED (type))
2971 return type;
2973 return unsigned_type_for (type);
2976 /* Private data for walk_tree. */
2978 struct walk_tree_data
2980 bitmap *inv_vars;
2981 struct ivopts_data *idata;
2984 /* Callback function for walk_tree; it records invariants and symbol
2985 references in *EXPR_P. DATA is the structure storing the result info. */
2987 static tree
2988 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2990 tree op = *expr_p;
2991 struct version_info *info;
2992 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2994 if (TREE_CODE (op) != SSA_NAME)
2995 return NULL_TREE;
2997 info = name_info (wdata->idata, op);
2998 /* Because we expand simple operations when finding IVs, a loop invariant
2999 variable that isn't referred to by the original loop could be used now.
3000 Record such invariant variables here. */
3001 if (!info->iv)
3003 struct ivopts_data *idata = wdata->idata;
3004 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3006 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3008 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3009 record_invariant (idata, op, false);
3012 if (!info->inv_id || info->has_nonlin_use)
3013 return NULL_TREE;
3015 if (!*wdata->inv_vars)
3016 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3017 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3019 return NULL_TREE;
3022 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
3023 should store them. */
3025 static inline void
3026 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3028 struct walk_tree_data wdata;
3030 if (!inv_vars)
3031 return;
3033 wdata.idata = data;
3034 wdata.inv_vars = inv_vars;
3035 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3038 /* Get the entry in the invariant expr hash table for INV_EXPR. A new
3039 entry will be recorded if it doesn't exist yet. Given the two exprs:
3040 inv_expr + cst1, inv_expr + cst2
3041 it's hard to decide whether the constant part should be stripped
3042 or not. We choose not to strip it, based on the following facts:
3043 1) We need to count the ADD cost for the constant part if it's
3044 stripped, which isn't always trivial where this function is called.
3045 2) Stripping the constant away may conflict with the following loop
3046 invariant hoisting pass.
3047 3) Not stripping the constant away results in more invariant exprs,
3048 which usually leads to decisions preferring lower register pressure. */
3050 static iv_inv_expr_ent *
3051 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3053 STRIP_NOPS (inv_expr);
3055 if (poly_int_tree_p (inv_expr)
3056 || TREE_CODE (inv_expr) == SSA_NAME)
3057 return NULL;
3059 /* Don't strip constant part away as we used to. */
3061 /* Store EXPR in DATA->inv_expr_tab and return a pointer to its iv_inv_expr_ent. */
3062 struct iv_inv_expr_ent ent;
3063 ent.expr = inv_expr;
3064 ent.hash = iterative_hash_expr (inv_expr, 0);
3065 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3067 if (!*slot)
3069 *slot = XNEW (struct iv_inv_expr_ent);
3070 (*slot)->expr = inv_expr;
3071 (*slot)->hash = ent.hash;
3072 (*slot)->id = ++data->max_inv_expr_id;
3075 return *slot;
3078 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3079 position to POS. If USE is not NULL, the candidate is set as related to
3080 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3081 replacement of the final value of the iv by a direct computation. */
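/* The possible positions (enum iv_position, defined earlier in this file)
   are: IP_NORMAL, just before the exit condition; IP_END, at the end of
   the latch block; IP_BEFORE_USE and IP_AFTER_USE, around a specific use,
   used for auto-increment candidates; and IP_ORIGINAL, the position of
   the original biv increment.  */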
3083 static struct iv_cand *
3084 add_candidate_1 (struct ivopts_data *data,
3085 tree base, tree step, bool important, enum iv_position pos,
3086 struct iv_use *use, gimple *incremented_at,
3087 struct iv *orig_iv = NULL)
3089 unsigned i;
3090 struct iv_cand *cand = NULL;
3091 tree type, orig_type;
3093 gcc_assert (base && step);
3095 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3096 live, but the ivopts code may replace a real pointer with one
3097 pointing before or after the memory block that is then adjusted
3098 into the memory block during the loop. FIXME: It would likely be
3099 better to actually force the pointer live and still use ivopts;
3100 for example, it would be enough to write the pointer into memory
3101 and keep it there until after the loop. */
3102 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3103 return NULL;
3105 /* For non-original variables, make sure their values are computed in a type
3106 that does not invoke undefined behavior on overflows (since in general,
3107 we cannot prove that these induction variables are non-wrapping). */
3108 if (pos != IP_ORIGINAL)
3110 orig_type = TREE_TYPE (base);
3111 type = generic_type_for (orig_type);
3112 if (type != orig_type)
3114 base = fold_convert (type, base);
3115 step = fold_convert (type, step);
3119 for (i = 0; i < data->vcands.length (); i++)
3121 cand = data->vcands[i];
3123 if (cand->pos != pos)
3124 continue;
3126 if (cand->incremented_at != incremented_at
3127 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3128 && cand->ainc_use != use))
3129 continue;
3131 if (operand_equal_p (base, cand->iv->base, 0)
3132 && operand_equal_p (step, cand->iv->step, 0)
3133 && (TYPE_PRECISION (TREE_TYPE (base))
3134 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3135 break;
3138 if (i == data->vcands.length ())
3140 cand = XCNEW (struct iv_cand);
3141 cand->id = i;
3142 cand->iv = alloc_iv (data, base, step);
3143 cand->pos = pos;
3144 if (pos != IP_ORIGINAL)
3146 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147 cand->var_after = cand->var_before;
3149 cand->important = important;
3150 cand->incremented_at = incremented_at;
3151 data->vcands.safe_push (cand);
3153 if (!poly_int_tree_p (step))
3155 find_inv_vars (data, &step, &cand->inv_vars);
3157 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158 /* Share bitmap between inv_vars and inv_exprs for cand. */
3159 if (inv_expr != NULL)
3161 cand->inv_exprs = cand->inv_vars;
3162 cand->inv_vars = NULL;
3163 if (cand->inv_exprs)
3164 bitmap_clear (cand->inv_exprs);
3165 else
3166 cand->inv_exprs = BITMAP_ALLOC (NULL);
3168 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3172 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173 cand->ainc_use = use;
3174 else
3175 cand->ainc_use = NULL;
3177 cand->orig_iv = orig_iv;
3178 if (dump_file && (dump_flags & TDF_DETAILS))
3179 dump_cand (dump_file, cand);
3182 cand->important |= important;
3184 /* Relate candidate to the group for which it is added. */
3185 if (use)
3186 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3188 return cand;
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192 is allowed.
3194 The purpose is to avoid splitting latch edge with a biv increment, thus
3195 creating a jump, possibly confusing other optimization passes and leaving
3196 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3197 available (so we do not have a better alternative), or if the latch edge
3198 is already nonempty. */
3200 static bool
3201 allow_ip_end_pos_p (struct loop *loop)
3203 if (!ip_normal_pos (loop))
3204 return true;
3206 if (!empty_block_p (ip_end_pos (loop)))
3207 return true;
3209 return false;
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213 Important field is set to IMPORTANT. */
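/* For instance, for a 4-byte memory access and an iv with step 4, the code
   below adds a {BASE - 4, +4} candidate incremented IP_BEFORE_USE, which
   matches a pre-increment address, and a {BASE, +4} candidate incremented
   IP_AFTER_USE, which matches a post-increment address.  */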
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217 bool important, struct iv_use *use)
3219 basic_block use_bb = gimple_bb (use->stmt);
3220 machine_mode mem_mode;
3221 unsigned HOST_WIDE_INT cstepi;
3223 /* If we insert the increment in any position other than the standard
3224 ones, we must ensure that it is incremented once per iteration.
3225 It must not be in an inner nested loop, or one side of an if
3226 statement. */
3227 if (use_bb->loop_father != data->current_loop
3228 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229 || stmt_can_throw_internal (cfun, use->stmt)
3230 || !cst_and_fits_in_hwi (step))
3231 return;
3233 cstepi = int_cst_value (step);
3235 mem_mode = TYPE_MODE (use->mem_type);
3236 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237 || USE_STORE_PRE_INCREMENT (mem_mode))
3238 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240 || USE_STORE_PRE_DECREMENT (mem_mode))
3241 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3243 enum tree_code code = MINUS_EXPR;
3244 tree new_base;
3245 tree new_step = step;
3247 if (POINTER_TYPE_P (TREE_TYPE (base)))
3249 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250 code = POINTER_PLUS_EXPR;
3252 else
3253 new_step = fold_convert (TREE_TYPE (base), new_step);
3254 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256 use->stmt);
3258 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259 || USE_STORE_POST_INCREMENT (mem_mode))
3260 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262 || USE_STORE_POST_DECREMENT (mem_mode))
3263 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3265 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266 use->stmt);
3270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3271 position to POS. If USE is not NULL, the candidate is set as related to
3272 it. The candidate computation is scheduled before the exit condition and
3273 at the end of the loop. */
3275 static void
3276 add_candidate (struct ivopts_data *data,
3277 tree base, tree step, bool important, struct iv_use *use,
3278 struct iv *orig_iv = NULL)
3280 if (ip_normal_pos (data->current_loop))
3281 add_candidate_1 (data, base, step, important,
3282 IP_NORMAL, use, NULL, orig_iv);
3283 if (ip_end_pos (data->current_loop)
3284 && allow_ip_end_pos_p (data->current_loop))
3285 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3288 /* Adds standard iv candidates. */
3290 static void
3291 add_standard_iv_candidates (struct ivopts_data *data)
3293 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3295 /* The same for the wider 'long' type if it is still fast enough. */
3296 if (TYPE_PRECISION
3297 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3298 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3299 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3300 build_int_cst (long_integer_type_node, 1), true, NULL);
3302 /* And likewise for 'long long' if it is still fast enough. */
3303 if (TYPE_PRECISION
3304 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3305 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3306 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3307 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3311 /* Adds candidates based on the old induction variable IV. */
3313 static void
3314 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3316 gimple *phi;
3317 tree def;
3318 struct iv_cand *cand;
3320 /* Check if this biv is used in address type use. */
3321 if (iv->no_overflow && iv->have_address_use
3322 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3323 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3325 tree base = fold_convert (sizetype, iv->base);
3326 tree step = fold_convert (sizetype, iv->step);
3328 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3329 add_candidate (data, base, step, true, NULL, iv);
3330 /* Add iv cand of the original type only if it has nonlinear use. */
3331 if (iv->nonlin_use)
3332 add_candidate (data, iv->base, iv->step, true, NULL);
3334 else
3335 add_candidate (data, iv->base, iv->step, true, NULL);
3337 /* The same, but with initial value zero. */
3338 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3339 add_candidate (data, size_int (0), iv->step, true, NULL);
3340 else
3341 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3342 iv->step, true, NULL);
3344 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3345 if (gimple_code (phi) == GIMPLE_PHI)
3347 /* Additionally record the possibility of leaving the original iv
3348 untouched. */
3349 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3350 /* Don't add candidate if it's from another PHI node because
3351 it's an affine iv appearing in the form of PEELED_CHREC. */
3352 phi = SSA_NAME_DEF_STMT (def);
3353 if (gimple_code (phi) != GIMPLE_PHI)
3355 cand = add_candidate_1 (data,
3356 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3357 SSA_NAME_DEF_STMT (def));
3358 if (cand)
3360 cand->var_before = iv->ssa_name;
3361 cand->var_after = def;
3364 else
3365 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3369 /* Adds candidates based on the old induction variables. */
3371 static void
3372 add_iv_candidate_for_bivs (struct ivopts_data *data)
3374 unsigned i;
3375 struct iv *iv;
3376 bitmap_iterator bi;
3378 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3380 iv = ver_info (data, i)->iv;
3381 if (iv && iv->biv_p && !integer_zerop (iv->step))
3382 add_iv_candidate_for_biv (data, iv);
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3388 static void
3389 record_common_cand (struct ivopts_data *data, tree base,
3390 tree step, struct iv_use *use)
3392 struct iv_common_cand ent;
3393 struct iv_common_cand **slot;
3395 ent.base = base;
3396 ent.step = step;
3397 ent.hash = iterative_hash_expr (base, 0);
3398 ent.hash = iterative_hash_expr (step, ent.hash);
3400 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3401 if (*slot == NULL)
3403 *slot = new iv_common_cand ();
3404 (*slot)->base = base;
3405 (*slot)->step = step;
3406 (*slot)->uses.create (8);
3407 (*slot)->hash = ent.hash;
3408 data->iv_common_cands.safe_push ((*slot));
3411 gcc_assert (use != NULL);
3412 (*slot)->uses.safe_push (use);
3413 return;
3416 /* Comparison function used to sort common candidates. */
3418 static int
3419 common_cand_cmp (const void *p1, const void *p2)
3421 unsigned n1, n2;
3422 const struct iv_common_cand *const *const ccand1
3423 = (const struct iv_common_cand *const *)p1;
3424 const struct iv_common_cand *const *const ccand2
3425 = (const struct iv_common_cand *const *)p2;
3427 n1 = (*ccand1)->uses.length ();
3428 n2 = (*ccand2)->uses.length ();
3429 return n2 - n1;
3432 /* Adds IV candidates based on the common candidates recorded. */
3434 static void
3435 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3437 unsigned i, j;
3438 struct iv_cand *cand_1, *cand_2;
3440 data->iv_common_cands.qsort (common_cand_cmp);
3441 for (i = 0; i < data->iv_common_cands.length (); i++)
3443 struct iv_common_cand *ptr = data->iv_common_cands[i];
3445 /* Only add IV candidate if it's derived from multiple uses. */
3446 if (ptr->uses.length () <= 1)
3447 break;
3449 cand_1 = NULL;
3450 cand_2 = NULL;
3451 if (ip_normal_pos (data->current_loop))
3452 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3453 false, IP_NORMAL, NULL, NULL);
3455 if (ip_end_pos (data->current_loop)
3456 && allow_ip_end_pos_p (data->current_loop))
3457 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3458 false, IP_END, NULL, NULL);
3460 /* Bind deriving uses and the new candidates. */
3461 for (j = 0; j < ptr->uses.length (); j++)
3463 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3464 if (cand_1)
3465 bitmap_set_bit (group->related_cands, cand_1->id);
3466 if (cand_2)
3467 bitmap_set_bit (group->related_cands, cand_2->id);
3471 /* Release the data since it is no longer needed from this point on. */
3472 data->iv_common_cand_tab->empty ();
3473 data->iv_common_cands.truncate (0);
3476 /* Adds candidates based on the value of USE's iv. */
3478 static void
3479 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3481 poly_uint64 offset;
3482 tree base;
3483 tree basetype;
3484 struct iv *iv = use->iv;
3486 add_candidate (data, iv->base, iv->step, false, use);
3488 /* Record common candidate for use in case it can be shared by others. */
3489 record_common_cand (data, iv->base, iv->step, use);
3491 /* Record common candidate with initial value zero. */
3492 basetype = TREE_TYPE (iv->base);
3493 if (POINTER_TYPE_P (basetype))
3494 basetype = sizetype;
3495 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3497 /* Record common candidate with constant offset stripped in base.
3498 Like the use itself, we also add candidate directly for it. */
3499 base = strip_offset (iv->base, &offset);
3500 if (maybe_ne (offset, 0U) || base != iv->base)
3502 record_common_cand (data, base, iv->step, use);
3503 add_candidate (data, base, iv->step, false, use);
3506 /* Record common candidate with base_object removed in base. */
3507 base = iv->base;
3508 STRIP_NOPS (base);
3509 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3511 tree step = iv->step;
3513 STRIP_NOPS (step);
3514 base = TREE_OPERAND (base, 1);
3515 step = fold_convert (sizetype, step);
3516 record_common_cand (data, base, step, use);
3517 /* Also record common candidate with offset stripped. */
3518 base = strip_offset (base, &offset);
3519 if (maybe_ne (offset, 0U))
3520 record_common_cand (data, base, step, use);
3523 /* Finally, add auto-increment candidates. Make such variables
3524 important since other iv uses with the same base object may be
3525 based on them. */
3526 if (use != NULL && address_p (use->type))
3527 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3530 /* Adds candidates based on the uses. */
3532 static void
3533 add_iv_candidate_for_groups (struct ivopts_data *data)
3535 unsigned i;
3537 /* Only add a candidate for the first use in each group. */
3538 for (i = 0; i < data->vgroups.length (); i++)
3540 struct iv_group *group = data->vgroups[i];
3542 gcc_assert (group->vuses[0] != NULL);
3543 add_iv_candidate_for_use (data, group->vuses[0]);
3545 add_iv_candidate_derived_from_uses (data);
3548 /* Record important candidates and add them to related_cands bitmaps. */
3550 static void
3551 record_important_candidates (struct ivopts_data *data)
3553 unsigned i;
3554 struct iv_group *group;
3556 for (i = 0; i < data->vcands.length (); i++)
3558 struct iv_cand *cand = data->vcands[i];
3560 if (cand->important)
3561 bitmap_set_bit (data->important_candidates, i);
3564 data->consider_all_candidates = (data->vcands.length ()
3565 <= CONSIDER_ALL_CANDIDATES_BOUND);
3567 /* Add important candidates to groups' related_cands bitmaps. */
3568 for (i = 0; i < data->vgroups.length (); i++)
3570 group = data->vgroups[i];
3571 bitmap_ior_into (group->related_cands, data->important_candidates);
3575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3576 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3577 we allocate a simple list for every use. */
3579 static void
3580 alloc_use_cost_map (struct ivopts_data *data)
3582 unsigned i, size, s;
3584 for (i = 0; i < data->vgroups.length (); i++)
3586 struct iv_group *group = data->vgroups[i];
3588 if (data->consider_all_candidates)
3589 size = data->vcands.length ();
3590 else
3592 s = bitmap_count_bits (group->related_cands);
3594 /* Round up to a power of two, so that taking the modulo is fast. */
3595 size = s ? (1 << ceil_log2 (s)) : 1;
3598 group->n_map_members = size;
3599 group->cost_map = XCNEWVEC (struct cost_pair, size);
3603 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3604 on invariants INV_VARS and that the value used in expressing it is
3605 VALUE, and in case of iv elimination the comparison operator is COMP. */
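/* When we do not consider all candidates, the cost map acts as a small
   open-addressed hash table: probing starts at CAND->id modulo the
   power-of-two table size and scans forward, wrapping around once.  */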
3607 static void
3608 set_group_iv_cost (struct ivopts_data *data,
3609 struct iv_group *group, struct iv_cand *cand,
3610 comp_cost cost, bitmap inv_vars, tree value,
3611 enum tree_code comp, bitmap inv_exprs)
3613 unsigned i, s;
3615 if (cost.infinite_cost_p ())
3617 BITMAP_FREE (inv_vars);
3618 BITMAP_FREE (inv_exprs);
3619 return;
3622 if (data->consider_all_candidates)
3624 group->cost_map[cand->id].cand = cand;
3625 group->cost_map[cand->id].cost = cost;
3626 group->cost_map[cand->id].inv_vars = inv_vars;
3627 group->cost_map[cand->id].inv_exprs = inv_exprs;
3628 group->cost_map[cand->id].value = value;
3629 group->cost_map[cand->id].comp = comp;
3630 return;
3633 /* n_map_members is a power of two, so this computes modulo. */
3634 s = cand->id & (group->n_map_members - 1);
3635 for (i = s; i < group->n_map_members; i++)
3636 if (!group->cost_map[i].cand)
3637 goto found;
3638 for (i = 0; i < s; i++)
3639 if (!group->cost_map[i].cand)
3640 goto found;
3642 gcc_unreachable ();
3644 found:
3645 group->cost_map[i].cand = cand;
3646 group->cost_map[i].cost = cost;
3647 group->cost_map[i].inv_vars = inv_vars;
3648 group->cost_map[i].inv_exprs = inv_exprs;
3649 group->cost_map[i].value = value;
3650 group->cost_map[i].comp = comp;
3653 /* Gets cost of (GROUP, CAND) pair. */
3655 static struct cost_pair *
3656 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3657 struct iv_cand *cand)
3659 unsigned i, s;
3660 struct cost_pair *ret;
3662 if (!cand)
3663 return NULL;
3665 if (data->consider_all_candidates)
3667 ret = group->cost_map + cand->id;
3668 if (!ret->cand)
3669 return NULL;
3671 return ret;
3674 /* n_map_members is a power of two, so this computes modulo. */
3675 s = cand->id & (group->n_map_members - 1);
3676 for (i = s; i < group->n_map_members; i++)
3677 if (group->cost_map[i].cand == cand)
3678 return group->cost_map + i;
3679 else if (group->cost_map[i].cand == NULL)
3680 return NULL;
3681 for (i = 0; i < s; i++)
3682 if (group->cost_map[i].cand == cand)
3683 return group->cost_map + i;
3684 else if (group->cost_map[i].cand == NULL)
3685 return NULL;
3687 return NULL;
3690 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3691 static rtx
3692 produce_memory_decl_rtl (tree obj, int *regno)
3694 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3695 machine_mode address_mode = targetm.addr_space.address_mode (as);
3696 rtx x;
3698 gcc_assert (obj);
3699 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3701 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3702 x = gen_rtx_SYMBOL_REF (address_mode, name);
3703 SET_SYMBOL_REF_DECL (x, obj);
3704 x = gen_rtx_MEM (DECL_MODE (obj), x);
3705 set_mem_addr_space (x, as);
3706 targetm.encode_section_info (obj, x, true);
3708 else
3710 x = gen_raw_REG (address_mode, (*regno)++);
3711 x = gen_rtx_MEM (DECL_MODE (obj), x);
3712 set_mem_addr_space (x, as);
3715 return x;
3718 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3719 walk_tree. DATA contains the actual fake register number. */
3721 static tree
3722 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3724 tree obj = NULL_TREE;
3725 rtx x = NULL_RTX;
3726 int *regno = (int *) data;
3728 switch (TREE_CODE (*expr_p))
3730 case ADDR_EXPR:
3731 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3732 handled_component_p (*expr_p);
3733 expr_p = &TREE_OPERAND (*expr_p, 0))
3734 continue;
3735 obj = *expr_p;
3736 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3737 x = produce_memory_decl_rtl (obj, regno);
3738 break;
3740 case SSA_NAME:
3741 *ws = 0;
3742 obj = SSA_NAME_VAR (*expr_p);
3743 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3744 if (!obj)
3745 return NULL_TREE;
3746 if (!DECL_RTL_SET_P (obj))
3747 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3748 break;
3750 case VAR_DECL:
3751 case PARM_DECL:
3752 case RESULT_DECL:
3753 *ws = 0;
3754 obj = *expr_p;
3756 if (DECL_RTL_SET_P (obj))
3757 break;
3759 if (DECL_MODE (obj) == BLKmode)
3760 x = produce_memory_decl_rtl (obj, regno);
3761 else
3762 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3764 break;
3766 default:
3767 break;
3770 if (x)
3772 decl_rtl_to_reset.safe_push (obj);
3773 SET_DECL_RTL (obj, x);
3776 return NULL_TREE;
3779 /* Determines cost of the computation of EXPR. */
3781 static unsigned
3782 computation_cost (tree expr, bool speed)
3784 rtx_insn *seq;
3785 rtx rslt;
3786 tree type = TREE_TYPE (expr);
3787 unsigned cost;
3788 /* Avoid using hard regs in ways which may be unsupported. */
3789 int regno = LAST_VIRTUAL_REGISTER + 1;
3790 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3791 enum node_frequency real_frequency = node->frequency;
3793 node->frequency = NODE_FREQUENCY_NORMAL;
3794 crtl->maybe_hot_insn_p = speed;
3795 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3796 start_sequence ();
3797 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3798 seq = get_insns ();
3799 end_sequence ();
3800 default_rtl_profile ();
3801 node->frequency = real_frequency;
3803 cost = seq_cost (seq, speed);
3804 if (MEM_P (rslt))
3805 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3806 TYPE_ADDR_SPACE (type), speed);
3807 else if (!REG_P (rslt))
3808 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3810 return cost;
3813 /* Returns variable containing the value of candidate CAND at statement AT. */
3815 static tree
3816 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3818 if (stmt_after_increment (loop, cand, stmt))
3819 return cand->var_after;
3820 else
3821 return cand->var_before;
3824 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3825 same precision that is at least as wide as the precision of TYPE, stores
3826 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3827 type of A and B. */
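/* For example, if A is (int) al and B is (int) bl with al and bl of type
   long, A and B are replaced by al and bl and the function returns the
   long type.  */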
3829 static tree
3830 determine_common_wider_type (tree *a, tree *b)
3832 tree wider_type = NULL;
3833 tree suba, subb;
3834 tree atype = TREE_TYPE (*a);
3836 if (CONVERT_EXPR_P (*a))
3838 suba = TREE_OPERAND (*a, 0);
3839 wider_type = TREE_TYPE (suba);
3840 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3841 return atype;
3843 else
3844 return atype;
3846 if (CONVERT_EXPR_P (*b))
3848 subb = TREE_OPERAND (*b, 0);
3849 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3850 return atype;
3852 else
3853 return atype;
3855 *a = suba;
3856 *b = subb;
3857 return wider_type;
3860 /* Determines the expression by which USE is expressed from induction variable
3861 CAND at statement AT in LOOP. The expression is stored in two parts in a
3862 decomposed form. The invariant part is stored in AFF_INV, the variant
3863 part in AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT if it's
3864 non-null. Returns false if USE cannot be expressed using CAND. */
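/* For example, for a USE with base 4 and step 2 and a CAND with base 0 and
   step 1, the ratio is 2 and the use value is rewritten as
   4 - 2 * 0 + 2 * var, split into AFF_INV (4) and AFF_VAR (2 * var).  */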
3866 static bool
3867 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3868 struct iv_cand *cand, struct aff_tree *aff_inv,
3869 struct aff_tree *aff_var, widest_int *prat = NULL)
3871 tree ubase = use->iv->base, ustep = use->iv->step;
3872 tree cbase = cand->iv->base, cstep = cand->iv->step;
3873 tree common_type, uutype, var, cstep_common;
3874 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3875 aff_tree aff_cbase;
3876 widest_int rat;
3878 /* We must have a precision to express the values of use. */
3879 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3880 return false;
3882 var = var_at_stmt (loop, cand, at);
3883 uutype = unsigned_type_for (utype);
3885 /* If the conversion is not a no-op, perform it. */
3886 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3888 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3889 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3891 tree inner_base, inner_step, inner_type;
3892 inner_base = TREE_OPERAND (cbase, 0);
3893 if (CONVERT_EXPR_P (cstep))
3894 inner_step = TREE_OPERAND (cstep, 0);
3895 else
3896 inner_step = cstep;
3898 inner_type = TREE_TYPE (inner_base);
3899 /* If candidate is added from a biv whose type is smaller than
3900 ctype, we know both candidate and the biv won't overflow.
3901 In this case, it's safe to skip the conversion in the candidate.
3902 As an example, (unsigned short)((unsigned long)A) equals
3903 (unsigned short)A, if A has a type no larger than short. */
3904 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3906 cbase = inner_base;
3907 cstep = inner_step;
3910 cbase = fold_convert (uutype, cbase);
3911 cstep = fold_convert (uutype, cstep);
3912 var = fold_convert (uutype, var);
3915 /* Ratio is 1 when computing the value of biv cand by itself.
3916 We can't rely on constant_multiple_of in this case because the
3917 use is created after the original biv is selected. The call
3918 could fail because of inconsistent fold behavior. See PR68021
3919 for more information. */
3920 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3922 gcc_assert (is_gimple_assign (use->stmt));
3923 gcc_assert (use->iv->ssa_name == cand->var_after);
3924 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3925 rat = 1;
3927 else if (!constant_multiple_of (ustep, cstep, &rat))
3928 return false;
3930 if (prat)
3931 *prat = rat;
3933 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3934 type, we achieve better folding by computing their difference in this
3935 wider type, and casting the result to UUTYPE. We do not need to worry about
3936 overflows, as all the arithmetic will in the end be performed in UUTYPE
3937 anyway. */
3938 common_type = determine_common_wider_type (&ubase, &cbase);
3940 /* use = ubase - ratio * cbase + ratio * var. */
3941 tree_to_aff_combination (ubase, common_type, aff_inv);
3942 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3943 tree_to_aff_combination (var, uutype, aff_var);
3945 /* We need to shift the value if we are after the increment. */
3946 if (stmt_after_increment (loop, cand, at))
3948 aff_tree cstep_aff;
3950 if (common_type != uutype)
3951 cstep_common = fold_convert (common_type, cstep);
3952 else
3953 cstep_common = cstep;
3955 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3956 aff_combination_add (&aff_cbase, &cstep_aff);
3959 aff_combination_scale (&aff_cbase, -rat);
3960 aff_combination_add (aff_inv, &aff_cbase);
3961 if (common_type != uutype)
3962 aff_combination_convert (aff_inv, uutype);
3964 aff_combination_scale (aff_var, rat);
3965 return true;
3968 /* Determines the expression by which USE is expressed from induction variable
3969 CAND at statement AT in LOOP. The expression is stored in a decomposed
3970 form into AFF. Returns false if USE cannot be expressed using CAND. */
3972 static bool
3973 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3974 struct iv_cand *cand, struct aff_tree *aff)
3976 aff_tree aff_var;
3978 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3979 return false;
3981 aff_combination_add (aff, &aff_var);
3982 return true;
3985 /* Return the type of USE. */
3987 static tree
3988 get_use_type (struct iv_use *use)
3990 tree base_type = TREE_TYPE (use->iv->base);
3991 tree type;
3993 if (use->type == USE_REF_ADDRESS)
3995 /* The base_type may be a void pointer. Create a pointer type based on
3996 the mem_ref instead. */
3997 type = build_pointer_type (TREE_TYPE (*use->op_p));
3998 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3999 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4001 else
4002 type = base_type;
4004 return type;
4007 /* Determines the expression by which USE is expressed from induction variable
4008 CAND at statement AT in LOOP. The computation is unshared. */
4010 static tree
4011 get_computation_at (struct loop *loop, gimple *at,
4012 struct iv_use *use, struct iv_cand *cand)
4014 aff_tree aff;
4015 tree type = get_use_type (use);
4017 if (!get_computation_aff (loop, at, use, cand, &aff))
4018 return NULL_TREE;
4019 unshare_aff_combination (&aff);
4020 return fold_convert (type, aff_combination_to_tree (&aff));
4023 /* Adjust the cost COST for being in loop setup rather than loop body.
4024 If we're optimizing for space, the loop setup overhead is constant;
4025 if we're optimizing for speed, amortize it over the per-iteration cost.
4026 If ROUND_UP_P is true, the result is rounded up rather than truncated
4027 toward zero when optimizing for speed. */
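/* E.g. a setup cost of 10 with an average of 4 iterations is amortized to
   10 / 4 = 2 when optimizing for speed, or to (10 + 3) / 4 = 3 when
   ROUND_UP_P is true.  */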
4028 static unsigned
4029 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4030 bool round_up_p = false)
4032 if (cost == INFTY)
4033 return cost;
4034 else if (optimize_loop_for_speed_p (data->current_loop))
4036 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4037 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4039 else
4040 return cost;
4043 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4044 EXPR operand holding the shift. COST0 and COST1 are the costs for
4045 calculating the operands of EXPR. Returns true if successful, and returns
4046 the cost in COST. */
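/* For instance, for EXPR = a + b * 8 with MULT = b * 8, m is 3 and the
   cheaper of {shift insn + add insn} and the target's shift-and-add insn
   is used for the cost.  */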
4048 static bool
4049 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4050 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4052 comp_cost res;
4053 tree op1 = TREE_OPERAND (expr, 1);
4054 tree cst = TREE_OPERAND (mult, 1);
4055 tree multop = TREE_OPERAND (mult, 0);
4056 int m = exact_log2 (int_cst_value (cst));
4057 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4058 int as_cost, sa_cost;
4059 bool mult_in_op1;
4061 if (!(m >= 0 && m < maxm))
4062 return false;
4064 STRIP_NOPS (op1);
4065 mult_in_op1 = operand_equal_p (op1, mult, 0);
4067 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4069 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4070 use that in preference to a shift insn followed by an add insn. */
4071 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4072 ? shiftadd_cost (speed, mode, m)
4073 : (mult_in_op1
4074 ? shiftsub1_cost (speed, mode, m)
4075 : shiftsub0_cost (speed, mode, m)));
4077 res = comp_cost (MIN (as_cost, sa_cost), 0);
4078 res += (mult_in_op1 ? cost0 : cost1);
4080 STRIP_NOPS (multop);
4081 if (!is_gimple_val (multop))
4082 res += force_expr_to_var_cost (multop, speed);
4084 *cost = res;
4085 return true;
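/* Illustration (editorial, not from the original source): for
   EXPR = b + a * 4, MULT is a * 4, so m = exact_log2 (4) = 2.  The
   routine then compares the cost of a shift followed by an add (as_cost)
   against the target's combined shift-and-add cost (sa_cost) and keeps
   the cheaper, plus the cost of the non-multiplied operand.  */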
4088 /* Estimates cost of forcing expression EXPR into a variable. */
4090 static comp_cost
4091 force_expr_to_var_cost (tree expr, bool speed)
4093 static bool costs_initialized = false;
4094 static unsigned integer_cost [2];
4095 static unsigned symbol_cost [2];
4096 static unsigned address_cost [2];
4097 tree op0, op1;
4098 comp_cost cost0, cost1, cost;
4099 machine_mode mode;
4100 scalar_int_mode int_mode;
4102 if (!costs_initialized)
4104 tree type = build_pointer_type (integer_type_node);
4105 tree var, addr;
4106 rtx x;
4107 int i;
4109 var = create_tmp_var_raw (integer_type_node, "test_var");
4110 TREE_STATIC (var) = 1;
4111 x = produce_memory_decl_rtl (var, NULL);
4112 SET_DECL_RTL (var, x);
4114 addr = build1 (ADDR_EXPR, type, var);
4117 for (i = 0; i < 2; i++)
4119 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4120 2000), i);
4122 symbol_cost[i] = computation_cost (addr, i) + 1;
4124 address_cost[i]
4125 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4126 if (dump_file && (dump_flags & TDF_DETAILS))
4128 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4129 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4130 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4131 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4132 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4133 fprintf (dump_file, "\n");
4137 costs_initialized = true;
4140 STRIP_NOPS (expr);
4142 if (SSA_VAR_P (expr))
4143 return no_cost;
4145 if (is_gimple_min_invariant (expr))
4147 if (poly_int_tree_p (expr))
4148 return comp_cost (integer_cost [speed], 0);
4150 if (TREE_CODE (expr) == ADDR_EXPR)
4152 tree obj = TREE_OPERAND (expr, 0);
4154 if (VAR_P (obj)
4155 || TREE_CODE (obj) == PARM_DECL
4156 || TREE_CODE (obj) == RESULT_DECL)
4157 return comp_cost (symbol_cost [speed], 0);
4160 return comp_cost (address_cost [speed], 0);
4163 switch (TREE_CODE (expr))
4165 case POINTER_PLUS_EXPR:
4166 case PLUS_EXPR:
4167 case MINUS_EXPR:
4168 case MULT_EXPR:
4169 case TRUNC_DIV_EXPR:
4170 case BIT_AND_EXPR:
4171 case BIT_IOR_EXPR:
4172 case LSHIFT_EXPR:
4173 case RSHIFT_EXPR:
4174 op0 = TREE_OPERAND (expr, 0);
4175 op1 = TREE_OPERAND (expr, 1);
4176 STRIP_NOPS (op0);
4177 STRIP_NOPS (op1);
4178 break;
4180 CASE_CONVERT:
4181 case NEGATE_EXPR:
4182 case BIT_NOT_EXPR:
4183 op0 = TREE_OPERAND (expr, 0);
4184 STRIP_NOPS (op0);
4185 op1 = NULL_TREE;
4186 break;
4188 default:
4189 /* Just an arbitrary value, FIXME. */
4190 return comp_cost (target_spill_cost[speed], 0);
4193 if (op0 == NULL_TREE
4194 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4195 cost0 = no_cost;
4196 else
4197 cost0 = force_expr_to_var_cost (op0, speed);
4199 if (op1 == NULL_TREE
4200 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4201 cost1 = no_cost;
4202 else
4203 cost1 = force_expr_to_var_cost (op1, speed);
4205 mode = TYPE_MODE (TREE_TYPE (expr));
4206 switch (TREE_CODE (expr))
4208 case POINTER_PLUS_EXPR:
4209 case PLUS_EXPR:
4210 case MINUS_EXPR:
4211 case NEGATE_EXPR:
4212 cost = comp_cost (add_cost (speed, mode), 0);
4213 if (TREE_CODE (expr) != NEGATE_EXPR)
4215 tree mult = NULL_TREE;
4216 comp_cost sa_cost;
4217 if (TREE_CODE (op1) == MULT_EXPR)
4218 mult = op1;
4219 else if (TREE_CODE (op0) == MULT_EXPR)
4220 mult = op0;
4222 if (mult != NULL_TREE
4223 && is_a <scalar_int_mode> (mode, &int_mode)
4224 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4225 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4226 speed, &sa_cost))
4227 return sa_cost;
4229 break;
4231 CASE_CONVERT:
4233 tree inner_mode, outer_mode;
4234 outer_mode = TREE_TYPE (expr);
4235 inner_mode = TREE_TYPE (op0);
4236 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4237 TYPE_MODE (inner_mode), speed), 0);
4239 break;
4241 case MULT_EXPR:
4242 if (cst_and_fits_in_hwi (op0))
4243 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4244 mode, speed), 0);
4245 else if (cst_and_fits_in_hwi (op1))
4246 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4247 mode, speed), 0);
4248 else
4249 return comp_cost (target_spill_cost [speed], 0);
4250 break;
4252 case TRUNC_DIV_EXPR:
4253 /* Division by a power of two is usually cheap, so we allow it. Forbid
4254 anything else. */
4255 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4256 cost = comp_cost (add_cost (speed, mode), 0);
4257 else
4258 cost = comp_cost (target_spill_cost[speed], 0);
4259 break;
4261 case BIT_AND_EXPR:
4262 case BIT_IOR_EXPR:
4263 case BIT_NOT_EXPR:
4264 case LSHIFT_EXPR:
4265 case RSHIFT_EXPR:
4266 cost = comp_cost (add_cost (speed, mode), 0);
4267 break;
4269 default:
4270 gcc_unreachable ();
4273 cost += cost0;
4274 cost += cost1;
4275 return cost;
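/* Classification sketch (editorial): given the costs measured above, a
   literal like 2000 is charged integer_cost, &x for a declaration x is
   charged symbol_cost, and an invariant address with an offset such as
   &a[100] falls through to address_cost; anything non-invariant recurses
   through the operator switch, e.g. (a + b) with SSA-name operands costs
   just add_cost for the PLUS_EXPR.  */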
4278 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4279 invariants the computation depends on. */
4281 static comp_cost
4282 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4284 if (!expr)
4285 return no_cost;
4287 find_inv_vars (data, &expr, inv_vars);
4288 return force_expr_to_var_cost (expr, data->speed);
4291 /* Returns the cost of an auto-modifying address expression of the shape
4292 base + offset. AINC_STEP is the step of the address IV. AINC_OFFSET is
4293 the offset of the address expression. The address expression has
4294 ADDR_MODE in address space AS. The memory access has MEM_MODE. SPEED
4295 means we are optimizing for speed or size. */
4297 enum ainc_type
4299 AINC_PRE_INC, /* Pre increment. */
4300 AINC_PRE_DEC, /* Pre decrement. */
4301 AINC_POST_INC, /* Post increment. */
4302 AINC_POST_DEC, /* Post decrement. */
4303 AINC_NONE /* Also the number of auto increment types. */
4306 struct ainc_cost_data
4308 unsigned costs[AINC_NONE];
4311 static comp_cost
4312 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4313 machine_mode addr_mode, machine_mode mem_mode,
4314 addr_space_t as, bool speed)
4316 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4317 && !USE_STORE_PRE_DECREMENT (mem_mode)
4318 && !USE_LOAD_POST_DECREMENT (mem_mode)
4319 && !USE_STORE_POST_DECREMENT (mem_mode)
4320 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4321 && !USE_STORE_PRE_INCREMENT (mem_mode)
4322 && !USE_LOAD_POST_INCREMENT (mem_mode)
4323 && !USE_STORE_POST_INCREMENT (mem_mode))
4324 return infinite_cost;
4326 static vec<ainc_cost_data *> ainc_cost_data_list;
4327 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4328 if (idx >= ainc_cost_data_list.length ())
4330 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4332 gcc_assert (nsize > idx);
4333 ainc_cost_data_list.safe_grow_cleared (nsize);
4336 ainc_cost_data *data = ainc_cost_data_list[idx];
4337 if (data == NULL)
4339 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4341 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4342 data->costs[AINC_PRE_DEC] = INFTY;
4343 data->costs[AINC_POST_DEC] = INFTY;
4344 data->costs[AINC_PRE_INC] = INFTY;
4345 data->costs[AINC_POST_INC] = INFTY;
4346 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4347 || USE_STORE_PRE_DECREMENT (mem_mode))
4349 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4351 if (memory_address_addr_space_p (mem_mode, addr, as))
4352 data->costs[AINC_PRE_DEC]
4353 = address_cost (addr, mem_mode, as, speed);
4355 if (USE_LOAD_POST_DECREMENT (mem_mode)
4356 || USE_STORE_POST_DECREMENT (mem_mode))
4358 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4360 if (memory_address_addr_space_p (mem_mode, addr, as))
4361 data->costs[AINC_POST_DEC]
4362 = address_cost (addr, mem_mode, as, speed);
4364 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4365 || USE_STORE_PRE_INCREMENT (mem_mode))
4367 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4369 if (memory_address_addr_space_p (mem_mode, addr, as))
4370 data->costs[AINC_PRE_INC]
4371 = address_cost (addr, mem_mode, as, speed);
4373 if (USE_LOAD_POST_INCREMENT (mem_mode)
4374 || USE_STORE_POST_INCREMENT (mem_mode))
4376 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4378 if (memory_address_addr_space_p (mem_mode, addr, as))
4379 data->costs[AINC_POST_INC]
4380 = address_cost (addr, mem_mode, as, speed);
4382 ainc_cost_data_list[idx] = data;
4385 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4386 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4387 return comp_cost (data->costs[AINC_POST_INC], 0);
4388 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4389 return comp_cost (data->costs[AINC_POST_DEC], 0);
4390 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4391 return comp_cost (data->costs[AINC_PRE_INC], 0);
4392 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4393 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4395 return infinite_cost;
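/* Matching example (editorial): for a 4-byte access (msize == 4), a
   candidate step of +4 with offset 0 maps to AINC_POST_INC (the address
   is used, then advanced), while a step of -4 with offset -4 maps to
   AINC_PRE_DEC (the address is decremented before the access).  Anything
   that does not line up exactly with the access size is not an
   auto-increment opportunity and gets infinite_cost here.  */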
4398 /* Return cost of computing USE's address expression by using CAND.
4399 AFF_INV and AFF_VAR represent invariant and variant parts of the
4400 address expression, respectively. If AFF_INV is simple, store
4401 the loop invariant variables on which it depends in INV_VARS;
4402 if AFF_INV is complicated, handle it as a new invariant expression
4403 and record it in INV_EXPR. RATIO is the multiple between the
4404 steps of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4405 value in it indicating whether this is an auto-increment address. */
4407 static comp_cost
4408 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4409 struct iv_cand *cand, aff_tree *aff_inv,
4410 aff_tree *aff_var, HOST_WIDE_INT ratio,
4411 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4412 bool *can_autoinc, bool speed)
4414 rtx addr;
4415 bool simple_inv = true;
4416 tree comp_inv = NULL_TREE, type = aff_var->type;
4417 comp_cost var_cost = no_cost, cost = no_cost;
4418 struct mem_address parts = {NULL_TREE, integer_one_node,
4419 NULL_TREE, NULL_TREE, NULL_TREE};
4420 machine_mode addr_mode = TYPE_MODE (type);
4421 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4422 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4423 /* Only true if ratio != 1. */
4424 bool ok_with_ratio_p = false;
4425 bool ok_without_ratio_p = false;
4427 if (!aff_combination_const_p (aff_inv))
4429 parts.index = integer_one_node;
4430 /* Addressing mode "base + index". */
4431 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4432 if (ratio != 1)
4434 parts.step = wide_int_to_tree (type, ratio);
4435 /* Addressing mode "base + index << scale". */
4436 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4437 if (!ok_with_ratio_p)
4438 parts.step = NULL_TREE;
4440 if (ok_with_ratio_p || ok_without_ratio_p)
4442 if (maybe_ne (aff_inv->offset, 0))
4444 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4445 /* Addressing mode "base + index [<< scale] + offset". */
4446 if (!valid_mem_ref_p (mem_mode, as, &parts))
4447 parts.offset = NULL_TREE;
4448 else
4449 aff_inv->offset = 0;
4452 move_fixed_address_to_symbol (&parts, aff_inv);
4453 /* The base is a fixed address and has been moved to the symbol part. */
4454 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4455 parts.base = NULL_TREE;
4457 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4458 if (parts.symbol != NULL_TREE
4459 && !valid_mem_ref_p (mem_mode, as, &parts))
4461 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4462 parts.symbol = NULL_TREE;
4463 /* Reset SIMPLE_INV since the symbol address needs to be computed
4464 outside of the address expression in this case. */
4465 simple_inv = false;
4466 /* The symbol part is moved back to the base part; it can't be NULL. */
4467 parts.base = integer_one_node;
4470 else
4471 parts.index = NULL_TREE;
4473 else
4475 poly_int64 ainc_step;
4476 if (can_autoinc
4477 && ratio == 1
4478 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4480 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4482 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4483 ainc_offset += ainc_step;
4484 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4485 addr_mode, mem_mode, as, speed);
4486 if (!cost.infinite_cost_p ())
4488 *can_autoinc = true;
4489 return cost;
4491 cost = no_cost;
4493 if (!aff_combination_zero_p (aff_inv))
4495 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4496 /* Addressing mode "base + offset". */
4497 if (!valid_mem_ref_p (mem_mode, as, &parts))
4498 parts.offset = NULL_TREE;
4499 else
4500 aff_inv->offset = 0;
4504 if (simple_inv)
4505 simple_inv = (aff_inv == NULL
4506 || aff_combination_const_p (aff_inv)
4507 || aff_combination_singleton_var_p (aff_inv));
4508 if (!aff_combination_zero_p (aff_inv))
4509 comp_inv = aff_combination_to_tree (aff_inv);
4510 if (comp_inv != NULL_TREE)
4511 cost = force_var_cost (data, comp_inv, inv_vars);
4512 if (ratio != 1 && parts.step == NULL_TREE)
4513 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4514 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4515 var_cost += add_cost (speed, addr_mode);
4517 if (comp_inv && inv_expr && !simple_inv)
4519 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4520 /* Clear depends on. */
4521 if (*inv_expr != NULL && inv_vars && *inv_vars)
4522 bitmap_clear (*inv_vars);
4524 /* The cost of a small invariant expression, once amortized over the
4525 loop iterations, is usually zero, which makes it hard to distinguish
4526 from a candidate based on loop invariant variables. Moreover, the
4527 generated invariant expression may not be hoisted out of the loop by
4528 a following pass. We penalize the cost by rounding up in order to
4529 neutralize both effects. */
4530 cost.cost = adjust_setup_cost (data, cost.cost, true);
4531 cost.scratch = cost.cost;
4534 cost += var_cost;
4535 addr = addr_for_mem_ref (&parts, as, false);
4536 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4537 cost += address_cost (addr, mem_mode, as, speed);
4539 if (parts.symbol != NULL_TREE)
4540 cost.complexity += 1;
4541 /* Don't increase the complexity of adding a scaled index if it's
4542 the only kind of index that the target allows. */
4543 if (parts.step != NULL_TREE && ok_without_ratio_p)
4544 cost.complexity += 1;
4545 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4546 cost.complexity += 1;
4547 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4548 cost.complexity += 1;
4550 return cost;
4553 /* Scale (multiply) the computed COST (except the scratch part that should
4554 be hoisted out of the loop) by AT->frequency / header->frequency, which
4555 makes the expected cost more accurate. */
4557 static comp_cost
4558 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4560 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4561 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4562 if (loop_freq != 0)
4564 gcc_assert (cost.scratch <= cost.cost);
4565 int scaled_cost
4566 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4568 if (dump_file && (dump_flags & TDF_DETAILS))
4569 fprintf (dump_file, "Scaling cost based on bb prob "
4570 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4571 1.0f * bb_freq / loop_freq, cost.cost,
4572 cost.scratch, scaled_cost, bb_freq, loop_freq);
4574 cost.cost = scaled_cost;
4577 return cost;
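/* Scaling example (editorial): with cost.cost == 10, cost.scratch == 2,
   loop_freq == 1000 and bb_freq == 500, the scaled cost is
   2 + (10 - 2) * 500 / 1000 == 6 -- the setup (scratch) part is charged
   in full while the per-iteration part is halved, matching a block that
   executes on only half of the iterations.  */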
4580 /* Determines the cost of the computation by that USE is expressed
4581 from induction variable CAND. If ADDRESS_P is true, we just need
4582 to create an address from it, otherwise we want to get it into a
4583 register. A set of invariants we depend on is stored in INV_VARS.
4584 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4585 addressing is likely. If INV_EXPR is nonnull, record invariant
4586 expr entry in it. */
4588 static comp_cost
4589 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4590 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4591 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4593 gimple *at = use->stmt;
4594 tree ubase = use->iv->base, cbase = cand->iv->base;
4595 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4596 tree comp_inv = NULL_TREE;
4597 HOST_WIDE_INT ratio, aratio;
4598 comp_cost cost;
4599 widest_int rat;
4600 aff_tree aff_inv, aff_var;
4601 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4603 if (inv_vars)
4604 *inv_vars = NULL;
4605 if (can_autoinc)
4606 *can_autoinc = false;
4607 if (inv_expr)
4608 *inv_expr = NULL;
4610 /* Check if we have enough precision to express the values of the use. */
4611 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4612 return infinite_cost;
4614 if (address_p
4615 || (use->iv->base_object
4616 && cand->iv->base_object
4617 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4618 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4620 /* Do not try to express the address of an object with a computation
4621 based on the address of a different object. This may cause problems in rtl
4622 level alias analysis (that does not expect this to be happening,
4623 as this is illegal in C), and would be unlikely to be useful
4624 anyway. */
4625 if (use->iv->base_object
4626 && cand->iv->base_object
4627 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4628 return infinite_cost;
4631 if (!get_computation_aff_1 (data->current_loop, at, use,
4632 cand, &aff_inv, &aff_var, &rat)
4633 || !wi::fits_shwi_p (rat))
4634 return infinite_cost;
4636 ratio = rat.to_shwi ();
4637 if (address_p)
4639 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4640 inv_vars, inv_expr, can_autoinc, speed);
4641 return get_scaled_computation_cost_at (data, at, cost);
4644 bool simple_inv = (aff_combination_const_p (&aff_inv)
4645 || aff_combination_singleton_var_p (&aff_inv));
4646 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4647 aff_combination_convert (&aff_inv, signed_type);
4648 if (!aff_combination_zero_p (&aff_inv))
4649 comp_inv = aff_combination_to_tree (&aff_inv);
4651 cost = force_var_cost (data, comp_inv, inv_vars);
4652 if (comp_inv && inv_expr && !simple_inv)
4654 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4655 /* Clear depends on. */
4656 if (*inv_expr != NULL && inv_vars && *inv_vars)
4657 bitmap_clear (*inv_vars);
4659 cost.cost = adjust_setup_cost (data, cost.cost);
4660 /* Record setup cost in scratch field. */
4661 cost.scratch = cost.cost;
4663 /* The cost of a constant integer can be covered when adding the
4664 invariant part to the variant part. */
4665 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4666 cost = no_cost;
4668 /* Need type narrowing to represent use with cand. */
4669 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4671 machine_mode outer_mode = TYPE_MODE (utype);
4672 machine_mode inner_mode = TYPE_MODE (ctype);
4673 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4676 /* Turn a + i * (-c) into a - i * c. */
4677 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4678 aratio = -ratio;
4679 else
4680 aratio = ratio;
4682 if (ratio != 1)
4683 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4685 /* TODO: We may also need to check if we can compute a + i * 4 in one
4686 instruction. */
4687 /* Need to add up the invariant and variant parts. */
4688 if (comp_inv && !integer_zerop (comp_inv))
4689 cost += add_cost (speed, TYPE_MODE (utype));
4691 return get_scaled_computation_cost_at (data, at, cost);
4694 /* Determines cost of computing the use in GROUP with CAND in a generic
4695 expression. */
4697 static bool
4698 determine_group_iv_cost_generic (struct ivopts_data *data,
4699 struct iv_group *group, struct iv_cand *cand)
4701 comp_cost cost;
4702 iv_inv_expr_ent *inv_expr = NULL;
4703 bitmap inv_vars = NULL, inv_exprs = NULL;
4704 struct iv_use *use = group->vuses[0];
4706 /* The simple case first -- if we need to express the value of the preserved
4707 original biv, the cost is 0. This also prevents us from counting the
4708 cost of the increment twice -- once at this use and once in the cost of
4709 the candidate. */
4710 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4711 cost = no_cost;
4712 else
4713 cost = get_computation_cost (data, use, cand, false,
4714 &inv_vars, NULL, &inv_expr);
4716 if (inv_expr)
4718 inv_exprs = BITMAP_ALLOC (NULL);
4719 bitmap_set_bit (inv_exprs, inv_expr->id);
4721 set_group_iv_cost (data, group, cand, cost, inv_vars,
4722 NULL_TREE, ERROR_MARK, inv_exprs);
4723 return !cost.infinite_cost_p ();
4726 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4728 static bool
4729 determine_group_iv_cost_address (struct ivopts_data *data,
4730 struct iv_group *group, struct iv_cand *cand)
4732 unsigned i;
4733 bitmap inv_vars = NULL, inv_exprs = NULL;
4734 bool can_autoinc;
4735 iv_inv_expr_ent *inv_expr = NULL;
4736 struct iv_use *use = group->vuses[0];
4737 comp_cost sum_cost = no_cost, cost;
4739 cost = get_computation_cost (data, use, cand, true,
4740 &inv_vars, &can_autoinc, &inv_expr);
4742 if (inv_expr)
4744 inv_exprs = BITMAP_ALLOC (NULL);
4745 bitmap_set_bit (inv_exprs, inv_expr->id);
4747 sum_cost = cost;
4748 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4750 if (can_autoinc)
4751 sum_cost -= cand->cost_step;
4752 /* If we generated the candidate solely for exploiting autoincrement
4753 opportunities, and it turns out it can't be used, set the cost to
4754 infinity to make sure we ignore it. */
4755 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4756 sum_cost = infinite_cost;
4759 /* Uses in a group can share setup code, so only add setup cost once. */
4760 cost -= cost.scratch;
4761 /* Compute and add costs for the rest of the uses in this group. */
4762 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4764 struct iv_use *next = group->vuses[i];
4766 /* TODO: We could skip computing cost for sub iv_use when it has the
4767 same cost as the first iv_use, but the cost really depends on the
4768 offset and where the iv_use is. */
4769 cost = get_computation_cost (data, next, cand, true,
4770 NULL, &can_autoinc, &inv_expr);
4771 if (inv_expr)
4773 if (!inv_exprs)
4774 inv_exprs = BITMAP_ALLOC (NULL);
4776 bitmap_set_bit (inv_exprs, inv_expr->id);
4778 sum_cost += cost;
4780 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4781 NULL_TREE, ERROR_MARK, inv_exprs);
4783 return !sum_cost.infinite_cost_p ();
4786 /* Computes value of candidate CAND at position AT in iteration NITER, and
4787 stores it to VAL. */
4789 static void
4790 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4791 aff_tree *val)
4793 aff_tree step, delta, nit;
4794 struct iv *iv = cand->iv;
4795 tree type = TREE_TYPE (iv->base);
4796 tree steptype;
4797 if (POINTER_TYPE_P (type))
4798 steptype = sizetype;
4799 else
4800 steptype = unsigned_type_for (type);
4802 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4803 aff_combination_convert (&step, steptype);
4804 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4805 aff_combination_convert (&nit, steptype);
4806 aff_combination_mult (&nit, &step, &delta);
4807 if (stmt_after_increment (loop, cand, at))
4808 aff_combination_add (&delta, &step);
4810 tree_to_aff_combination (iv->base, type, val);
4811 if (!POINTER_TYPE_P (type))
4812 aff_combination_convert (val, steptype);
4813 aff_combination_add (val, &delta);
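/* In effect this computes (editorial note)
     VAL = BASE + (NITER + {0,1}) * STEP
   in the unsigned step type, with the extra STEP added exactly when AT is
   after the candidate's increment.  For BASE 0, STEP 4 and NITER 10 the
   value is 40 before the increment and 44 after it.  */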
4816 /* Returns period of induction variable iv. */
4818 static tree
4819 iv_period (struct iv *iv)
4821 tree step = iv->step, period, type;
4822 tree pow2div;
4824 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4826 type = unsigned_type_for (TREE_TYPE (step));
4827 /* The period of the iv is lcm (step, type_range)/step - 1,
4828 i.e., N*type_range/step - 1. Since type_range is a power
4829 of two, N == step >> num_of_ending_zeros_binary (step),
4830 so the final result is
4832 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4835 pow2div = num_ending_zeros (step);
4837 period = build_low_bits_mask (type,
4838 (TYPE_PRECISION (type)
4839 - tree_to_uhwi (pow2div)));
4841 return period;
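/* Worked example (editorial): for a 32-bit unsigned iv with step 12,
   num_ending_zeros (12) == 2, so the period is
   build_low_bits_mask (type, 32 - 2) == 2^30 - 1 == 0x3fffffff;
   the iv can take that many steps before one of its values repeats.  */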
4844 /* Returns the comparison operator used when eliminating the iv USE. */
4846 static enum tree_code
4847 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4849 struct loop *loop = data->current_loop;
4850 basic_block ex_bb;
4851 edge exit;
4853 ex_bb = gimple_bb (use->stmt);
4854 exit = EDGE_SUCC (ex_bb, 0);
4855 if (flow_bb_inside_loop_p (loop, exit->dest))
4856 exit = EDGE_SUCC (ex_bb, 1);
4858 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4861 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4862 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4863 calculation is performed in a non-wrapping type.
4865 TODO: More generally, we could test for the situation that
4866 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4867 This would require knowing the sign of OFFSET. */
4869 static bool
4870 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4872 enum tree_code code;
4873 tree e1, e2;
4874 aff_tree aff_e1, aff_e2, aff_offset;
4876 if (!nowrap_type_p (TREE_TYPE (base)))
4877 return false;
4879 base = expand_simple_operations (base);
4881 if (TREE_CODE (base) == SSA_NAME)
4883 gimple *stmt = SSA_NAME_DEF_STMT (base);
4885 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4886 return false;
4888 code = gimple_assign_rhs_code (stmt);
4889 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4890 return false;
4892 e1 = gimple_assign_rhs1 (stmt);
4893 e2 = gimple_assign_rhs2 (stmt);
4895 else
4897 code = TREE_CODE (base);
4898 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4899 return false;
4900 e1 = TREE_OPERAND (base, 0);
4901 e2 = TREE_OPERAND (base, 1);
4904 /* Use affine expansion as deeper inspection to prove the equality. */
4905 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4906 &aff_e2, &data->name_expansion_cache);
4907 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4908 &aff_offset, &data->name_expansion_cache);
4909 aff_combination_scale (&aff_offset, -1);
4910 switch (code)
4912 case PLUS_EXPR:
4913 aff_combination_add (&aff_e2, &aff_offset);
4914 if (aff_combination_zero_p (&aff_e2))
4915 return true;
4917 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4918 &aff_e1, &data->name_expansion_cache);
4919 aff_combination_add (&aff_e1, &aff_offset);
4920 return aff_combination_zero_p (&aff_e1);
4922 case POINTER_PLUS_EXPR:
4923 aff_combination_add (&aff_e2, &aff_offset);
4924 return aff_combination_zero_p (&aff_e2);
4926 default:
4927 return false;
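/* Example of the pattern being detected (editorial; the SSA names are
   hypothetical): if BASE is defined by
     q_5 = p_2 + off_3;   (POINTER_PLUS_EXPR in a non-wrapping type)
   and OFFSET expands to the same off_3, then the affine sum
   e2 + (-OFFSET) above is zero, so BASE - OFFSET is just p_2 and the
   subtraction cannot overflow.  */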
4931 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4932 comparison with CAND. NITER describes the number of iterations of
4933 the loops. If successful, the comparison in COMP_P is altered accordingly.
4935 We aim to handle the following situation:
4937 sometype *base, *p;
4938 int a, b, i;
4940 i = a;
4941 p = p_0 = base + a;
4943 do
4944 {
4945 bla (*p);
4946 p++;
4947 i++;
4948 }
4949 while (i < b);
4951 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4952 We aim to optimize this to
4954 p = p_0 = base + a;
4955 do
4956 {
4957 bla (*p);
4958 p++;
4959 }
4960 while (p < p_0 - a + b);
4962 This preserves correctness, since the pointer arithmetic does not
4963 overflow. More precisely:
4965 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4966 overflow in computing it or the values of p.
4967 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4968 overflow. To prove this, we use the fact that p_0 = base + a. */
4970 static bool
4971 iv_elimination_compare_lt (struct ivopts_data *data,
4972 struct iv_cand *cand, enum tree_code *comp_p,
4973 struct tree_niter_desc *niter)
4975 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4976 struct aff_tree nit, tmpa, tmpb;
4977 enum tree_code comp;
4978 HOST_WIDE_INT step;
4980 /* We need to know that the candidate induction variable does not overflow.
4981 While more complex analysis may be used to prove this, for now just
4982 check that the variable appears in the original program and that it
4983 is computed in a type that guarantees no overflows. */
4984 cand_type = TREE_TYPE (cand->iv->base);
4985 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4986 return false;
4988 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4989 the calculation of the BOUND could overflow, making the comparison
4990 invalid. */
4991 if (!data->loop_single_exit_p)
4992 return false;
4994 /* We need to be able to decide whether the candidate is increasing or
4995 decreasing in order to choose the right comparison operator. */
4996 if (!cst_and_fits_in_hwi (cand->iv->step))
4997 return false;
4998 step = int_cst_value (cand->iv->step);
5000 /* Check that the number of iterations matches the expected pattern:
5001 a + 1 > b ? 0 : b - a - 1. */
5002 mbz = niter->may_be_zero;
5003 if (TREE_CODE (mbz) == GT_EXPR)
5005 /* Handle a + 1 > b. */
5006 tree op0 = TREE_OPERAND (mbz, 0);
5007 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5009 a = TREE_OPERAND (op0, 0);
5010 b = TREE_OPERAND (mbz, 1);
5012 else
5013 return false;
5015 else if (TREE_CODE (mbz) == LT_EXPR)
5017 tree op1 = TREE_OPERAND (mbz, 1);
5019 /* Handle b < a + 1. */
5020 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5022 a = TREE_OPERAND (op1, 0);
5023 b = TREE_OPERAND (mbz, 0);
5025 else
5026 return false;
5028 else
5029 return false;
5031 /* Expected number of iterations is B - A - 1. Check that it matches
5032 the actual number, i.e., that B - A - NITER = 1. */
5033 tree_to_aff_combination (niter->niter, nit_type, &nit);
5034 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5035 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5036 aff_combination_scale (&nit, -1);
5037 aff_combination_scale (&tmpa, -1);
5038 aff_combination_add (&tmpb, &tmpa);
5039 aff_combination_add (&tmpb, &nit);
5040 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5041 return false;
5043 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5044 overflow. */
5045 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5046 cand->iv->step,
5047 fold_convert (TREE_TYPE (cand->iv->step), a));
5048 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5049 return false;
5051 /* Determine the new comparison operator. */
5052 comp = step < 0 ? GT_EXPR : LT_EXPR;
5053 if (*comp_p == NE_EXPR)
5054 *comp_p = comp;
5055 else if (*comp_p == EQ_EXPR)
5056 *comp_p = invert_tree_comparison (comp, false);
5057 else
5058 gcc_unreachable ();
5060 return true;
5063 /* Check whether it is possible to express the condition in USE by comparison
5064 of candidate CAND. If so, store the value compared with to BOUND, and the
5065 comparison operator to COMP. */
5067 static bool
5068 may_eliminate_iv (struct ivopts_data *data,
5069 struct iv_use *use, struct iv_cand *cand, tree *bound,
5070 enum tree_code *comp)
5072 basic_block ex_bb;
5073 edge exit;
5074 tree period;
5075 struct loop *loop = data->current_loop;
5076 aff_tree bnd;
5077 struct tree_niter_desc *desc = NULL;
5079 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5080 return false;
5082 /* For now this works only for exits that dominate the loop latch.
5083 TODO: extend to other conditions inside loop body. */
5084 ex_bb = gimple_bb (use->stmt);
5085 if (use->stmt != last_stmt (ex_bb)
5086 || gimple_code (use->stmt) != GIMPLE_COND
5087 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5088 return false;
5090 exit = EDGE_SUCC (ex_bb, 0);
5091 if (flow_bb_inside_loop_p (loop, exit->dest))
5092 exit = EDGE_SUCC (ex_bb, 1);
5093 if (flow_bb_inside_loop_p (loop, exit->dest))
5094 return false;
5096 desc = niter_for_exit (data, exit);
5097 if (!desc)
5098 return false;
5100 /* Determine whether we can use the variable to test the exit condition.
5101 This is the case iff the period of the induction variable is greater
5102 than the number of iterations for which the exit condition is true. */
5103 period = iv_period (cand->iv);
5105 /* If the number of iterations is constant, compare against it directly. */
5106 if (TREE_CODE (desc->niter) == INTEGER_CST)
5108 /* See cand_value_at. */
5109 if (stmt_after_increment (loop, cand, use->stmt))
5111 if (!tree_int_cst_lt (desc->niter, period))
5112 return false;
5114 else
5116 if (tree_int_cst_lt (period, desc->niter))
5117 return false;
5121 /* If not, and if this is the only possible exit of the loop, see whether
5122 we can get a conservative estimate on the number of iterations of the
5123 entire loop and compare against that instead. */
5124 else
5126 widest_int period_value, max_niter;
5128 max_niter = desc->max;
5129 if (stmt_after_increment (loop, cand, use->stmt))
5130 max_niter += 1;
5131 period_value = wi::to_widest (period);
5132 if (wi::gtu_p (max_niter, period_value))
5134 /* See if we can take advantage of inferred loop bound
5135 information. */
5136 if (data->loop_single_exit_p)
5138 if (!max_loop_iterations (loop, &max_niter))
5139 return false;
5140 /* The loop bound is already adjusted by adding 1. */
5141 if (wi::gtu_p (max_niter, period_value))
5142 return false;
5144 else
5145 return false;
5149 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5151 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5152 aff_combination_to_tree (&bnd));
5153 *comp = iv_elimination_compare (data, use);
5155 /* It is unlikely that computing the number of iterations using division
5156 would be more profitable than keeping the original induction variable. */
5157 if (expression_expensive_p (*bound))
5158 return false;
5160 /* Sometimes, it is possible to handle the situation that the number of
5161 iterations may be zero unless additional assumptions hold, by using <
5162 instead of != in the exit condition.
5164 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5165 base the exit condition on it. However, that is often too
5166 expensive. */
5167 if (!integer_zerop (desc->may_be_zero))
5168 return iv_elimination_compare_lt (data, cand, comp, desc);
5170 return true;
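/* Numeric example (editorial): for an 8-bit unsigned candidate with
   step 1 the period is 255; an exit whose niter analysis bounds the trip
   count by at most 100 passes the period check, so the exit test can be
   rewritten as a comparison of the candidate against its final value as
   computed by cand_value_at.  */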
5173 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5174 be copied if it is used in the loop body and DATA->body_includes_call. */
5176 static int
5177 parm_decl_cost (struct ivopts_data *data, tree bound)
5179 tree sbound = bound;
5180 STRIP_NOPS (sbound);
5182 if (TREE_CODE (sbound) == SSA_NAME
5183 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5184 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5185 && data->body_includes_call)
5186 return COSTS_N_INSNS (1);
5188 return 0;
5191 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5193 static bool
5194 determine_group_iv_cost_cond (struct ivopts_data *data,
5195 struct iv_group *group, struct iv_cand *cand)
5197 tree bound = NULL_TREE;
5198 struct iv *cmp_iv;
5199 bitmap inv_exprs = NULL;
5200 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5201 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5202 enum comp_iv_rewrite rewrite_type;
5203 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5204 tree *control_var, *bound_cst;
5205 enum tree_code comp = ERROR_MARK;
5206 struct iv_use *use = group->vuses[0];
5208 /* Extract condition operands. */
5209 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5210 &bound_cst, NULL, &cmp_iv);
5211 gcc_assert (rewrite_type != COMP_IV_NA);
5213 /* Try iv elimination. */
5214 if (rewrite_type == COMP_IV_ELIM
5215 && may_eliminate_iv (data, use, cand, &bound, &comp))
5217 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5218 if (elim_cost.cost == 0)
5219 elim_cost.cost = parm_decl_cost (data, bound);
5220 else if (TREE_CODE (bound) == INTEGER_CST)
5221 elim_cost.cost = 0;
5222 /* If we replace a loop condition 'i < n' with 'p < base + n',
5223 inv_vars_elim will have 'base' and 'n' set, which implies that both
5224 'base' and 'n' will be live during the loop. More likely,
5225 'base + n' will be loop invariant, resulting in only one live value
5226 during the loop. So in that case we clear inv_vars_elim and set
5227 inv_expr_elim instead. */
5228 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5230 inv_expr_elim = get_loop_invariant_expr (data, bound);
5231 bitmap_clear (inv_vars_elim);
5233 /* The bound is a loop invariant, so it will be computed
5234 only once. */
5235 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5238 /* When the condition is a comparison of the candidate IV against
5239 zero, prefer this IV.
5241 TODO: The constant that we're subtracting from the cost should
5242 be target-dependent. This information should be added to the
5243 target costs for each backend. */
5244 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5245 && integer_zerop (*bound_cst)
5246 && (operand_equal_p (*control_var, cand->var_after, 0)
5247 || operand_equal_p (*control_var, cand->var_before, 0)))
5248 elim_cost -= 1;
5250 express_cost = get_computation_cost (data, use, cand, false,
5251 &inv_vars_express, NULL,
5252 &inv_expr_express);
5253 if (cmp_iv != NULL)
5254 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5256 /* Count the cost of the original bound as well. */
5257 bound_cost = force_var_cost (data, *bound_cst, NULL);
5258 if (bound_cost.cost == 0)
5259 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5260 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5261 bound_cost.cost = 0;
5262 express_cost += bound_cost;
5264 /* Choose the better approach, preferring the eliminated IV. */
5265 if (elim_cost <= express_cost)
5267 cost = elim_cost;
5268 inv_vars = inv_vars_elim;
5269 inv_vars_elim = NULL;
5270 inv_expr = inv_expr_elim;
5272 else
5274 cost = express_cost;
5275 inv_vars = inv_vars_express;
5276 inv_vars_express = NULL;
5277 bound = NULL_TREE;
5278 comp = ERROR_MARK;
5279 inv_expr = inv_expr_express;
5282 if (inv_expr)
5284 inv_exprs = BITMAP_ALLOC (NULL);
5285 bitmap_set_bit (inv_exprs, inv_expr->id);
5287 set_group_iv_cost (data, group, cand, cost,
5288 inv_vars, bound, comp, inv_exprs);
5290 if (inv_vars_elim)
5291 BITMAP_FREE (inv_vars_elim);
5292 if (inv_vars_express)
5293 BITMAP_FREE (inv_vars_express);
5295 return !cost.infinite_cost_p ();
5298 /* Determines cost of computing uses in GROUP with CAND. Returns false
5299 if GROUP cannot be represented with CAND. */
5301 static bool
5302 determine_group_iv_cost (struct ivopts_data *data,
5303 struct iv_group *group, struct iv_cand *cand)
5305 switch (group->type)
5307 case USE_NONLINEAR_EXPR:
5308 return determine_group_iv_cost_generic (data, group, cand);
5310 case USE_REF_ADDRESS:
5311 case USE_PTR_ADDRESS:
5312 return determine_group_iv_cost_address (data, group, cand);
5314 case USE_COMPARE:
5315 return determine_group_iv_cost_cond (data, group, cand);
5317 default:
5318 gcc_unreachable ();
5322 /* Return true if get_computation_cost indicates that autoincrement is
5323 a possibility for the pair of USE and CAND, false otherwise. */
5325 static bool
5326 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5327 struct iv_cand *cand)
5329 if (!address_p (use->type))
5330 return false;
5332 bool can_autoinc = false;
5333 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5334 return can_autoinc;
5337 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5338 use that allows autoincrement, and set their AINC_USE if possible. */
5340 static void
5341 set_autoinc_for_original_candidates (struct ivopts_data *data)
5343 unsigned i, j;
5345 for (i = 0; i < data->vcands.length (); i++)
5347 struct iv_cand *cand = data->vcands[i];
5348 struct iv_use *closest_before = NULL;
5349 struct iv_use *closest_after = NULL;
5350 if (cand->pos != IP_ORIGINAL)
5351 continue;
5353 for (j = 0; j < data->vgroups.length (); j++)
5355 struct iv_group *group = data->vgroups[j];
5356 struct iv_use *use = group->vuses[0];
5357 unsigned uid = gimple_uid (use->stmt);
5359 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5360 continue;
5362 if (uid < gimple_uid (cand->incremented_at)
5363 && (closest_before == NULL
5364 || uid > gimple_uid (closest_before->stmt)))
5365 closest_before = use;
5367 if (uid > gimple_uid (cand->incremented_at)
5368 && (closest_after == NULL
5369 || uid < gimple_uid (closest_after->stmt)))
5370 closest_after = use;
5373 if (closest_before != NULL
5374 && autoinc_possible_for_pair (data, closest_before, cand))
5375 cand->ainc_use = closest_before;
5376 else if (closest_after != NULL
5377 && autoinc_possible_for_pair (data, closest_after, cand))
5378 cand->ainc_use = closest_after;
5382 /* Relate compare use with all candidates. */
5384 static void
5385 relate_compare_use_with_all_cands (struct ivopts_data *data)
5387 unsigned i, count = data->vcands.length ();
5388 for (i = 0; i < data->vgroups.length (); i++)
5390 struct iv_group *group = data->vgroups[i];
5392 if (group->type == USE_COMPARE)
5393 bitmap_set_range (group->related_cands, 0, count);
5397 /* Finds the candidates for the induction variables. */
5399 static void
5400 find_iv_candidates (struct ivopts_data *data)
5402 /* Add commonly used ivs. */
5403 add_standard_iv_candidates (data);
5405 /* Add old induction variables. */
5406 add_iv_candidate_for_bivs (data);
5408 /* Add induction variables derived from uses. */
5409 add_iv_candidate_for_groups (data);
5411 set_autoinc_for_original_candidates (data);
5413 /* Record the important candidates. */
5414 record_important_candidates (data);
5416 /* Relate compare iv_use with all candidates. */
5417 if (!data->consider_all_candidates)
5418 relate_compare_use_with_all_cands (data);
5420 if (dump_file && (dump_flags & TDF_DETAILS))
5422 unsigned i;
5424 fprintf (dump_file, "\n<Important Candidates>:\t");
5425 for (i = 0; i < data->vcands.length (); i++)
5426 if (data->vcands[i]->important)
5427 fprintf (dump_file, " %d,", data->vcands[i]->id);
5428 fprintf (dump_file, "\n");
5430 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5431 for (i = 0; i < data->vgroups.length (); i++)
5433 struct iv_group *group = data->vgroups[i];
5435 if (group->related_cands)
5437 fprintf (dump_file, " Group %d:\t", group->id);
5438 dump_bitmap (dump_file, group->related_cands);
5441 fprintf (dump_file, "\n");
5445 /* Determines costs of computing use of iv with an iv candidate. */
5447 static void
5448 determine_group_iv_costs (struct ivopts_data *data)
5450 unsigned i, j;
5451 struct iv_cand *cand;
5452 struct iv_group *group;
5453 bitmap to_clear = BITMAP_ALLOC (NULL);
5455 alloc_use_cost_map (data);
5457 for (i = 0; i < data->vgroups.length (); i++)
5459 group = data->vgroups[i];
5461 if (data->consider_all_candidates)
5463 for (j = 0; j < data->vcands.length (); j++)
5465 cand = data->vcands[j];
5466 determine_group_iv_cost (data, group, cand);
5469 else
5471 bitmap_iterator bi;
5473 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5475 cand = data->vcands[j];
5476 if (!determine_group_iv_cost (data, group, cand))
5477 bitmap_set_bit (to_clear, j);
5480 /* Remove the candidates for which the cost is infinite from
5481 the list of related candidates. */
5482 bitmap_and_compl_into (group->related_cands, to_clear);
5483 bitmap_clear (to_clear);
5487 BITMAP_FREE (to_clear);
5489 if (dump_file && (dump_flags & TDF_DETAILS))
5491 bitmap_iterator bi;
5493 /* Dump invariant variables. */
5494 fprintf (dump_file, "\n<Invariant Vars>:\n");
5495 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5497 struct version_info *info = ver_info (data, i);
5498 if (info->inv_id)
5500 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5501 print_generic_expr (dump_file, info->name, TDF_SLIM);
5502 fprintf (dump_file, "%s\n",
5503 info->has_nonlin_use ? "" : "\t(eliminable)");
5507 /* Dump invariant expressions. */
5508 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5509 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5511 for (hash_table<iv_inv_expr_hasher>::iterator it
5512 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5513 ++it)
5514 list.safe_push (*it);
5516 list.qsort (sort_iv_inv_expr_ent);
5518 for (i = 0; i < list.length (); ++i)
5520 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5521 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5522 fprintf (dump_file, "\n");
5525 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5527 for (i = 0; i < data->vgroups.length (); i++)
5529 group = data->vgroups[i];
5531 fprintf (dump_file, "Group %d:\n", i);
5532 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5533 for (j = 0; j < group->n_map_members; j++)
5535 if (!group->cost_map[j].cand
5536 || group->cost_map[j].cost.infinite_cost_p ())
5537 continue;
5539 fprintf (dump_file, " %d\t%d\t%d\t",
5540 group->cost_map[j].cand->id,
5541 group->cost_map[j].cost.cost,
5542 group->cost_map[j].cost.complexity);
5543 if (!group->cost_map[j].inv_exprs
5544 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5545 fprintf (dump_file, "NIL;\t");
5546 else
5547 bitmap_print (dump_file,
5548 group->cost_map[j].inv_exprs, "", ";\t");
5549 if (!group->cost_map[j].inv_vars
5550 || bitmap_empty_p (group->cost_map[j].inv_vars))
5551 fprintf (dump_file, "NIL;\n");
5552 else
5553 bitmap_print (dump_file,
5554 group->cost_map[j].inv_vars, "", "\n");
5557 fprintf (dump_file, "\n");
5559 fprintf (dump_file, "\n");
5563 /* Determines cost of the candidate CAND. */
5565 static void
5566 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5568 comp_cost cost_base;
5569 unsigned cost, cost_step;
5570 tree base;
5572 gcc_assert (cand->iv != NULL);
5574 /* There are two costs associated with the candidate -- its increment
5575 and its initialization. The second is almost negligible for any loop
5576 that rolls enough, so we take it into account only very slightly. */
5578 base = cand->iv->base;
5579 cost_base = force_var_cost (data, base, NULL);
5580 /* It will be rare for the iv register to happen to be initialized with
5581 the proper value at no cost. In general, there will be at least a
5582 regcopy or a const set. */
5583 if (cost_base.cost == 0)
5584 cost_base.cost = COSTS_N_INSNS (1);
5585 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5587 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5589 /* Prefer the original ivs unless we may gain something by replacing them.
5590 The reason is to make debugging simpler, so this is not relevant for
5591 artificial ivs created by other optimization passes. */
5592 if (cand->pos != IP_ORIGINAL
5593 || !SSA_NAME_VAR (cand->var_before)
5594 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5595 cost++;
5597 /* Prefer not to insert statements into the latch unless there are some
5598 already (so that we do not create unnecessary jumps). */
5599 if (cand->pos == IP_END
5600 && empty_block_p (ip_end_pos (data->current_loop)))
5601 cost++;
5603 cand->cost = cost;
5604 cand->cost_step = cost_step;
5607 /* Determines costs of computation of the candidates. */
5609 static void
5610 determine_iv_costs (struct ivopts_data *data)
5612 unsigned i;
5614 if (dump_file && (dump_flags & TDF_DETAILS))
5616 fprintf (dump_file, "<Candidate Costs>:\n");
5617 fprintf (dump_file, " cand\tcost\n");
5620 for (i = 0; i < data->vcands.length (); i++)
5622 struct iv_cand *cand = data->vcands[i];
5624 determine_iv_cost (data, cand);
5626 if (dump_file && (dump_flags & TDF_DETAILS))
5627 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5630 if (dump_file && (dump_flags & TDF_DETAILS))
5631 fprintf (dump_file, "\n");
5634 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5635 induction variables. Note N_INVS includes both invariant variables and
5636 invariant expressions. */
5638 static unsigned
5639 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5640 unsigned n_cands)
5642 unsigned cost;
5643 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5644 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5645 bool speed = data->speed;
5647 /* If there is a call in the loop body, the call-clobbered registers
5648 are not available for loop invariants. */
5649 if (data->body_includes_call)
5650 available_regs = available_regs - target_clobbered_regs;
5652 /* If we have enough registers. */
5653 if (regs_needed + target_res_regs < available_regs)
5654 cost = n_new;
5655 /* If close to running out of registers, try to preserve them. */
5656 else if (regs_needed <= available_regs)
5657 cost = target_reg_cost [speed] * regs_needed;
5658 /* If we run out of available registers but the number of candidates
5659 still fits, we penalize the extra registers using target_spill_cost. */
5660 else if (n_cands <= available_regs)
5661 cost = target_reg_cost [speed] * available_regs
5662 + target_spill_cost [speed] * (regs_needed - available_regs);
5663 /* If even the number of candidates exceeds the available registers, we
5664 penalize the extra candidate registers using target_spill_cost * 2,
5665 because it is more expensive to spill an induction variable than an invariant. */
5666 else
5667 cost = target_reg_cost [speed] * available_regs
5668 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5669 + target_spill_cost [speed] * (regs_needed - n_cands);
5671 /* Finally, add the number of candidates, so that we prefer eliminating
5672 induction variables if possible. */
5673 return cost + n_cands;
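/* Worked example (editorial): suppose 14 registers remain available
   after the call-clobber adjustment, data->regs_used == 6, n_invs == 2
   and n_cands == 3.  Then regs_needed == 11; assuming target_res_regs
   is 3, the first test (11 + 3 < 14) fails, so the second case charges
   target_reg_cost[speed] * 11, and the final return adds n_cands == 3
   to bias the search toward sets with fewer induction variables.  */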
5676 /* For each size of the induction variable set determine the penalty. */
5678 static void
5679 determine_set_costs (struct ivopts_data *data)
5681 unsigned j, n;
5682 gphi *phi;
5683 gphi_iterator psi;
5684 tree op;
5685 struct loop *loop = data->current_loop;
5686 bitmap_iterator bi;
5688 if (dump_file && (dump_flags & TDF_DETAILS))
5690 fprintf (dump_file, "<Global Costs>:\n");
5691 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5692 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5693 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5694 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5697 n = 0;
5698 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5700 phi = psi.phi ();
5701 op = PHI_RESULT (phi);
5703 if (virtual_operand_p (op))
5704 continue;
5706 if (get_iv (data, op))
5707 continue;
5709 if (!POINTER_TYPE_P (TREE_TYPE (op))
5710 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5711 continue;
5713 n++;
5716 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5718 struct version_info *info = ver_info (data, j);
5720 if (info->inv_id && info->has_nonlin_use)
5721 n++;
5724 data->regs_used = n;
5725 if (dump_file && (dump_flags & TDF_DETAILS))
5726 fprintf (dump_file, " regs_used %d\n", n);
5728 if (dump_file && (dump_flags & TDF_DETAILS))
5730 fprintf (dump_file, " cost for size:\n");
5731 fprintf (dump_file, " ivs\tcost\n");
5732 for (j = 0; j <= 2 * target_avail_regs; j++)
5733 fprintf (dump_file, " %d\t%d\n", j,
5734 ivopts_estimate_reg_pressure (data, 0, j));
5735 fprintf (dump_file, "\n");
5739 /* Returns true if A is a cheaper cost pair than B. */
5741 static bool
5742 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5744 if (!a)
5745 return false;
5747 if (!b)
5748 return true;
5750 if (a->cost < b->cost)
5751 return true;
5753 if (b->cost < a->cost)
5754 return false;
5756 /* In case the costs are the same, prefer the cheaper candidate. */
5757 if (a->cand->cost < b->cand->cost)
5758 return true;
5760 return false;
5763 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5764 for more expensive, equal and cheaper respectively. */
5766 static int
5767 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5769 if (cheaper_cost_pair (a, b))
5770 return -1;
5771 if (cheaper_cost_pair (b, a))
5772 return 1;
5774 return 0;
5779 /* Returns the cost pair by which GROUP is expressed in IVS. */
5779 static struct cost_pair *
5780 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5782 return ivs->cand_for_group[group->id];
5785 /* Computes the cost field of IVS structure. */
5787 static void
5788 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5790 comp_cost cost = ivs->cand_use_cost;
5792 cost += ivs->cand_cost;
5793 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5794 ivs->cost = cost;
5797 /* Remove uses of the invariants in set INVS by decreasing the counters in
5798 N_INV_USES and in IVS. */
5800 static void
5801 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5803 bitmap_iterator bi;
5804 unsigned iid;
5806 if (!invs)
5807 return;
5809 gcc_assert (n_inv_uses != NULL);
5810 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5812 n_inv_uses[iid]--;
5813 if (n_inv_uses[iid] == 0)
5814 ivs->n_invs--;
5820 /* Set GROUP not to be expressed by any candidate in IVS. */
5820 static void
5821 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5822 struct iv_group *group)
5824 unsigned gid = group->id, cid;
5825 struct cost_pair *cp;
5827 cp = ivs->cand_for_group[gid];
5828 if (!cp)
5829 return;
5830 cid = cp->cand->id;
5832 ivs->bad_groups++;
5833 ivs->cand_for_group[gid] = NULL;
5834 ivs->n_cand_uses[cid]--;
5836 if (ivs->n_cand_uses[cid] == 0)
5838 bitmap_clear_bit (ivs->cands, cid);
5839 ivs->n_cands--;
5840 ivs->cand_cost -= cp->cand->cost;
5841 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5842 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5845 ivs->cand_use_cost -= cp->cost;
5846 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5847 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5848 iv_ca_recount_cost (data, ivs);
5851 /* Add uses of the invariants in set INVS by increasing the counters in
5852 N_INV_USES and in IVS. */
5854 static void
5855 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5857 bitmap_iterator bi;
5858 unsigned iid;
5860 if (!invs)
5861 return;
5863 gcc_assert (n_inv_uses != NULL);
5864 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5866 n_inv_uses[iid]++;
5867 if (n_inv_uses[iid] == 1)
5868 ivs->n_invs++;
5872 /* Set cost pair for GROUP in set IVS to CP. */
5874 static void
5875 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5876 struct iv_group *group, struct cost_pair *cp)
5878 unsigned gid = group->id, cid;
5880 if (ivs->cand_for_group[gid] == cp)
5881 return;
5883 if (ivs->cand_for_group[gid])
5884 iv_ca_set_no_cp (data, ivs, group);
5886 if (cp)
5888 cid = cp->cand->id;
5890 ivs->bad_groups--;
5891 ivs->cand_for_group[gid] = cp;
5892 ivs->n_cand_uses[cid]++;
5893 if (ivs->n_cand_uses[cid] == 1)
5895 bitmap_set_bit (ivs->cands, cid);
5896 ivs->n_cands++;
5897 ivs->cand_cost += cp->cand->cost;
5898 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5899 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5902 ivs->cand_use_cost += cp->cost;
5903 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5904 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5905 iv_ca_recount_cost (data, ivs);
5909 /* Extend set IVS by expressing GROUP by some of the candidates in it
5910 if possible. Consider all important candidates if the candidates in
5911 set IVS don't give any result. */
5913 static void
5914 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5915 struct iv_group *group)
5917 struct cost_pair *best_cp = NULL, *cp;
5918 bitmap_iterator bi;
5919 unsigned i;
5920 struct iv_cand *cand;
5922 gcc_assert (ivs->upto >= group->id);
5923 ivs->upto++;
5924 ivs->bad_groups++;
5926 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5928 cand = data->vcands[i];
5929 cp = get_group_iv_cost (data, group, cand);
5930 if (cheaper_cost_pair (cp, best_cp))
5931 best_cp = cp;
5934 if (best_cp == NULL)
5936 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5938 cand = data->vcands[i];
5939 cp = get_group_iv_cost (data, group, cand);
5940 if (cheaper_cost_pair (cp, best_cp))
5941 best_cp = cp;
5945 iv_ca_set_cp (data, ivs, group, best_cp);
5948 /* Get cost for assignment IVS. */
5950 static comp_cost
5951 iv_ca_cost (struct iv_ca *ivs)
5953 /* This was a conditional expression but it triggered a bug in
5954 Sun C 5.5. */
5955 if (ivs->bad_groups)
5956 return infinite_cost;
5957 else
5958 return ivs->cost;
5961 /* Compare whether applying NEW_CP to GROUP for IVS introduces more invariants
5962 than OLD_CP does. Return 1, 0 or -1 for more, equally many, or fewer
5963 invariants respectively. */
5965 static int
5966 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5967 struct iv_group *group, struct cost_pair *old_cp,
5968 struct cost_pair *new_cp)
5970 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5971 unsigned old_n_invs = ivs->n_invs;
5972 iv_ca_set_cp (data, ivs, group, new_cp);
5973 unsigned new_n_invs = ivs->n_invs;
5974 iv_ca_set_cp (data, ivs, group, old_cp);
5976 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
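/* Note the idiom above: instead of recomputing dependences from scratch,
   the function tentatively applies NEW_CP, reads the up-to-date
   ivs->n_invs (kept current by the counter transitions in
   iv_ca_set_add_invs/iv_ca_set_remove_invs), and then restores OLD_CP,
   leaving IVS unchanged.  */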
5979 /* Creates a change that expresses GROUP by NEW_CP instead of OLD_CP and
5980 chains it before NEXT. */
5982 static struct iv_ca_delta *
5983 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5984 struct cost_pair *new_cp, struct iv_ca_delta *next)
5986 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5988 change->group = group;
5989 change->old_cp = old_cp;
5990 change->new_cp = new_cp;
5991 change->next = next;
5993 return change;
5996 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5997 are rewritten. */
5999 static struct iv_ca_delta *
6000 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6002 struct iv_ca_delta *last;
6004 if (!l2)
6005 return l1;
6007 if (!l1)
6008 return l2;
6010 for (last = l1; last->next; last = last->next)
6011 continue;
6012 last->next = l2;
6014 return l1;
6017 /* Reverse the list of changes DELTA, forming the inverse to it. */
6019 static struct iv_ca_delta *
6020 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6022 struct iv_ca_delta *act, *next, *prev = NULL;
6024 for (act = delta; act; act = next)
6026 next = act->next;
6027 act->next = prev;
6028 prev = act;
6030 std::swap (act->old_cp, act->new_cp);
6033 return prev;
6036 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6037 reverted instead. */
6039 static void
6040 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6041 struct iv_ca_delta *delta, bool forward)
6043 struct cost_pair *from, *to;
6044 struct iv_ca_delta *act;
6046 if (!forward)
6047 delta = iv_ca_delta_reverse (delta);
6049 for (act = delta; act; act = act->next)
6051 from = act->old_cp;
6052 to = act->new_cp;
6053 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6054 iv_ca_set_cp (data, ivs, act->group, to);
6057 if (!forward)
6058 iv_ca_delta_reverse (delta);
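/* The delta lists let IVS serve as a scratch pad: a set of changes is
   applied, the resulting cost is read off, and the changes are undone.
   A minimal sketch of this try/measure/rollback idiom (the same pattern
   appears in iv_ca_extend and iv_ca_narrow below):

     iv_ca_delta_commit (data, ivs, delta, true);    // apply
     comp_cost cost = iv_ca_cost (ivs);              // measure
     iv_ca_delta_commit (data, ivs, delta, false);   // revert  */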
6061 /* Returns true if CAND is used in IVS. */
6063 static bool
6064 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6066 return ivs->n_cand_uses[cand->id] > 0;
6069 /* Returns number of induction variable candidates in the set IVS. */
6071 static unsigned
6072 iv_ca_n_cands (struct iv_ca *ivs)
6074 return ivs->n_cands;
6077 /* Free the list of changes DELTA. */
6079 static void
6080 iv_ca_delta_free (struct iv_ca_delta **delta)
6082 struct iv_ca_delta *act, *next;
6084 for (act = *delta; act; act = next)
6086 next = act->next;
6087 free (act);
6090 *delta = NULL;
6093 /* Allocates a new iv candidate assignment. */
6095 static struct iv_ca *
6096 iv_ca_new (struct ivopts_data *data)
6098 struct iv_ca *nw = XNEW (struct iv_ca);
6100 nw->upto = 0;
6101 nw->bad_groups = 0;
6102 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6103 data->vgroups.length ());
6104 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6105 nw->cands = BITMAP_ALLOC (NULL);
6106 nw->n_cands = 0;
6107 nw->n_invs = 0;
6108 nw->cand_use_cost = no_cost;
6109 nw->cand_cost = 0;
6110 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6111 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6112 nw->cost = no_cost;
6114 return nw;
6117 /* Free memory occupied by the set IVS. */
6119 static void
6120 iv_ca_free (struct iv_ca **ivs)
6122 free ((*ivs)->cand_for_group);
6123 free ((*ivs)->n_cand_uses);
6124 BITMAP_FREE ((*ivs)->cands);
6125 free ((*ivs)->n_inv_var_uses);
6126 free ((*ivs)->n_inv_expr_uses);
6127 free (*ivs);
6128 *ivs = NULL;
6131 /* Dumps IVS to FILE. */
6133 static void
6134 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6136 unsigned i;
6137 comp_cost cost = iv_ca_cost (ivs);
6139 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6140 cost.complexity);
6141 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6142 ivs->cand_cost, ivs->cand_use_cost.cost,
6143 ivs->cand_use_cost.complexity);
6144 bitmap_print (file, ivs->cands, " candidates: ","\n");
6146 for (i = 0; i < ivs->upto; i++)
6148 struct iv_group *group = data->vgroups[i];
6149 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6150 if (cp)
6151 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6152 group->id, cp->cand->id, cp->cost.cost,
6153 cp->cost.complexity);
6154 else
6155 fprintf (file, " group:%d --> ??\n", group->id);
6158 const char *pref = "";
6159 fprintf (file, " invariant variables: ");
6160 for (i = 1; i <= data->max_inv_var_id; i++)
6161 if (ivs->n_inv_var_uses[i])
6163 fprintf (file, "%s%d", pref, i);
6164 pref = ", ";
6167 pref = "";
6168 fprintf (file, "\n invariant expressions: ");
6169 for (i = 1; i <= data->max_inv_expr_id; i++)
6170 if (ivs->n_inv_expr_uses[i])
6172 fprintf (file, "%s%d", pref, i);
6173 pref = ", ";
6176 fprintf (file, "\n\n");
6179 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6180 new set, and store differences in DELTA. The number of induction variables
6181 in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true,
6182 the function tries to find a solution with minimal iv candidates. */
6184 static comp_cost
6185 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6186 struct iv_cand *cand, struct iv_ca_delta **delta,
6187 unsigned *n_ivs, bool min_ncand)
6189 unsigned i;
6190 comp_cost cost;
6191 struct iv_group *group;
6192 struct cost_pair *old_cp, *new_cp;
6194 *delta = NULL;
6195 for (i = 0; i < ivs->upto; i++)
6197 group = data->vgroups[i];
6198 old_cp = iv_ca_cand_for_group (ivs, group);
6200 if (old_cp
6201 && old_cp->cand == cand)
6202 continue;
6204 new_cp = get_group_iv_cost (data, group, cand);
6205 if (!new_cp)
6206 continue;
6208 if (!min_ncand)
6210 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6211 /* Skip if new_cp depends on more invariants. */
6212 if (cmp_invs > 0)
6213 continue;
6215 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6216 /* Skip if new_cp is not cheaper. */
6217 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6218 continue;
6221 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6224 iv_ca_delta_commit (data, ivs, *delta, true);
6225 cost = iv_ca_cost (ivs);
6226 if (n_ivs)
6227 *n_ivs = iv_ca_n_cands (ivs);
6228 iv_ca_delta_commit (data, ivs, *delta, false);
6230 return cost;
6233 /* Try narrowing set IVS by removing CAND. Return the cost of
6234 the new set and store the differences in DELTA. START is
6235 the candidate with which we start narrowing. */
6237 static comp_cost
6238 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6239 struct iv_cand *cand, struct iv_cand *start,
6240 struct iv_ca_delta **delta)
6242 unsigned i, ci;
6243 struct iv_group *group;
6244 struct cost_pair *old_cp, *new_cp, *cp;
6245 bitmap_iterator bi;
6246 struct iv_cand *cnd;
6247 comp_cost cost, best_cost, acost;
6249 *delta = NULL;
6250 for (i = 0; i < data->vgroups.length (); i++)
6252 group = data->vgroups[i];
6254 old_cp = iv_ca_cand_for_group (ivs, group);
6255 if (old_cp->cand != cand)
6256 continue;
6258 best_cost = iv_ca_cost (ivs);
6259 /* Start narrowing with START. */
6260 new_cp = get_group_iv_cost (data, group, start);
6262 if (data->consider_all_candidates)
6264 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6266 if (ci == cand->id || (start && ci == start->id))
6267 continue;
6269 cnd = data->vcands[ci];
6271 cp = get_group_iv_cost (data, group, cnd);
6272 if (!cp)
6273 continue;
6275 iv_ca_set_cp (data, ivs, group, cp);
6276 acost = iv_ca_cost (ivs);
6278 if (acost < best_cost)
6280 best_cost = acost;
6281 new_cp = cp;
6285 else
6287 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6289 if (ci == cand->id || (start && ci == start->id))
6290 continue;
6292 cnd = data->vcands[ci];
6294 cp = get_group_iv_cost (data, group, cnd);
6295 if (!cp)
6296 continue;
6298 iv_ca_set_cp (data, ivs, group, cp);
6299 acost = iv_ca_cost (ivs);
6301 if (acost < best_cost)
6303 best_cost = acost;
6304 new_cp = cp;
6308 /* Restore the old cp for this use. */
6309 iv_ca_set_cp (data, ivs, group, old_cp);
6311 if (!new_cp)
6313 iv_ca_delta_free (delta);
6314 return infinite_cost;
6317 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6320 iv_ca_delta_commit (data, ivs, *delta, true);
6321 cost = iv_ca_cost (ivs);
6322 iv_ca_delta_commit (data, ivs, *delta, false);
6324 return cost;
6327 /* Try optimizing the set of candidates IVS by removing candidates other
6328 than EXCEPT_CAND from it. Return the cost of the new set, and store
6329 differences in DELTA. */
6331 static comp_cost
6332 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6333 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6335 bitmap_iterator bi;
6336 struct iv_ca_delta *act_delta, *best_delta;
6337 unsigned i;
6338 comp_cost best_cost, acost;
6339 struct iv_cand *cand;
6341 best_delta = NULL;
6342 best_cost = iv_ca_cost (ivs);
6344 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6346 cand = data->vcands[i];
6348 if (cand == except_cand)
6349 continue;
6351 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6353 if (acost < best_cost)
6355 best_cost = acost;
6356 iv_ca_delta_free (&best_delta);
6357 best_delta = act_delta;
6359 else
6360 iv_ca_delta_free (&act_delta);
6363 if (!best_delta)
6365 *delta = NULL;
6366 return best_cost;
6369 /* Recurse to possibly remove other unnecessary ivs. */
6370 iv_ca_delta_commit (data, ivs, best_delta, true);
6371 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6372 iv_ca_delta_commit (data, ivs, best_delta, false);
6373 *delta = iv_ca_delta_join (best_delta, *delta);
6374 return best_cost;
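/* The recursion above terminates: a committed BEST_DELTA moves every group
   off at least one candidate (iv_ca_narrow only yields a delta when all
   groups using CAND can be re-expressed by other candidates), so each
   level shrinks the candidate set and the depth is bounded by the number
   of candidates in IVS.  */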
6377 /* Check if CAND_IDX names a candidate other than OLD_CAND that has a
6378 cheaper local cost for GROUP than BEST_CP. If so, return a pointer to
6379 its cost_pair; otherwise just return BEST_CP. */
6381 static struct cost_pair*
6382 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6383 unsigned int cand_idx, struct iv_cand *old_cand,
6384 struct cost_pair *best_cp)
6386 struct iv_cand *cand;
6387 struct cost_pair *cp;
6389 gcc_assert (old_cand != NULL && best_cp != NULL);
6390 if (cand_idx == old_cand->id)
6391 return best_cp;
6393 cand = data->vcands[cand_idx];
6394 cp = get_group_iv_cost (data, group, cand);
6395 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6396 return cp;
6398 return best_cp;
6401 /* Try breaking the locally optimal fixed-point for IVS by replacing
6402 candidates that are used by more than one iv use. For each such
6403 candidate, this function tries to represent the iv uses under that
6404 candidate using other candidates with lower local cost, then tries to
6405 prune the new set. If the new set has lower cost, it returns the new
6406 cost after recording the candidate replacement in list DELTA. */
6408 static comp_cost
6409 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6410 struct iv_ca_delta **delta)
6412 bitmap_iterator bi, bj;
6413 unsigned int i, j, k;
6414 struct iv_cand *cand;
6415 comp_cost orig_cost, acost;
6416 struct iv_ca_delta *act_delta, *tmp_delta;
6417 struct cost_pair *old_cp, *best_cp = NULL;
6419 *delta = NULL;
6420 orig_cost = iv_ca_cost (ivs);
6422 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6424 if (ivs->n_cand_uses[i] == 1
6425 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6426 continue;
6428 cand = data->vcands[i];
6430 act_delta = NULL;
6431 /* Represent uses under current candidate using other ones with
6432 lower local cost. */
6433 for (j = 0; j < ivs->upto; j++)
6435 struct iv_group *group = data->vgroups[j];
6436 old_cp = iv_ca_cand_for_group (ivs, group);
6438 if (old_cp->cand != cand)
6439 continue;
6441 best_cp = old_cp;
6442 if (data->consider_all_candidates)
6443 for (k = 0; k < data->vcands.length (); k++)
6444 best_cp = cheaper_cost_with_cand (data, group, k,
6445 old_cp->cand, best_cp);
6446 else
6447 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6448 best_cp = cheaper_cost_with_cand (data, group, k,
6449 old_cp->cand, best_cp);
6451 if (best_cp == old_cp)
6452 continue;
6454 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6456 /* No need for further pruning. */
6457 if (!act_delta)
6458 continue;
6460 /* Prune the new candidate set. */
6461 iv_ca_delta_commit (data, ivs, act_delta, true);
6462 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6463 iv_ca_delta_commit (data, ivs, act_delta, false);
6464 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6466 if (acost < orig_cost)
6468 *delta = act_delta;
6469 return acost;
6471 else
6472 iv_ca_delta_free (&act_delta);
6475 return orig_cost;
6478 /* Tries to extend the set IVS in the best possible way in order to
6479 express GROUP. If ORIGINALP is true, prefer candidates from
6480 the original set of IVs, otherwise favor important candidates not
6481 based on any memory object. */
6483 static bool
6484 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6485 struct iv_group *group, bool originalp)
6487 comp_cost best_cost, act_cost;
6488 unsigned i;
6489 bitmap_iterator bi;
6490 struct iv_cand *cand;
6491 struct iv_ca_delta *best_delta = NULL, *act_delta;
6492 struct cost_pair *cp;
6494 iv_ca_add_group (data, ivs, group);
6495 best_cost = iv_ca_cost (ivs);
6496 cp = iv_ca_cand_for_group (ivs, group);
6497 if (cp)
6499 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6500 iv_ca_set_no_cp (data, ivs, group);
6503 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6504 first try important candidates not based on any memory object. Only if
6505 this fails, try the specific ones. Rationale -- in loops with many
6506 variables the best choice often is to use just one generic biv. If we
6507 added here many ivs specific to the uses, the optimization algorithm later
6508 would be likely to get stuck in a local minimum, thus causing us to create
6509 too many ivs. The approach from few ivs to more seems more likely to be
6510 successful -- starting from few ivs, replacing an expensive use by a
6511 specific iv should always be a win. */
6512 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6514 cand = data->vcands[i];
6516 if (originalp && cand->pos != IP_ORIGINAL)
6517 continue;
6519 if (!originalp && cand->iv->base_object != NULL_TREE)
6520 continue;
6522 if (iv_ca_cand_used_p (ivs, cand))
6523 continue;
6525 cp = get_group_iv_cost (data, group, cand);
6526 if (!cp)
6527 continue;
6529 iv_ca_set_cp (data, ivs, group, cp);
6530 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6531 true);
6532 iv_ca_set_no_cp (data, ivs, group);
6533 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6535 if (act_cost < best_cost)
6537 best_cost = act_cost;
6539 iv_ca_delta_free (&best_delta);
6540 best_delta = act_delta;
6542 else
6543 iv_ca_delta_free (&act_delta);
6546 if (best_cost.infinite_cost_p ())
6548 for (i = 0; i < group->n_map_members; i++)
6550 cp = group->cost_map + i;
6551 cand = cp->cand;
6552 if (!cand)
6553 continue;
6555 /* Already tried this. */
6556 if (cand->important)
6558 if (originalp && cand->pos == IP_ORIGINAL)
6559 continue;
6560 if (!originalp && cand->iv->base_object == NULL_TREE)
6561 continue;
6564 if (iv_ca_cand_used_p (ivs, cand))
6565 continue;
6567 act_delta = NULL;
6568 iv_ca_set_cp (data, ivs, group, cp);
6569 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6570 iv_ca_set_no_cp (data, ivs, group);
6571 act_delta = iv_ca_delta_add (group,
6572 iv_ca_cand_for_group (ivs, group),
6573 cp, act_delta);
6575 if (act_cost < best_cost)
6577 best_cost = act_cost;
6579 if (best_delta)
6580 iv_ca_delta_free (&best_delta);
6581 best_delta = act_delta;
6583 else
6584 iv_ca_delta_free (&act_delta);
6588 iv_ca_delta_commit (data, ivs, best_delta, true);
6589 iv_ca_delta_free (&best_delta);
6591 return !best_cost.infinite_cost_p ();
6594 /* Finds an initial assignment of candidates to groups. */
6596 static struct iv_ca *
6597 get_initial_solution (struct ivopts_data *data, bool originalp)
6599 unsigned i;
6600 struct iv_ca *ivs = iv_ca_new (data);
6602 for (i = 0; i < data->vgroups.length (); i++)
6603 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6605 iv_ca_free (&ivs);
6606 return NULL;
6609 return ivs;
6612 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6613 points to a bool variable; if it is true, this function also tries to
6614 break the locally optimal fixed-point by replacing candidates in IVS. */
6616 static bool
6617 try_improve_iv_set (struct ivopts_data *data,
6618 struct iv_ca *ivs, bool *try_replace_p)
6620 unsigned i, n_ivs;
6621 comp_cost acost, best_cost = iv_ca_cost (ivs);
6622 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6623 struct iv_cand *cand;
6625 /* Try extending the set of induction variables by one. */
6626 for (i = 0; i < data->vcands.length (); i++)
6628 cand = data->vcands[i];
6630 if (iv_ca_cand_used_p (ivs, cand))
6631 continue;
6633 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6634 if (!act_delta)
6635 continue;
6637 /* If we successfully added the candidate and the set is small enough,
6638 try optimizing it by removing other candidates. */
6639 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6641 iv_ca_delta_commit (data, ivs, act_delta, true);
6642 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6643 iv_ca_delta_commit (data, ivs, act_delta, false);
6644 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6647 if (acost < best_cost)
6649 best_cost = acost;
6650 iv_ca_delta_free (&best_delta);
6651 best_delta = act_delta;
6653 else
6654 iv_ca_delta_free (&act_delta);
6657 if (!best_delta)
6659 /* Try removing the candidates from the set instead. */
6660 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6662 if (!best_delta && *try_replace_p)
6664 *try_replace_p = false;
6665 /* So far the candidate-selecting algorithm tends to choose few IVs,
6666 which handles the cases in which a loop has many variables but the
6667 best choice is to use only one general biv. One weakness is that it
6668 can't handle the opposite cases, in which different candidates should
6669 be chosen for different uses. To solve that problem, we replace
6670 candidates in the manner described in the comment of iv_ca_replace,
6671 thus giving the general algorithm a chance to break the locally
6672 optimal fixed-point in these cases. */
6673 best_cost = iv_ca_replace (data, ivs, &best_delta);
6676 if (!best_delta)
6677 return false;
6680 iv_ca_delta_commit (data, ivs, best_delta, true);
6681 gcc_assert (best_cost == iv_ca_cost (ivs));
6682 iv_ca_delta_free (&best_delta);
6683 return true;
6686 /* Attempts to find the optimal set of induction variables. We use a simple
6687 greedy heuristic: we try to replace at most one candidate in the selected
6688 solution and remove the unused ivs while this improves the cost. */
6690 static struct iv_ca *
6691 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6693 struct iv_ca *set;
6694 bool try_replace_p = true;
6696 /* Get the initial solution. */
6697 set = get_initial_solution (data, originalp);
6698 if (!set)
6700 if (dump_file && (dump_flags & TDF_DETAILS))
6701 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6702 return NULL;
6705 if (dump_file && (dump_flags & TDF_DETAILS))
6707 fprintf (dump_file, "Initial set of candidates:\n");
6708 iv_ca_dump (data, dump_file, set);
6711 while (try_improve_iv_set (data, set, &try_replace_p))
6713 if (dump_file && (dump_flags & TDF_DETAILS))
6715 fprintf (dump_file, "Improved to:\n");
6716 iv_ca_dump (data, dump_file, set);
6720 return set;
6723 static struct iv_ca *
6724 find_optimal_iv_set (struct ivopts_data *data)
6726 unsigned i;
6727 comp_cost cost, origcost;
6728 struct iv_ca *set, *origset;
6730 /* Determine the cost based on a strategy that starts with the original IVs,
6731 then try again using a strategy that prefers candidates not based
6732 on any memory object. */
6733 origset = find_optimal_iv_set_1 (data, true);
6734 set = find_optimal_iv_set_1 (data, false);
6736 if (!origset && !set)
6737 return NULL;
6739 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6740 cost = set ? iv_ca_cost (set) : infinite_cost;
6742 if (dump_file && (dump_flags & TDF_DETAILS))
6744 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6745 origcost.cost, origcost.complexity);
6746 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6747 cost.cost, cost.complexity);
6750 /* Choose the one with the best cost. */
6751 if (origcost <= cost)
6753 if (set)
6754 iv_ca_free (&set);
6755 set = origset;
6757 else if (origset)
6758 iv_ca_free (&origset);
6760 for (i = 0; i < data->vgroups.length (); i++)
6762 struct iv_group *group = data->vgroups[i];
6763 group->selected = iv_ca_cand_for_group (set, group)->cand;
6766 return set;
6769 /* Creates a new induction variable corresponding to CAND. */
6771 static void
6772 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6774 gimple_stmt_iterator incr_pos;
6775 tree base;
6776 struct iv_use *use;
6777 struct iv_group *group;
6778 bool after = false;
6780 gcc_assert (cand->iv != NULL);
6782 switch (cand->pos)
6784 case IP_NORMAL:
6785 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6786 break;
6788 case IP_END:
6789 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6790 after = true;
6791 break;
6793 case IP_AFTER_USE:
6794 after = true;
6795 /* fall through */
6796 case IP_BEFORE_USE:
6797 incr_pos = gsi_for_stmt (cand->incremented_at);
6798 break;
6800 case IP_ORIGINAL:
6801 /* Mark that the iv is preserved. */
6802 name_info (data, cand->var_before)->preserve_biv = true;
6803 name_info (data, cand->var_after)->preserve_biv = true;
6805 /* Rewrite the increment so that it uses var_before directly. */
6806 use = find_interesting_uses_op (data, cand->var_after);
6807 group = data->vgroups[use->group_id];
6808 group->selected = cand;
6809 return;
6812 gimple_add_tmp_var (cand->var_before);
6814 base = unshare_expr (cand->iv->base);
6816 create_iv (base, unshare_expr (cand->iv->step),
6817 cand->var_before, data->current_loop,
6818 &incr_pos, after, &cand->var_before, &cand->var_after);
6821 /* Creates new induction variables described in SET. */
6823 static void
6824 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6826 unsigned i;
6827 struct iv_cand *cand;
6828 bitmap_iterator bi;
6830 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6832 cand = data->vcands[i];
6833 create_new_iv (data, cand);
6836 if (dump_file && (dump_flags & TDF_DETAILS))
6838 fprintf (dump_file, "Selected IV set for loop %d",
6839 data->current_loop->num);
6840 if (data->loop_loc != UNKNOWN_LOCATION)
6841 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6842 LOCATION_LINE (data->loop_loc));
6843 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6844 avg_loop_niter (data->current_loop));
6845 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6846 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6848 cand = data->vcands[i];
6849 dump_cand (dump_file, cand);
6851 fprintf (dump_file, "\n");
6855 /* Rewrites USE (definition of iv used in a nonlinear expression)
6856 using candidate CAND. */
6858 static void
6859 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6860 struct iv_use *use, struct iv_cand *cand)
6862 gassign *ass;
6863 gimple_stmt_iterator bsi;
6864 tree comp, type = get_use_type (use), tgt;
6866 /* An important special case -- if we are asked to express value of
6867 the original iv by itself, just exit; there is no need to
6868 introduce a new computation (that might also need casting the
6869 variable to unsigned and back). */
6870 if (cand->pos == IP_ORIGINAL
6871 && cand->incremented_at == use->stmt)
6873 tree op = NULL_TREE;
6874 enum tree_code stmt_code;
6876 gcc_assert (is_gimple_assign (use->stmt));
6877 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6879 /* Check whether we may leave the computation unchanged.
6880 This is the case only if it does not rely on other
6881 computations in the loop -- otherwise, the computation
6882 we rely upon may be removed in remove_unused_ivs,
6883 thus leading to an ICE. */
6884 stmt_code = gimple_assign_rhs_code (use->stmt);
6885 if (stmt_code == PLUS_EXPR
6886 || stmt_code == MINUS_EXPR
6887 || stmt_code == POINTER_PLUS_EXPR)
6889 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6890 op = gimple_assign_rhs2 (use->stmt);
6891 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6892 op = gimple_assign_rhs1 (use->stmt);
6895 if (op != NULL_TREE)
6897 if (expr_invariant_in_loop_p (data->current_loop, op))
6898 return;
6899 if (TREE_CODE (op) == SSA_NAME)
6901 struct iv *iv = get_iv (data, op);
6902 if (iv != NULL && integer_zerop (iv->step))
6903 return;
6908 switch (gimple_code (use->stmt))
6910 case GIMPLE_PHI:
6911 tgt = PHI_RESULT (use->stmt);
6913 /* If we should keep the biv, do not replace it. */
6914 if (name_info (data, tgt)->preserve_biv)
6915 return;
6917 bsi = gsi_after_labels (gimple_bb (use->stmt));
6918 break;
6920 case GIMPLE_ASSIGN:
6921 tgt = gimple_assign_lhs (use->stmt);
6922 bsi = gsi_for_stmt (use->stmt);
6923 break;
6925 default:
6926 gcc_unreachable ();
6929 aff_tree aff_inv, aff_var;
6930 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6931 use, cand, &aff_inv, &aff_var))
6932 gcc_unreachable ();
6934 unshare_aff_combination (&aff_inv);
6935 unshare_aff_combination (&aff_var);
6936 /* Prefer a CSE opportunity over a loop-invariant computation by adding the
6937 offset last, so that iv_uses with different offsets can be CSEed. */
6938 poly_widest_int offset = aff_inv.offset;
6939 aff_inv.offset = 0;
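/* An illustrative example of the effect (hypothetical uses): with two
   iv_uses based on the same iv,

     use 1:  *(base + iv + 4)   becomes   tmp = base + iv;  *(tmp + 4)
     use 2:  *(base + iv + 8)   becomes                     *(tmp + 8)

   Stripping the constant offset here and re-adding it last lets both
   uses share the "base + iv" computation, which later CSE can merge.  */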
6941 gimple_seq stmt_list = NULL, seq = NULL;
6942 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6943 tree comp_op2 = aff_combination_to_tree (&aff_var);
6944 gcc_assert (comp_op1 && comp_op2);
6946 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6947 gimple_seq_add_seq (&stmt_list, seq);
6948 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6949 gimple_seq_add_seq (&stmt_list, seq);
6951 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6952 std::swap (comp_op1, comp_op2);
6954 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6956 comp = fold_build_pointer_plus (comp_op1,
6957 fold_convert (sizetype, comp_op2));
6958 comp = fold_build_pointer_plus (comp,
6959 wide_int_to_tree (sizetype, offset));
6961 else
6963 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6964 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6965 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6966 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6969 comp = fold_convert (type, comp);
6970 if (!valid_gimple_rhs_p (comp)
6971 || (gimple_code (use->stmt) != GIMPLE_PHI
6972 /* We can't allow re-allocating the stmt as it might be pointed
6973 to still. */
6974 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6975 >= gimple_num_ops (gsi_stmt (bsi)))))
6977 comp = force_gimple_operand (comp, &seq, true, NULL);
6978 gimple_seq_add_seq (&stmt_list, seq);
6979 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6981 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6982 /* As this isn't a plain copy we have to reset alignment
6983 information. */
6984 if (SSA_NAME_PTR_INFO (comp))
6985 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6989 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6990 if (gimple_code (use->stmt) == GIMPLE_PHI)
6992 ass = gimple_build_assign (tgt, comp);
6993 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6995 bsi = gsi_for_stmt (use->stmt);
6996 remove_phi_node (&bsi, false);
6998 else
7000 gimple_assign_set_rhs_from_tree (&bsi, comp);
7001 use->stmt = gsi_stmt (bsi);
7005 /* Performs a peephole optimization to reorder the iv update statement with
7006 a mem ref to enable instruction combining in later phases. The mem ref uses
7007 the iv value before the update, so the reordering transformation requires
7008 adjustment of the offset. CAND is the selected IV_CAND.
7010 Example:
7012 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7013 iv2 = iv1 + 1;
7015 if (t < val) (1)
7016 goto L;
7017 goto Head;
7020 directly propagating t over to (1) will introduce an overlapping live range,
7021 thus increasing register pressure. This peephole transforms it into the form below, where the MEM_REF offset drops from 16 to 8 to compensate for indexing with iv2 = iv1 + 1, i.e. one stride of 8 further:
7024 iv2 = iv1 + 1;
7025 t = MEM_REF (base, iv2, 8, 8);
7026 if (t < val)
7027 goto L;
7028 goto Head;
7031 static void
7032 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7034 tree var_after;
7035 gimple *iv_update, *stmt;
7036 basic_block bb;
7037 gimple_stmt_iterator gsi, gsi_iv;
7039 if (cand->pos != IP_NORMAL)
7040 return;
7042 var_after = cand->var_after;
7043 iv_update = SSA_NAME_DEF_STMT (var_after);
7045 bb = gimple_bb (iv_update);
7046 gsi = gsi_last_nondebug_bb (bb);
7047 stmt = gsi_stmt (gsi);
7049 /* Only handle conditional statement for now. */
7050 if (gimple_code (stmt) != GIMPLE_COND)
7051 return;
7053 gsi_prev_nondebug (&gsi);
7054 stmt = gsi_stmt (gsi);
7055 if (stmt != iv_update)
7056 return;
7058 gsi_prev_nondebug (&gsi);
7059 if (gsi_end_p (gsi))
7060 return;
7062 stmt = gsi_stmt (gsi);
7063 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7064 return;
7066 if (stmt != use->stmt)
7067 return;
7069 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7070 return;
7072 if (dump_file && (dump_flags & TDF_DETAILS))
7074 fprintf (dump_file, "Reordering \n");
7075 print_gimple_stmt (dump_file, iv_update, 0);
7076 print_gimple_stmt (dump_file, use->stmt, 0);
7077 fprintf (dump_file, "\n");
7080 gsi = gsi_for_stmt (use->stmt);
7081 gsi_iv = gsi_for_stmt (iv_update);
7082 gsi_move_before (&gsi_iv, &gsi);
7084 cand->pos = IP_BEFORE_USE;
7085 cand->incremented_at = use->stmt;
7088 /* Return the alias pointer type that should be used for a MEM_REF
7089 associated with USE, which has type USE_PTR_ADDRESS. */
7091 static tree
7092 get_alias_ptr_type_for_ptr_address (iv_use *use)
7094 gcall *call = as_a <gcall *> (use->stmt);
7095 switch (gimple_call_internal_fn (call))
7097 case IFN_MASK_LOAD:
7098 case IFN_MASK_STORE:
7099 /* The second argument contains the correct alias type. */
7100 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7101 return TREE_TYPE (gimple_call_arg (call, 1));
7103 default:
7104 gcc_unreachable ();
7109 /* Rewrites USE (address that is an iv) using candidate CAND. */
7111 static void
7112 rewrite_use_address (struct ivopts_data *data,
7113 struct iv_use *use, struct iv_cand *cand)
7115 aff_tree aff;
7116 bool ok;
7118 adjust_iv_update_pos (cand, use);
7119 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7120 gcc_assert (ok);
7121 unshare_aff_combination (&aff);
7123 /* To avoid undefined overflow problems, all IV candidates use unsigned
7124 integer types. The drawback is that this makes it impossible for
7125 create_mem_ref to distinguish an IV that is based on a memory object
7126 from one that represents simply an offset.
7128 To work around this problem, we pass a hint to create_mem_ref that
7129 indicates which variable (if any) in aff is an IV based on a memory
7130 object. Note that we only consider the candidate. If this is not
7131 based on an object, the base of the reference is in some subexpression
7132 of the use -- but these will use pointer types, so they are recognized
7133 by the create_mem_ref heuristics anyway. */
7134 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7135 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7136 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7137 tree type = use->mem_type;
7138 tree alias_ptr_type;
7139 if (use->type == USE_PTR_ADDRESS)
7140 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7141 else
7143 gcc_assert (type == TREE_TYPE (*use->op_p));
7144 unsigned int align = get_object_alignment (*use->op_p);
7145 if (align != TYPE_ALIGN (type))
7146 type = build_aligned_type (type, align);
7147 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7149 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7150 iv, base_hint, data->speed);
7152 if (use->type == USE_PTR_ADDRESS)
7154 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7155 ref = fold_convert (get_use_type (use), ref);
7156 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7157 true, GSI_SAME_STMT);
7159 else
7160 copy_ref_info (ref, *use->op_p);
7162 *use->op_p = ref;
7165 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7166 candidate CAND. */
7168 static void
7169 rewrite_use_compare (struct ivopts_data *data,
7170 struct iv_use *use, struct iv_cand *cand)
7172 tree comp, op, bound;
7173 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7174 enum tree_code compare;
7175 struct iv_group *group = data->vgroups[use->group_id];
7176 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7178 bound = cp->value;
7179 if (bound)
7181 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7182 tree var_type = TREE_TYPE (var);
7183 gimple_seq stmts;
7185 if (dump_file && (dump_flags & TDF_DETAILS))
7187 fprintf (dump_file, "Replacing exit test: ");
7188 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7190 compare = cp->comp;
7191 bound = unshare_expr (fold_convert (var_type, bound));
7192 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7193 if (stmts)
7194 gsi_insert_seq_on_edge_immediate (
7195 loop_preheader_edge (data->current_loop),
7196 stmts);
7198 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7199 gimple_cond_set_lhs (cond_stmt, var);
7200 gimple_cond_set_code (cond_stmt, compare);
7201 gimple_cond_set_rhs (cond_stmt, op);
7202 return;
7205 /* The induction variable elimination failed; just express the original
7206 giv. */
7207 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7208 gcc_assert (comp != NULL_TREE);
7209 gcc_assert (use->op_p != NULL);
7210 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7211 SSA_NAME_VAR (*use->op_p),
7212 true, GSI_SAME_STMT);
7215 /* Rewrite the groups using the selected induction variables. */
7217 static void
7218 rewrite_groups (struct ivopts_data *data)
7220 unsigned i, j;
7222 for (i = 0; i < data->vgroups.length (); i++)
7224 struct iv_group *group = data->vgroups[i];
7225 struct iv_cand *cand = group->selected;
7227 gcc_assert (cand);
7229 if (group->type == USE_NONLINEAR_EXPR)
7231 for (j = 0; j < group->vuses.length (); j++)
7233 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7234 update_stmt (group->vuses[j]->stmt);
7237 else if (address_p (group->type))
7239 for (j = 0; j < group->vuses.length (); j++)
7241 rewrite_use_address (data, group->vuses[j], cand);
7242 update_stmt (group->vuses[j]->stmt);
7245 else
7247 gcc_assert (group->type == USE_COMPARE);
7249 for (j = 0; j < group->vuses.length (); j++)
7251 rewrite_use_compare (data, group->vuses[j], cand);
7252 update_stmt (group->vuses[j]->stmt);
7258 /* Removes the ivs that are not used after rewriting. */
7260 static void
7261 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7263 unsigned j;
7264 bitmap_iterator bi;
7266 /* Figure out an order in which to release SSA DEFs so that we don't
7267 release something that we'd have to propagate into a debug stmt
7268 afterwards. */
7269 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7271 struct version_info *info;
7273 info = ver_info (data, j);
7274 if (info->iv
7275 && !integer_zerop (info->iv->step)
7276 && !info->inv_id
7277 && !info->iv->nonlin_use
7278 && !info->preserve_biv)
7280 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7282 tree def = info->iv->ssa_name;
7284 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7286 imm_use_iterator imm_iter;
7287 use_operand_p use_p;
7288 gimple *stmt;
7289 int count = 0;
7291 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7293 if (!gimple_debug_bind_p (stmt))
7294 continue;
7296 /* We just want to determine whether to do nothing
7297 (count == 0), to substitute the computed
7298 expression into a single use of the SSA DEF by
7299 itself (count == 1), or to use a debug temp
7300 because the SSA DEF is used multiple times or as
7301 part of a larger expression (count > 1). */
7302 count++;
7303 if (gimple_debug_bind_get_value (stmt) != def)
7304 count++;
7306 if (count > 1)
7307 BREAK_FROM_IMM_USE_STMT (imm_iter);
7310 if (!count)
7311 continue;
7313 struct iv_use dummy_use;
7314 struct iv_cand *best_cand = NULL, *cand;
7315 unsigned i, best_pref = 0, cand_pref;
7317 memset (&dummy_use, 0, sizeof (dummy_use));
7318 dummy_use.iv = info->iv;
7319 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7321 cand = data->vgroups[i]->selected;
7322 if (cand == best_cand)
7323 continue;
7324 cand_pref = operand_equal_p (cand->iv->step,
7325 info->iv->step, 0)
7326 ? 4 : 0;
7327 cand_pref
7328 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7329 == TYPE_MODE (TREE_TYPE (info->iv->base))
7330 ? 2 : 0;
7331 cand_pref
7332 += TREE_CODE (cand->iv->base) == INTEGER_CST
7333 ? 1 : 0;
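/* These weights form a strict priority encoding: a matching step (4)
   outranks a matching machine mode (2), which outranks a constant base
   (1); e.g. a candidate scoring 4 (same step only) beats one scoring 3
   (same mode and constant base).  */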
7334 if (best_cand == NULL || best_pref < cand_pref)
7336 best_cand = cand;
7337 best_pref = cand_pref;
7341 if (!best_cand)
7342 continue;
7344 tree comp = get_computation_at (data->current_loop,
7345 SSA_NAME_DEF_STMT (def),
7346 &dummy_use, best_cand);
7347 if (!comp)
7348 continue;
7350 if (count > 1)
7352 tree vexpr = make_node (DEBUG_EXPR_DECL);
7353 DECL_ARTIFICIAL (vexpr) = 1;
7354 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7355 if (SSA_NAME_VAR (def))
7356 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7357 else
7358 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7359 gdebug *def_temp
7360 = gimple_build_debug_bind (vexpr, comp, NULL);
7361 gimple_stmt_iterator gsi;
7363 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7364 gsi = gsi_after_labels (gimple_bb
7365 (SSA_NAME_DEF_STMT (def)));
7366 else
7367 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7369 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7370 comp = vexpr;
7373 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7375 if (!gimple_debug_bind_p (stmt))
7376 continue;
7378 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7379 SET_USE (use_p, comp);
7381 update_stmt (stmt);
7388 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7389 for hash_map::traverse. */
7391 bool
7392 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7394 free (value);
7395 return true;
7398 /* Frees data allocated by the optimization of a single loop. */
7400 static void
7401 free_loop_data (struct ivopts_data *data)
7403 unsigned i, j;
7404 bitmap_iterator bi;
7405 tree obj;
7407 if (data->niters)
7409 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7410 delete data->niters;
7411 data->niters = NULL;
7414 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7416 struct version_info *info;
7418 info = ver_info (data, i);
7419 info->iv = NULL;
7420 info->has_nonlin_use = false;
7421 info->preserve_biv = false;
7422 info->inv_id = 0;
7424 bitmap_clear (data->relevant);
7425 bitmap_clear (data->important_candidates);
7427 for (i = 0; i < data->vgroups.length (); i++)
7429 struct iv_group *group = data->vgroups[i];
7431 for (j = 0; j < group->vuses.length (); j++)
7432 free (group->vuses[j]);
7433 group->vuses.release ();
7435 BITMAP_FREE (group->related_cands);
7436 for (j = 0; j < group->n_map_members; j++)
7438 if (group->cost_map[j].inv_vars)
7439 BITMAP_FREE (group->cost_map[j].inv_vars);
7440 if (group->cost_map[j].inv_exprs)
7441 BITMAP_FREE (group->cost_map[j].inv_exprs);
7444 free (group->cost_map);
7445 free (group);
7447 data->vgroups.truncate (0);
7449 for (i = 0; i < data->vcands.length (); i++)
7451 struct iv_cand *cand = data->vcands[i];
7453 if (cand->inv_vars)
7454 BITMAP_FREE (cand->inv_vars);
7455 if (cand->inv_exprs)
7456 BITMAP_FREE (cand->inv_exprs);
7457 free (cand);
7459 data->vcands.truncate (0);
7461 if (data->version_info_size < num_ssa_names)
7463 data->version_info_size = 2 * num_ssa_names;
7464 free (data->version_info);
7465 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7468 data->max_inv_var_id = 0;
7469 data->max_inv_expr_id = 0;
7471 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7472 SET_DECL_RTL (obj, NULL_RTX);
7474 decl_rtl_to_reset.truncate (0);
7476 data->inv_expr_tab->empty ();
7478 data->iv_common_cand_tab->empty ();
7479 data->iv_common_cands.truncate (0);
7482 /* Finalizes the data structures used by the iv optimization pass. */
7485 static void
7486 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7488 free_loop_data (data);
7489 free (data->version_info);
7490 BITMAP_FREE (data->relevant);
7491 BITMAP_FREE (data->important_candidates);
7493 decl_rtl_to_reset.release ();
7494 data->vgroups.release ();
7495 data->vcands.release ();
7496 delete data->inv_expr_tab;
7497 data->inv_expr_tab = NULL;
7498 free_affine_expand_cache (&data->name_expansion_cache);
7499 delete data->iv_common_cand_tab;
7500 data->iv_common_cand_tab = NULL;
7501 data->iv_common_cands.release ();
7502 obstack_free (&data->iv_obstack, NULL);
7505 /* Returns true if the loop body BODY includes any function calls. */
7507 static bool
7508 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7510 gimple_stmt_iterator gsi;
7511 unsigned i;
7513 for (i = 0; i < num_nodes; i++)
7514 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7516 gimple *stmt = gsi_stmt (gsi);
7517 if (is_gimple_call (stmt)
7518 && !gimple_call_internal_p (stmt)
7519 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7520 return true;
7522 return false;
7525 /* Optimizes the LOOP. Returns true if anything changed. */
7527 static bool
7528 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7529 bitmap toremove)
7531 bool changed = false;
7532 struct iv_ca *iv_ca;
7533 edge exit = single_dom_exit (loop);
7534 basic_block *body;
7536 gcc_assert (!data->niters);
7537 data->current_loop = loop;
7538 data->loop_loc = find_loop_location (loop).get_location_t ();
7539 data->speed = optimize_loop_for_speed_p (loop);
7541 if (dump_file && (dump_flags & TDF_DETAILS))
7543 fprintf (dump_file, "Processing loop %d", loop->num);
7544 if (data->loop_loc != UNKNOWN_LOCATION)
7545 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7546 LOCATION_LINE (data->loop_loc));
7547 fprintf (dump_file, "\n");
7549 if (exit)
7551 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7552 exit->src->index, exit->dest->index);
7553 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7554 fprintf (dump_file, "\n");
7557 fprintf (dump_file, "\n");
7560 body = get_loop_body (loop);
7561 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7562 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7563 free (body);
7565 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7567 /* For each ssa name, determine whether it behaves as an induction variable
7568 in some loop. */
7569 if (!find_induction_variables (data))
7570 goto finish;
7572 /* Finds interesting uses (item 1). */
7573 find_interesting_uses (data);
7574 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7575 goto finish;
7577 /* Finds candidates for the induction variables (item 2). */
7578 find_iv_candidates (data);
7580 /* Calculates the costs (item 3, part 1). */
7581 determine_iv_costs (data);
7582 determine_group_iv_costs (data);
7583 determine_set_costs (data);
7585 /* Find the optimal set of induction variables (item 3, part 2). */
7586 iv_ca = find_optimal_iv_set (data);
7587 if (!iv_ca)
7588 goto finish;
7589 changed = true;
7591 /* Create the new induction variables (item 4, part 1). */
7592 create_new_ivs (data, iv_ca);
7593 iv_ca_free (&iv_ca);
7595 /* Rewrite the uses (item 4, part 2). */
7596 rewrite_groups (data);
7598 /* Remove the ivs that are unused after rewriting. */
7599 remove_unused_ivs (data, toremove);
7601 finish:
7602 free_loop_data (data);
7604 return changed;
7607 /* Main entry point. Optimizes induction variables in loops. */
7609 void
7610 tree_ssa_iv_optimize (void)
7612 struct loop *loop;
7613 struct ivopts_data data;
7614 auto_bitmap toremove;
7616 tree_ssa_iv_optimize_init (&data);
7618 /* Optimize the loops starting with the innermost ones. */
7619 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7621 if (dump_file && (dump_flags & TDF_DETAILS))
7622 flow_loop_dump (loop, dump_file, NULL, 1);
7624 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7627 /* Remove eliminated IV defs. */
7628 release_defs_bitset (toremove);
7630 /* We have changed the structure of induction variables; it might happen
7631 that definitions in the scev database refer to some of them that were
7632 eliminated. */
7633 scev_reset_htab ();
7634 /* Likewise niter and control-IV information. */
7635 free_numbers_of_iterations_estimates (cfun);
7637 tree_ssa_iv_optimize_finalize (&data);
7640 #include "gt-tree-ssa-loop-ivopts.h"