[42/46] Add vec_info::replace_stmt
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob6b445bd2214ae424fadf424f3555786224d6c9fa
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in group.
34 Generally, address type uses are grouped together if their iv bases
35 are different in constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible, it might give a better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
116 /* The infinite cost. */
117 #define INFTY 10000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
129 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
135 return niter;
138 struct iv_use;
140 /* Representation of the induction variable. */
141 struct iv
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to that the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
165 /* Types of uses. */
166 enum use_type
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
177 /* Cost of a computation. */
178 struct comp_cost
180 comp_cost (): cost (0), complexity (0), scratch (0)
183 comp_cost (int cost, unsigned complexity, int scratch = 0)
184 : cost (cost), complexity (complexity), scratch (scratch)
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
199 /* Subtracts constant C to this comp_cost. */
200 comp_cost operator-= (HOST_WIDE_INT c);
202 /* Divide the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
205 /* Multiply the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
208 /* Subtracts costs COST1 and COST2. */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
220 /* Returns true if COST1 is smaller or equal than COST2. */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
223 int cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int scratch; /* Scratch used during cost computation. */
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
234 bool
235 comp_cost::infinite_cost_p ()
237 return cost == INFTY;
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
246 cost1.cost += cost2.cost;
247 cost1.complexity += cost2.complexity;
249 return cost1;
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
255 if (cost1.infinite_cost_p ())
256 return infinite_cost;
258 gcc_assert (!cost2.infinite_cost_p ());
260 cost1.cost -= cost2.cost;
261 cost1.complexity -= cost2.complexity;
263 return cost1;
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
269 *this = *this + cost;
270 return *this;
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
276 if (infinite_cost_p ())
277 return *this;
279 this->cost += c;
281 return *this;
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
287 if (infinite_cost_p ())
288 return *this;
290 this->cost -= c;
292 return *this;
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
298 if (infinite_cost_p ())
299 return *this;
301 this->cost /= c;
303 return *this;
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
309 if (infinite_cost_p ())
310 return *this;
312 this->cost *= c;
314 return *this;
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
320 *this = *this - cost;
321 return *this;
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
327 if (cost1.cost == cost2.cost)
328 return cost1.complexity < cost2.complexity;
330 return cost1.cost < cost2.cost;
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
336 return cost1.cost == cost2.cost
337 && cost1.complexity == cost2.complexity;
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
343 return cost1 < cost2 || cost1 == cost2;
346 struct iv_inv_expr_ent;
348 /* The candidate - cost pair. */
349 struct cost_pair
351 struct iv_cand *cand; /* The candidate. */
352 comp_cost cost; /* The cost. */
353 enum tree_code comp; /* For iv elimination, the comparison. */
354 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
355 preserved when representing iv_use with iv_cand. */
356 bitmap inv_exprs; /* The list of newly created invariant expressions
357 when representing iv_use with iv_cand. */
358 tree value; /* For final value elimination, the expression for
359 the final value of the iv. For iv elimination,
360 the new bound to compare with. */
363 /* Use. */
364 struct iv_use
366 unsigned id; /* The id of the use. */
367 unsigned group_id; /* The group id the use belongs to. */
368 enum use_type type; /* Type of the use. */
369 tree mem_type; /* The memory type to use when testing whether an
370 address is legitimate, and what the address's
371 cost is. */
372 struct iv *iv; /* The induction variable it is based on. */
373 gimple *stmt; /* Statement in that it occurs. */
374 tree *op_p; /* The place where it occurs. */
376 tree addr_base; /* Base address with const offset stripped. */
377 poly_uint64_pod addr_offset;
378 /* Const offset stripped from base address. */
381 /* Group of uses. */
382 struct iv_group
384 /* The id of the group. */
385 unsigned id;
386 /* Uses of the group are of the same type. */
387 enum use_type type;
388 /* The set of "related" IV candidates, plus the important ones. */
389 bitmap related_cands;
390 /* Number of IV candidates in the cost_map. */
391 unsigned n_map_members;
392 /* The costs wrto the iv candidates. */
393 struct cost_pair *cost_map;
394 /* The selected candidate for the group. */
395 struct iv_cand *selected;
396 /* Uses in the group. */
397 vec<struct iv_use *> vuses;
400 /* The position where the iv is computed. */
401 enum iv_position
403 IP_NORMAL, /* At the end, just before the exit condition. */
404 IP_END, /* At the end of the latch block. */
405 IP_BEFORE_USE, /* Immediately before a specific use. */
406 IP_AFTER_USE, /* Immediately after a specific use. */
407 IP_ORIGINAL /* The original biv. */
410 /* The induction variable candidate. */
411 struct iv_cand
413 unsigned id; /* The number of the candidate. */
414 bool important; /* Whether this is an "important" candidate, i.e. such
415 that it should be considered by all uses. */
416 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
417 gimple *incremented_at;/* For original biv, the statement where it is
418 incremented. */
419 tree var_before; /* The variable used for it before increment. */
420 tree var_after; /* The variable used for it after increment. */
421 struct iv *iv; /* The value of the candidate. NULL for
422 "pseudocandidate" used to indicate the possibility
423 to replace the final value of an iv by direct
424 computation of the value. */
425 unsigned cost; /* Cost of the candidate. */
426 unsigned cost_step; /* Cost of the candidate's increment operation. */
427 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 where it is incremented. */
429 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
430 iv_cand. */
431 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
432 hanlde it as a new invariant expression which will
433 be hoisted out of loop. */
434 struct iv *orig_iv; /* The original iv if this cand is added from biv with
435 smaller type. */
438 /* Hashtable entry for common candidate derived from iv uses. */
439 struct iv_common_cand
441 tree base;
442 tree step;
443 /* IV uses from which this common candidate is derived. */
444 auto_vec<struct iv_use *> uses;
445 hashval_t hash;
448 /* Hashtable helpers. */
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
452 static inline hashval_t hash (const iv_common_cand *);
453 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
456 /* Hash function for possible common candidates. */
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
461 return ccand->hash;
464 /* Hash table equality function for common candidates. */
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 const iv_common_cand *ccand2)
470 return (ccand1->hash == ccand2->hash
471 && operand_equal_p (ccand1->base, ccand2->base, 0)
472 && operand_equal_p (ccand1->step, ccand2->step, 0)
473 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
477 /* Loop invariant expression hashtable entry. */
479 struct iv_inv_expr_ent
481 /* Tree expression of the entry. */
482 tree expr;
483 /* Unique indentifier. */
484 int id;
485 /* Hash value. */
486 hashval_t hash;
489 /* Sort iv_inv_expr_ent pair A and B by id field. */
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
494 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
497 unsigned id1 = (*e1)->id;
498 unsigned id2 = (*e2)->id;
500 if (id1 < id2)
501 return -1;
502 else if (id1 > id2)
503 return 1;
504 else
505 return 0;
508 /* Hashtable helpers. */
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
512 static inline hashval_t hash (const iv_inv_expr_ent *);
513 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
516 /* Return true if uses of type TYPE represent some form of address. */
518 inline bool
519 address_p (use_type type)
521 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
524 /* Hash function for loop invariant expressions. */
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
529 return expr->hash;
532 /* Hash table equality function for expressions. */
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 const iv_inv_expr_ent *expr2)
538 return expr1->hash == expr2->hash
539 && operand_equal_p (expr1->expr, expr2->expr, 0);
542 struct ivopts_data
544 /* The currently optimized loop. */
545 struct loop *current_loop;
546 source_location loop_loc;
548 /* Numbers of iterations for all exits of the current loop. */
549 hash_map<edge, tree_niter_desc *> *niters;
551 /* Number of registers used in it. */
552 unsigned regs_used;
554 /* The size of version_info array allocated. */
555 unsigned version_info_size;
557 /* The array of information for the ssa names. */
558 struct version_info *version_info;
560 /* The hashtable of loop invariant expressions created
561 by ivopt. */
562 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
564 /* The bitmap of indices in version_info whose value was changed. */
565 bitmap relevant;
567 /* The uses of induction variables. */
568 vec<iv_group *> vgroups;
570 /* The candidates. */
571 vec<iv_cand *> vcands;
573 /* A bitmap of important candidates. */
574 bitmap important_candidates;
576 /* Cache used by tree_to_aff_combination_expand. */
577 hash_map<tree, name_expansion *> *name_expansion_cache;
579 /* The hashtable of common candidates derived from iv uses. */
580 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
582 /* The common candidates. */
583 vec<iv_common_cand *> iv_common_cands;
585 /* The maximum invariant variable id. */
586 unsigned max_inv_var_id;
588 /* The maximum invariant expression id. */
589 unsigned max_inv_expr_id;
591 /* Number of no_overflow BIVs which are not used in memory address. */
592 unsigned bivs_not_used_in_addr;
594 /* Obstack for iv structure. */
595 struct obstack iv_obstack;
597 /* Whether to consider just related and important candidates when replacing a
598 use. */
599 bool consider_all_candidates;
601 /* Are we optimizing for speed? */
602 bool speed;
604 /* Whether the loop body includes any function calls. */
605 bool body_includes_call;
607 /* Whether the loop body can only be exited via single exit. */
608 bool loop_single_exit_p;
611 /* An assignment of iv candidates to uses. */
613 struct iv_ca
615 /* The number of uses covered by the assignment. */
616 unsigned upto;
618 /* Number of uses that cannot be expressed by the candidates in the set. */
619 unsigned bad_groups;
621 /* Candidate assigned to a use, together with the related costs. */
622 struct cost_pair **cand_for_group;
624 /* Number of times each candidate is used. */
625 unsigned *n_cand_uses;
627 /* The candidates used. */
628 bitmap cands;
630 /* The number of candidates in the set. */
631 unsigned n_cands;
633 /* The number of invariants needed, including both invariant variants and
634 invariant expressions. */
635 unsigned n_invs;
637 /* Total cost of expressing uses. */
638 comp_cost cand_use_cost;
640 /* Total cost of candidates. */
641 unsigned cand_cost;
643 /* Number of times each invariant variable is used. */
644 unsigned *n_inv_var_uses;
646 /* Number of times each invariant expression is used. */
647 unsigned *n_inv_expr_uses;
649 /* Total cost of the assignment. */
650 comp_cost cost;
653 /* Difference of two iv candidate assignments. */
655 struct iv_ca_delta
657 /* Changed group. */
658 struct iv_group *group;
660 /* An old assignment (for rollback purposes). */
661 struct cost_pair *old_cp;
663 /* A new assignment. */
664 struct cost_pair *new_cp;
666 /* Next change in the list. */
667 struct iv_ca_delta *next;
670 /* Bound on number of candidates below that all candidates are considered. */
672 #define CONSIDER_ALL_CANDIDATES_BOUND \
673 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
676 optimizing such a loop would help, and it would take ages). */
678 #define MAX_CONSIDERED_GROUPS \
679 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
681 /* If there are at most this number of ivs in the set, try removing unnecessary
682 ivs from the set always. */
684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
685 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
687 /* The list of trees for that the decl_rtl field must be reset is stored
688 here. */
690 static vec<tree> decl_rtl_to_reset;
692 static comp_cost force_expr_to_var_cost (tree, bool);
694 /* The single loop exit if it dominates the latch, NULL otherwise. */
696 edge
697 single_dom_exit (struct loop *loop)
699 edge exit = single_exit (loop);
701 if (!exit)
702 return NULL;
704 if (!just_once_each_iteration_p (loop, exit->src))
705 return NULL;
707 return exit;
710 /* Dumps information about the induction variable IV to FILE. Don't dump
711 variable's name if DUMP_NAME is FALSE. The information is dumped with
712 preceding spaces indicated by INDENT_LEVEL. */
714 void
715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
717 const char *p;
718 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
720 if (indent_level > 4)
721 indent_level = 4;
722 p = spaces + 8 - (indent_level << 1);
724 fprintf (file, "%sIV struct:\n", p);
725 if (iv->ssa_name && dump_name)
727 fprintf (file, "%s SSA_NAME:\t", p);
728 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
729 fprintf (file, "\n");
732 fprintf (file, "%s Type:\t", p);
733 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
734 fprintf (file, "\n");
736 fprintf (file, "%s Base:\t", p);
737 print_generic_expr (file, iv->base, TDF_SLIM);
738 fprintf (file, "\n");
740 fprintf (file, "%s Step:\t", p);
741 print_generic_expr (file, iv->step, TDF_SLIM);
742 fprintf (file, "\n");
744 if (iv->base_object)
746 fprintf (file, "%s Object:\t", p);
747 print_generic_expr (file, iv->base_object, TDF_SLIM);
748 fprintf (file, "\n");
751 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
753 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
754 p, iv->no_overflow ? "No-overflow" : "Overflow");
757 /* Dumps information about the USE to FILE. */
759 void
760 dump_use (FILE *file, struct iv_use *use)
762 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
763 fprintf (file, " At stmt:\t");
764 print_gimple_stmt (file, use->stmt, 0);
765 fprintf (file, " At pos:\t");
766 if (use->op_p)
767 print_generic_expr (file, *use->op_p, TDF_SLIM);
768 fprintf (file, "\n");
769 dump_iv (file, use->iv, false, 2);
772 /* Dumps information about the uses to FILE. */
774 void
775 dump_groups (FILE *file, struct ivopts_data *data)
777 unsigned i, j;
778 struct iv_group *group;
780 for (i = 0; i < data->vgroups.length (); i++)
782 group = data->vgroups[i];
783 fprintf (file, "Group %d:\n", group->id);
784 if (group->type == USE_NONLINEAR_EXPR)
785 fprintf (file, " Type:\tGENERIC\n");
786 else if (group->type == USE_REF_ADDRESS)
787 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
788 else if (group->type == USE_PTR_ADDRESS)
789 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
790 else
792 gcc_assert (group->type == USE_COMPARE);
793 fprintf (file, " Type:\tCOMPARE\n");
795 for (j = 0; j < group->vuses.length (); j++)
796 dump_use (file, group->vuses[j]);
800 /* Dumps information about induction variable candidate CAND to FILE. */
802 void
803 dump_cand (FILE *file, struct iv_cand *cand)
805 struct iv *iv = cand->iv;
807 fprintf (file, "Candidate %d:\n", cand->id);
808 if (cand->inv_vars)
810 fprintf (file, " Depend on inv.vars: ");
811 dump_bitmap (file, cand->inv_vars);
813 if (cand->inv_exprs)
815 fprintf (file, " Depend on inv.exprs: ");
816 dump_bitmap (file, cand->inv_exprs);
819 if (cand->var_before)
821 fprintf (file, " Var befor: ");
822 print_generic_expr (file, cand->var_before, TDF_SLIM);
823 fprintf (file, "\n");
825 if (cand->var_after)
827 fprintf (file, " Var after: ");
828 print_generic_expr (file, cand->var_after, TDF_SLIM);
829 fprintf (file, "\n");
832 switch (cand->pos)
834 case IP_NORMAL:
835 fprintf (file, " Incr POS: before exit test\n");
836 break;
838 case IP_BEFORE_USE:
839 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
840 break;
842 case IP_AFTER_USE:
843 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
844 break;
846 case IP_END:
847 fprintf (file, " Incr POS: at end\n");
848 break;
850 case IP_ORIGINAL:
851 fprintf (file, " Incr POS: orig biv\n");
852 break;
855 dump_iv (file, iv, false, 1);
858 /* Returns the info for ssa version VER. */
860 static inline struct version_info *
861 ver_info (struct ivopts_data *data, unsigned ver)
863 return data->version_info + ver;
866 /* Returns the info for ssa name NAME. */
868 static inline struct version_info *
869 name_info (struct ivopts_data *data, tree name)
871 return ver_info (data, SSA_NAME_VERSION (name));
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
875 emitted in LOOP. */
877 static bool
878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
880 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
882 gcc_assert (bb);
884 if (sbb == loop->latch)
885 return true;
887 if (sbb != bb)
888 return false;
890 return stmt == last_stmt (bb);
893 /* Returns true if STMT if after the place where the original induction
894 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
895 if the positions are identical. */
897 static bool
898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
900 basic_block cand_bb = gimple_bb (cand->incremented_at);
901 basic_block stmt_bb = gimple_bb (stmt);
903 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
904 return false;
906 if (stmt_bb != cand_bb)
907 return true;
909 if (true_if_equal
910 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
911 return true;
912 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
915 /* Returns true if STMT if after the place where the induction variable
916 CAND is incremented in LOOP. */
918 static bool
919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
921 switch (cand->pos)
923 case IP_END:
924 return false;
926 case IP_NORMAL:
927 return stmt_after_ip_normal_pos (loop, stmt);
929 case IP_ORIGINAL:
930 case IP_AFTER_USE:
931 return stmt_after_inc_pos (cand, stmt, false);
933 case IP_BEFORE_USE:
934 return stmt_after_inc_pos (cand, stmt, true);
936 default:
937 gcc_unreachable ();
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
943 static bool
944 abnormal_ssa_name_p (tree exp)
946 if (!exp)
947 return false;
949 if (TREE_CODE (exp) != SSA_NAME)
950 return false;
952 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956 abnormal phi node. Callback for for_each_index. */
958 static bool
959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
960 void *data ATTRIBUTE_UNUSED)
962 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
964 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
965 return false;
966 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
967 return false;
970 return !abnormal_ssa_name_p (*index);
973 /* Returns true if EXPR contains a ssa name that occurs in an
974 abnormal phi node. */
976 bool
977 contains_abnormal_ssa_name_p (tree expr)
979 enum tree_code code;
980 enum tree_code_class codeclass;
982 if (!expr)
983 return false;
985 code = TREE_CODE (expr);
986 codeclass = TREE_CODE_CLASS (code);
988 if (code == CALL_EXPR)
990 tree arg;
991 call_expr_arg_iterator iter;
992 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
993 if (contains_abnormal_ssa_name_p (arg))
994 return true;
995 return false;
998 if (code == SSA_NAME)
999 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1001 if (code == INTEGER_CST
1002 || is_gimple_min_invariant (expr))
1003 return false;
1005 if (code == ADDR_EXPR)
1006 return !for_each_index (&TREE_OPERAND (expr, 0),
1007 idx_contains_abnormal_ssa_name_p,
1008 NULL);
1010 if (code == COND_EXPR)
1011 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1012 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1013 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1015 switch (codeclass)
1017 case tcc_binary:
1018 case tcc_comparison:
1019 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1020 return true;
1022 /* Fallthru. */
1023 case tcc_unary:
1024 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1025 return true;
1027 break;
1029 default:
1030 gcc_unreachable ();
1033 return false;
1036 /* Returns the structure describing number of iterations determined from
1037 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1039 static struct tree_niter_desc *
1040 niter_for_exit (struct ivopts_data *data, edge exit)
1042 struct tree_niter_desc *desc;
1043 tree_niter_desc **slot;
1045 if (!data->niters)
1047 data->niters = new hash_map<edge, tree_niter_desc *>;
1048 slot = NULL;
1050 else
1051 slot = data->niters->get (exit);
1053 if (!slot)
1055 /* Try to determine number of iterations. We cannot safely work with ssa
1056 names that appear in phi nodes on abnormal edges, so that we do not
1057 create overlapping life ranges for them (PR 27283). */
1058 desc = XNEW (struct tree_niter_desc);
1059 if (!number_of_iterations_exit (data->current_loop,
1060 exit, desc, true)
1061 || contains_abnormal_ssa_name_p (desc->niter))
1063 XDELETE (desc);
1064 desc = NULL;
1066 data->niters->put (exit, desc);
1068 else
1069 desc = *slot;
1071 return desc;
1074 /* Returns the structure describing number of iterations determined from
1075 single dominating exit of DATA->current_loop, or NULL if something
1076 goes wrong. */
1078 static struct tree_niter_desc *
1079 niter_for_single_dom_exit (struct ivopts_data *data)
1081 edge exit = single_dom_exit (data->current_loop);
1083 if (!exit)
1084 return NULL;
1086 return niter_for_exit (data, exit);
1089 /* Initializes data structures used by the iv optimization pass, stored
1090 in DATA. */
1092 static void
1093 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1095 data->version_info_size = 2 * num_ssa_names;
1096 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1097 data->relevant = BITMAP_ALLOC (NULL);
1098 data->important_candidates = BITMAP_ALLOC (NULL);
1099 data->max_inv_var_id = 0;
1100 data->max_inv_expr_id = 0;
1101 data->niters = NULL;
1102 data->vgroups.create (20);
1103 data->vcands.create (20);
1104 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1105 data->name_expansion_cache = NULL;
1106 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1107 data->iv_common_cands.create (20);
1108 decl_rtl_to_reset.create (20);
1109 gcc_obstack_init (&data->iv_obstack);
1112 /* Returns a memory object to that EXPR points. In case we are able to
1113 determine that it does not point to any such object, NULL is returned. */
1115 static tree
1116 determine_base_object (tree expr)
1118 enum tree_code code = TREE_CODE (expr);
1119 tree base, obj;
1121 /* If this is a pointer casted to any type, we need to determine
1122 the base object for the pointer; so handle conversions before
1123 throwing away non-pointer expressions. */
1124 if (CONVERT_EXPR_P (expr))
1125 return determine_base_object (TREE_OPERAND (expr, 0));
1127 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1128 return NULL_TREE;
1130 switch (code)
1132 case INTEGER_CST:
1133 return NULL_TREE;
1135 case ADDR_EXPR:
1136 obj = TREE_OPERAND (expr, 0);
1137 base = get_base_address (obj);
1139 if (!base)
1140 return expr;
1142 if (TREE_CODE (base) == MEM_REF)
1143 return determine_base_object (TREE_OPERAND (base, 0));
1145 return fold_convert (ptr_type_node,
1146 build_fold_addr_expr (base));
1148 case POINTER_PLUS_EXPR:
1149 return determine_base_object (TREE_OPERAND (expr, 0));
1151 case PLUS_EXPR:
1152 case MINUS_EXPR:
1153 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1154 gcc_unreachable ();
1156 default:
1157 if (POLY_INT_CST_P (expr))
1158 return NULL_TREE;
1159 return fold_convert (ptr_type_node, expr);
1163 /* Return true if address expression with non-DECL_P operand appears
1164 in EXPR. */
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1169 bool res = false;
1171 STRIP_NOPS (expr);
1172 switch (TREE_CODE (expr))
1174 case POINTER_PLUS_EXPR:
1175 case PLUS_EXPR:
1176 case MINUS_EXPR:
1177 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179 break;
1181 case ADDR_EXPR:
1182 return (!DECL_P (TREE_OPERAND (expr, 0)));
1184 default:
1185 return false;
1188 return res;
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196 bool no_overflow = false)
1198 tree expr = base;
1199 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200 sizeof (struct iv));
1201 gcc_assert (step != NULL_TREE);
1203 /* Lower address expression in base except ones with DECL_P as operand.
1204 By doing this:
1205 1) More accurate cost can be computed for address expressions;
1206 2) Duplicate candidates won't be created for bases in different
1207 forms, like &a[0] and &a. */
1208 STRIP_NOPS (expr);
1209 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210 || contain_complex_addr_expr (expr))
1212 aff_tree comb;
1213 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1217 iv->base = base;
1218 iv->base_object = determine_base_object (base);
1219 iv->step = step;
1220 iv->biv_p = false;
1221 iv->nonlin_use = NULL;
1222 iv->ssa_name = NULL_TREE;
1223 if (!no_overflow
1224 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225 base, step))
1226 no_overflow = true;
1227 iv->no_overflow = no_overflow;
1228 iv->have_address_use = false;
1230 return iv;
1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1234 doesn't overflow. */
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238 bool no_overflow)
1240 struct version_info *info = name_info (data, iv);
1242 gcc_assert (!info->iv);
1244 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245 info->iv = alloc_iv (data, base, step, no_overflow);
1246 info->iv->ssa_name = iv;
1249 /* Finds induction variable declaration for VAR. */
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1254 basic_block bb;
1255 tree type = TREE_TYPE (var);
1257 if (!POINTER_TYPE_P (type)
1258 && !INTEGRAL_TYPE_P (type))
1259 return NULL;
1261 if (!name_info (data, var)->iv)
1263 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1265 if (!bb
1266 || !flow_bb_inside_loop_p (data->current_loop, bb))
1267 set_iv (data, var, var, build_int_cst (type, 0), true);
1270 return name_info (data, var)->iv;
1273 /* Return the first non-invariant ssa var found in EXPR. */
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1278 int i, n;
1279 tree tmp;
1280 enum tree_code code;
1282 if (!expr || is_gimple_min_invariant (expr))
1283 return NULL;
1285 code = TREE_CODE (expr);
1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1288 n = TREE_OPERAND_LENGTH (expr);
1289 for (i = 0; i < n; i++)
1291 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1293 if (tmp)
1294 return tmp;
1297 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1300 /* Finds basic ivs. */
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1305 gphi *phi;
1306 affine_iv iv;
1307 tree step, type, base, stop;
1308 bool found = false;
1309 struct loop *loop = data->current_loop;
1310 gphi_iterator psi;
1312 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1314 phi = psi.phi ();
1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317 continue;
1319 if (virtual_operand_p (PHI_RESULT (phi)))
1320 continue;
1322 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323 continue;
1325 if (integer_zerop (iv.step))
1326 continue;
1328 step = iv.step;
1329 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330 /* Stop expanding iv base at the first ssa var referred by iv step.
1331 Ideally we should stop at any ssa var, because that's expensive
1332 and unusual to happen, we just do it on the first one.
1334 See PR64705 for the rationale. */
1335 stop = extract_single_var_from_expr (step);
1336 base = expand_simple_operations (base, stop);
1337 if (contains_abnormal_ssa_name_p (base)
1338 || contains_abnormal_ssa_name_p (step))
1339 continue;
1341 type = TREE_TYPE (PHI_RESULT (phi));
1342 base = fold_convert (type, base);
1343 if (step)
1345 if (POINTER_TYPE_P (type))
1346 step = convert_to_ptrofftype (step);
1347 else
1348 step = fold_convert (type, step);
1351 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352 found = true;
1355 return found;
1358 /* Marks basic ivs. */
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1363 gphi *phi;
1364 gimple *def;
1365 tree var;
1366 struct iv *iv, *incr_iv;
1367 struct loop *loop = data->current_loop;
1368 basic_block incr_bb;
1369 gphi_iterator psi;
1371 data->bivs_not_used_in_addr = 0;
1372 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1374 phi = psi.phi ();
1376 iv = get_iv (data, PHI_RESULT (phi));
1377 if (!iv)
1378 continue;
1380 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381 def = SSA_NAME_DEF_STMT (var);
1382 /* Don't mark iv peeled from other one as biv. */
1383 if (def
1384 && gimple_code (def) == GIMPLE_PHI
1385 && gimple_bb (def) == loop->header)
1386 continue;
1388 incr_iv = get_iv (data, var);
1389 if (!incr_iv)
1390 continue;
1392 /* If the increment is in the subloop, ignore it. */
1393 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394 if (incr_bb->loop_father != data->current_loop
1395 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396 continue;
1398 iv->biv_p = true;
1399 incr_iv->biv_p = true;
1400 if (iv->no_overflow)
1401 data->bivs_not_used_in_addr++;
1402 if (incr_iv->no_overflow)
1403 data->bivs_not_used_in_addr++;
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408 parameters to IV. */
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1413 tree lhs, stop;
1414 struct loop *loop = data->current_loop;
1416 iv->base = NULL_TREE;
1417 iv->step = NULL_TREE;
1419 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420 return false;
1422 lhs = gimple_assign_lhs (stmt);
1423 if (TREE_CODE (lhs) != SSA_NAME)
1424 return false;
1426 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427 return false;
1429 /* Stop expanding iv base at the first ssa var referred by iv step.
1430 Ideally we should stop at any ssa var, because that's expensive
1431 and unusual to happen, we just do it on the first one.
1433 See PR64705 for the rationale. */
1434 stop = extract_single_var_from_expr (iv->step);
1435 iv->base = expand_simple_operations (iv->base, stop);
1436 if (contains_abnormal_ssa_name_p (iv->base)
1437 || contains_abnormal_ssa_name_p (iv->step))
1438 return false;
1440 /* If STMT could throw, then do not consider STMT as defining a GIV.
1441 While this will suppress optimizations, we can not safely delete this
1442 GIV and associated statements, even if it appears it is not used. */
1443 if (stmt_could_throw_p (stmt))
1444 return false;
1446 return true;
1449 /* Finds general ivs in statement STMT. */
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1454 affine_iv iv;
1456 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457 return;
1459 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1462 /* Finds general ivs in basic block BB. */
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1467 gimple_stmt_iterator bsi;
1469 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470 find_givs_in_stmt (data, gsi_stmt (bsi));
1473 /* Finds general ivs. */
1475 static void
1476 find_givs (struct ivopts_data *data)
1478 struct loop *loop = data->current_loop;
1479 basic_block *body = get_loop_body_in_dom_order (loop);
1480 unsigned i;
1482 for (i = 0; i < loop->num_nodes; i++)
1483 find_givs_in_bb (data, body[i]);
1484 free (body);
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488 variable and if so, its initial value and step. */
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1493 unsigned i;
1494 bitmap_iterator bi;
1496 if (!find_bivs (data))
1497 return false;
1499 find_givs (data);
1500 mark_bivs (data);
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1504 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1506 if (niter)
1508 fprintf (dump_file, " number of iterations ");
1509 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510 if (!integer_zerop (niter->may_be_zero))
1512 fprintf (dump_file, "; zero if ");
1513 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1515 fprintf (dump_file, "\n");
1518 fprintf (dump_file, "\n<Induction Vars>:\n");
1519 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1521 struct version_info *info = ver_info (data, i);
1522 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1527 return true;
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532 is the const offset stripped from IV base and MEM_TYPE is the type
1533 of the memory being addressed. For uses of other types, ADDR_BASE
1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538 gimple *stmt, enum use_type type, tree mem_type,
1539 tree addr_base, poly_uint64 addr_offset)
1541 struct iv_use *use = XCNEW (struct iv_use);
1543 use->id = group->vuses.length ();
1544 use->group_id = group->id;
1545 use->type = type;
1546 use->mem_type = mem_type;
1547 use->iv = iv;
1548 use->stmt = stmt;
1549 use->op_p = use_p;
1550 use->addr_base = addr_base;
1551 use->addr_offset = addr_offset;
1553 group->vuses.safe_push (use);
1554 return use;
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558 NONLINEAR_USE is true if the invariant is used in a way we do not
1559 handle specially. */
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1564 basic_block bb;
1565 struct version_info *info;
1567 if (TREE_CODE (op) != SSA_NAME
1568 || virtual_operand_p (op))
1569 return;
1571 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572 if (bb
1573 && flow_bb_inside_loop_p (data->current_loop, bb))
1574 return;
1576 info = name_info (data, op);
1577 info->name = op;
1578 info->has_nonlin_use |= nonlinear_use;
1579 if (!info->inv_id)
1580 info->inv_id = ++data->max_inv_var_id;
1581 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1584 /* Record a group of TYPE. */
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1589 struct iv_group *group = XCNEW (struct iv_group);
1591 group->id = data->vgroups.length ();
1592 group->type = type;
1593 group->related_cands = BITMAP_ALLOC (NULL);
1594 group->vuses.create (1);
1596 data->vgroups.safe_push (group);
1597 return group;
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601 New group will be created if there is no existing group for the use.
1602 MEM_TYPE is the type of memory being addressed, or NULL if this
1603 isn't an address reference. */
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607 struct iv *iv, gimple *stmt, enum use_type type,
1608 tree mem_type)
1610 tree addr_base = NULL;
1611 struct iv_group *group = NULL;
1612 poly_uint64 addr_offset = 0;
1614 /* Record non address type use in a new group. */
1615 if (address_p (type))
1617 unsigned int i;
1619 addr_base = strip_offset (iv->base, &addr_offset);
1620 for (i = 0; i < data->vgroups.length (); i++)
1622 struct iv_use *use;
1624 group = data->vgroups[i];
1625 use = group->vuses[0];
1626 if (!address_p (use->type))
1627 continue;
1629 /* Check if it has the same stripped base and step. */
1630 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631 && operand_equal_p (iv->step, use->iv->step, 0)
1632 && operand_equal_p (addr_base, use->addr_base, 0))
1633 break;
1635 if (i == data->vgroups.length ())
1636 group = NULL;
1639 if (!group)
1640 group = record_group (data, type);
1642 return record_use (group, use_p, iv, stmt, type, mem_type,
1643 addr_base, addr_offset);
1646 /* Checks whether the use OP is interesting and if so, records it. */
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1651 struct iv *iv;
1652 gimple *stmt;
1653 struct iv_use *use;
1655 if (TREE_CODE (op) != SSA_NAME)
1656 return NULL;
1658 iv = get_iv (data, op);
1659 if (!iv)
1660 return NULL;
1662 if (iv->nonlin_use)
1664 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665 return iv->nonlin_use;
1668 if (integer_zerop (iv->step))
1670 record_invariant (data, op, true);
1671 return NULL;
1674 stmt = SSA_NAME_DEF_STMT (op);
1675 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1677 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678 iv->nonlin_use = use;
1679 return use;
1682 /* Indicate how compare type iv_use can be handled. */
1683 enum comp_iv_rewrite
1685 COMP_IV_NA,
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1687 COMP_IV_EXPR,
1688 /* We may rewrite compare type iv_uses on both sides of comparison by
1689 expressing value of each iv_use. */
1690 COMP_IV_EXPR_2,
1691 /* We may rewrite compare type iv_use by expressing value of the iv_use
1692 or by eliminating it with other iv_cand. */
1693 COMP_IV_ELIM
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697 of an induction variable and an invariant. If this is the case,
1698 CONTROL_VAR is set to location of the iv, BOUND to the location of
1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700 induction variable descriptions, and true is returned. If this is not
1701 the case, CONTROL_VAR and BOUND are set to the arguments of the
1702 condition and false is returned. */
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706 tree **control_var, tree **bound,
1707 struct iv **iv_var, struct iv **iv_bound)
1709 /* The objects returned when COND has constant operands. */
1710 static struct iv const_iv;
1711 static tree zero;
1712 tree *op0 = &zero, *op1 = &zero;
1713 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1716 if (gimple_code (stmt) == GIMPLE_COND)
1718 gcond *cond_stmt = as_a <gcond *> (stmt);
1719 op0 = gimple_cond_lhs_ptr (cond_stmt);
1720 op1 = gimple_cond_rhs_ptr (cond_stmt);
1722 else
1724 op0 = gimple_assign_rhs1_ptr (stmt);
1725 op1 = gimple_assign_rhs2_ptr (stmt);
1728 zero = integer_zero_node;
1729 const_iv.step = integer_zero_node;
1731 if (TREE_CODE (*op0) == SSA_NAME)
1732 iv0 = get_iv (data, *op0);
1733 if (TREE_CODE (*op1) == SSA_NAME)
1734 iv1 = get_iv (data, *op1);
1736 /* If both sides of comparison are IVs. We can express ivs on both end. */
1737 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1739 rewrite_type = COMP_IV_EXPR_2;
1740 goto end;
1743 /* If none side of comparison is IV. */
1744 if ((!iv0 || integer_zerop (iv0->step))
1745 && (!iv1 || integer_zerop (iv1->step)))
1746 goto end;
1748 /* Control variable may be on the other side. */
1749 if (!iv0 || integer_zerop (iv0->step))
1751 std::swap (op0, op1);
1752 std::swap (iv0, iv1);
1754 /* If one side is IV and the other side isn't loop invariant. */
1755 if (!iv1)
1756 rewrite_type = COMP_IV_EXPR;
1757 /* If one side is IV and the other side is loop invariant. */
1758 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759 rewrite_type = COMP_IV_ELIM;
1761 end:
1762 if (control_var)
1763 *control_var = op0;
1764 if (iv_var)
1765 *iv_var = iv0;
1766 if (bound)
1767 *bound = op1;
1768 if (iv_bound)
1769 *iv_bound = iv1;
1771 return rewrite_type;
1774 /* Checks whether the condition in STMT is interesting and if so,
1775 records it. */
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1780 tree *var_p, *bound_p;
1781 struct iv *var_iv, *bound_iv;
1782 enum comp_iv_rewrite ret;
1784 ret = extract_cond_operands (data, stmt,
1785 &var_p, &bound_p, &var_iv, &bound_iv);
1786 if (ret == COMP_IV_NA)
1788 find_interesting_uses_op (data, *var_p);
1789 find_interesting_uses_op (data, *bound_p);
1790 return;
1793 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794 /* Record compare type iv_use for iv on the other side of comparison. */
1795 if (ret == COMP_IV_EXPR_2)
1796 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800 relative to the loop LOOP, i.e. if all its operands are defined
1801 outside of the returned loop. Returns NULL if EXPR is not
1802 even obviously invariant in LOOP. */
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1807 basic_block def_bb;
1808 unsigned i, len;
1810 if (is_gimple_min_invariant (expr))
1811 return current_loops->tree_root;
1813 if (TREE_CODE (expr) == SSA_NAME)
1815 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816 if (def_bb)
1818 if (flow_bb_inside_loop_p (loop, def_bb))
1819 return NULL;
1820 return superloop_at_depth (loop,
1821 loop_depth (def_bb->loop_father) + 1);
1824 return current_loops->tree_root;
1827 if (!EXPR_P (expr))
1828 return NULL;
1830 unsigned maxdepth = 0;
1831 len = TREE_OPERAND_LENGTH (expr);
1832 for (i = 0; i < len; i++)
1834 struct loop *ivloop;
1835 if (!TREE_OPERAND (expr, i))
1836 continue;
1838 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839 if (!ivloop)
1840 return NULL;
1841 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1844 return superloop_at_depth (loop, maxdepth);
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848 i.e. if all its operands are defined outside of the LOOP. LOOP
1849 should not be the function body. */
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1854 basic_block def_bb;
1855 unsigned i, len;
1857 gcc_assert (loop_depth (loop) > 0);
1859 if (is_gimple_min_invariant (expr))
1860 return true;
1862 if (TREE_CODE (expr) == SSA_NAME)
1864 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865 if (def_bb
1866 && flow_bb_inside_loop_p (loop, def_bb))
1867 return false;
1869 return true;
1872 if (!EXPR_P (expr))
1873 return false;
1875 len = TREE_OPERAND_LENGTH (expr);
1876 for (i = 0; i < len; i++)
1877 if (TREE_OPERAND (expr, i)
1878 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879 return false;
1881 return true;
1884 /* Given expression EXPR which computes inductive values with respect
1885 to loop recorded in DATA, this function returns biv from which EXPR
1886 is derived by tracing definition chains of ssa variables in EXPR. */
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1891 struct iv *iv;
1892 unsigned i, n;
1893 tree e2, e1;
1894 enum tree_code code;
1895 gimple *stmt;
1897 if (expr == NULL_TREE)
1898 return NULL;
1900 if (is_gimple_min_invariant (expr))
1901 return NULL;
1903 code = TREE_CODE (expr);
1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1906 n = TREE_OPERAND_LENGTH (expr);
1907 for (i = 0; i < n; i++)
1909 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910 if (iv)
1911 return iv;
1915 /* Stop if it's not ssa name. */
1916 if (code != SSA_NAME)
1917 return NULL;
1919 iv = get_iv (data, expr);
1920 if (!iv || integer_zerop (iv->step))
1921 return NULL;
1922 else if (iv->biv_p)
1923 return iv;
1925 stmt = SSA_NAME_DEF_STMT (expr);
1926 if (gphi *phi = dyn_cast <gphi *> (stmt))
1928 ssa_op_iter iter;
1929 use_operand_p use_p;
1930 basic_block phi_bb = gimple_bb (phi);
1932 /* Skip loop header PHI that doesn't define biv. */
1933 if (phi_bb->loop_father == data->current_loop)
1934 return NULL;
1936 if (virtual_operand_p (gimple_phi_result (phi)))
1937 return NULL;
1939 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1941 tree use = USE_FROM_PTR (use_p);
1942 iv = find_deriving_biv_for_expr (data, use);
1943 if (iv)
1944 return iv;
1946 return NULL;
1948 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949 return NULL;
1951 e1 = gimple_assign_rhs1 (stmt);
1952 code = gimple_assign_rhs_code (stmt);
1953 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954 return find_deriving_biv_for_expr (data, e1);
1956 switch (code)
1958 case MULT_EXPR:
1959 case PLUS_EXPR:
1960 case MINUS_EXPR:
1961 case POINTER_PLUS_EXPR:
1962 /* Increments, decrements and multiplications by a constant
1963 are simple. */
1964 e2 = gimple_assign_rhs2 (stmt);
1965 iv = find_deriving_biv_for_expr (data, e2);
1966 if (iv)
1967 return iv;
1968 gcc_fallthrough ();
1970 CASE_CONVERT:
1971 /* Casts are simple. */
1972 return find_deriving_biv_for_expr (data, e1);
1974 default:
1975 break;
1978 return NULL;
1981 /* Record BIV, its predecessor and successor that they are used in
1982 address type uses. */
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1987 unsigned i;
1988 tree type, base_1, base_2;
1989 bitmap_iterator bi;
1991 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992 || biv->have_address_use || !biv->no_overflow)
1993 return;
1995 type = TREE_TYPE (biv->base);
1996 if (!INTEGRAL_TYPE_P (type))
1997 return;
1999 biv->have_address_use = true;
2000 data->bivs_not_used_in_addr--;
2001 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2004 struct iv *iv = ver_info (data, i)->iv;
2006 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007 || iv->have_address_use || !iv->no_overflow)
2008 continue;
2010 if (type != TREE_TYPE (iv->base)
2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012 continue;
2014 if (!operand_equal_p (biv->step, iv->step, 0))
2015 continue;
2017 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018 if (operand_equal_p (base_1, iv->base, 0)
2019 || operand_equal_p (base_2, biv->base, 0))
2021 iv->have_address_use = true;
2022 data->bivs_not_used_in_addr--;
2027 /* Cumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
2031 struct ifs_ivopts_data
2033 struct ivopts_data *ivopts_data;
2034 gimple *stmt;
2035 tree step;
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2041 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042 struct iv *iv;
2043 bool use_overflow_semantics = false;
2044 tree step, iv_base, iv_step, lbound, off;
2045 struct loop *loop = dta->ivopts_data->current_loop;
2047 /* If base is a component ref, require that the offset of the reference
2048 be invariant. */
2049 if (TREE_CODE (base) == COMPONENT_REF)
2051 off = component_ref_field_offset (base);
2052 return expr_invariant_in_loop_p (loop, off);
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 return false;
2066 step = array_ref_element_size (base);
2067 lbound = array_ref_low_bound (base);
2069 if (!expr_invariant_in_loop_p (loop, step)
2070 || !expr_invariant_in_loop_p (loop, lbound))
2071 return false;
2074 if (TREE_CODE (*idx) != SSA_NAME)
2075 return true;
2077 iv = get_iv (dta->ivopts_data, *idx);
2078 if (!iv)
2079 return false;
2081 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2084 *idx = iv->base;
2086 if (integer_zerop (iv->step))
2087 return true;
2089 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2091 step = array_ref_element_size (base);
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step) != INTEGER_CST)
2095 return false;
2097 else
2098 /* The step for pointer arithmetics already is 1 byte. */
2099 step = size_one_node;
2101 iv_base = iv->base;
2102 iv_step = iv->step;
2103 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104 use_overflow_semantics = true;
2106 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 sizetype, &iv_base, &iv_step, dta->stmt,
2108 use_overflow_semantics))
2110 /* The index might wrap. */
2111 return false;
2114 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2117 if (dta->ivopts_data->bivs_not_used_in_addr)
2119 if (!iv->biv_p)
2120 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2122 record_biv_for_address_use (dta->ivopts_data, iv);
2124 return true;
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 void *vdata)
2134 struct ivopts_data *data = (struct ivopts_data *) vdata;
2135 find_interesting_uses_op (data, *idx);
2136 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2138 find_interesting_uses_op (data, array_ref_element_size (base));
2139 find_interesting_uses_op (data, array_ref_low_bound (base));
2141 return true;
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2152 tree mby;
2153 enum tree_code code;
2154 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155 widest_int res, p0, p1;
2157 STRIP_NOPS (top);
2158 STRIP_NOPS (bot);
2160 if (operand_equal_p (top, bot, 0))
2162 *mul = 1;
2163 return true;
2166 code = TREE_CODE (top);
2167 switch (code)
2169 case MULT_EXPR:
2170 mby = TREE_OPERAND (top, 1);
2171 if (TREE_CODE (mby) != INTEGER_CST)
2172 return false;
2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 return false;
2177 *mul = wi::sext (res * wi::to_widest (mby), precision);
2178 return true;
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 return false;
2186 if (code == MINUS_EXPR)
2187 p1 = -p1;
2188 *mul = wi::sext (p0 + p1, precision);
2189 return true;
2191 case INTEGER_CST:
2192 if (TREE_CODE (bot) != INTEGER_CST)
2193 return false;
2195 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197 if (p1 == 0)
2198 return false;
2199 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200 return res == 0;
2202 default:
2203 if (POLY_INT_CST_P (top)
2204 && POLY_INT_CST_P (bot)
2205 && constant_multiple_p (wi::to_poly_widest (top),
2206 wi::to_poly_widest (bot), mul))
2207 return true;
2209 return false;
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref) == TARGET_MEM_REF)
2221 return false;
2223 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2227 unsigned HOST_WIDE_INT bitpos;
2228 unsigned int ref_align;
2229 get_object_alignment_1 (ref, &ref_align, &bitpos);
2230 if (ref_align < align
2231 || (bitpos % align) != 0
2232 || (bitpos % BITS_PER_UNIT) != 0)
2233 return true;
2235 unsigned int trailing_zeros = tree_ctz (step);
2236 if (trailing_zeros < HOST_BITS_PER_INT
2237 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238 return true;
2240 return false;
2243 /* Return true if EXPR may be non-addressable. */
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2248 switch (TREE_CODE (expr))
2250 case TARGET_MEM_REF:
2251 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2252 target, thus they are always addressable. */
2253 return false;
2255 case MEM_REF:
2256 /* Likewise for MEM_REFs, modulo the storage order. */
2257 return REF_REVERSE_STORAGE_ORDER (expr);
2259 case BIT_FIELD_REF:
2260 if (REF_REVERSE_STORAGE_ORDER (expr))
2261 return true;
2262 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2264 case COMPONENT_REF:
2265 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2266 return true;
2267 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2268 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2270 case ARRAY_REF:
2271 case ARRAY_RANGE_REF:
2272 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2273 return true;
2274 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2276 case VIEW_CONVERT_EXPR:
2277 /* This kind of view-conversions may wrap non-addressable objects
2278 and make them look addressable. After some processing the
2279 non-addressability may be uncovered again, causing ADDR_EXPRs
2280 of inappropriate objects to be built. */
2281 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2282 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2283 return true;
2284 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2286 CASE_CONVERT:
2287 return true;
2289 default:
2290 break;
2293 return false;
2296 /* Finds addresses in *OP_P inside STMT. */
2298 static void
2299 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2300 tree *op_p)
2302 tree base = *op_p, step = size_zero_node;
2303 struct iv *civ;
2304 struct ifs_ivopts_data ifs_ivopts_data;
2306 /* Do not play with volatile memory references. A bit too conservative,
2307 perhaps, but safe. */
2308 if (gimple_has_volatile_ops (stmt))
2309 goto fail;
2311 /* Ignore bitfields for now. Not really something terribly complicated
2312 to handle. TODO. */
2313 if (TREE_CODE (base) == BIT_FIELD_REF)
2314 goto fail;
2316 base = unshare_expr (base);
2318 if (TREE_CODE (base) == TARGET_MEM_REF)
2320 tree type = build_pointer_type (TREE_TYPE (base));
2321 tree astep;
2323 if (TMR_BASE (base)
2324 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2326 civ = get_iv (data, TMR_BASE (base));
2327 if (!civ)
2328 goto fail;
2330 TMR_BASE (base) = civ->base;
2331 step = civ->step;
2333 if (TMR_INDEX2 (base)
2334 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2336 civ = get_iv (data, TMR_INDEX2 (base));
2337 if (!civ)
2338 goto fail;
2340 TMR_INDEX2 (base) = civ->base;
2341 step = civ->step;
2343 if (TMR_INDEX (base)
2344 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2346 civ = get_iv (data, TMR_INDEX (base));
2347 if (!civ)
2348 goto fail;
2350 TMR_INDEX (base) = civ->base;
2351 astep = civ->step;
2353 if (astep)
2355 if (TMR_STEP (base))
2356 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2358 step = fold_build2 (PLUS_EXPR, type, step, astep);
2362 if (integer_zerop (step))
2363 goto fail;
2364 base = tree_mem_ref_addr (type, base);
2366 else
2368 ifs_ivopts_data.ivopts_data = data;
2369 ifs_ivopts_data.stmt = stmt;
2370 ifs_ivopts_data.step = size_zero_node;
2371 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2372 || integer_zerop (ifs_ivopts_data.step))
2373 goto fail;
2374 step = ifs_ivopts_data.step;
2376 /* Check that the base expression is addressable. This needs
2377 to be done after substituting bases of IVs into it. */
2378 if (may_be_nonaddressable_p (base))
2379 goto fail;
2381 /* Moreover, on strict alignment platforms, check that it is
2382 sufficiently aligned. */
2383 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2384 goto fail;
2386 base = build_fold_addr_expr (base);
2388 /* Substituting bases of IVs into the base expression might
2389 have caused folding opportunities. */
2390 if (TREE_CODE (base) == ADDR_EXPR)
2392 tree *ref = &TREE_OPERAND (base, 0);
2393 while (handled_component_p (*ref))
2394 ref = &TREE_OPERAND (*ref, 0);
2395 if (TREE_CODE (*ref) == MEM_REF)
2397 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2398 TREE_OPERAND (*ref, 0),
2399 TREE_OPERAND (*ref, 1));
2400 if (tem)
2401 *ref = tem;
2406 civ = alloc_iv (data, base, step);
2407 /* Fail if base object of this memory reference is unknown. */
2408 if (civ->base_object == NULL_TREE)
2409 goto fail;
2411 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2412 return;
2414 fail:
2415 for_each_index (op_p, idx_record_use, data);
2418 /* Finds and records invariants used in STMT. */
2420 static void
2421 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2423 ssa_op_iter iter;
2424 use_operand_p use_p;
2425 tree op;
2427 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2429 op = USE_FROM_PTR (use_p);
2430 record_invariant (data, op, false);
2434 /* CALL calls an internal function. If operand *OP_P will become an
2435 address when the call is expanded, return the type of the memory
2436 being addressed, otherwise return null. */
2438 static tree
2439 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2441 switch (gimple_call_internal_fn (call))
2443 case IFN_MASK_LOAD:
2444 if (op_p == gimple_call_arg_ptr (call, 0))
2445 return TREE_TYPE (gimple_call_lhs (call));
2446 return NULL_TREE;
2448 case IFN_MASK_STORE:
2449 if (op_p == gimple_call_arg_ptr (call, 0))
2450 return TREE_TYPE (gimple_call_arg (call, 3));
2451 return NULL_TREE;
2453 default:
2454 return NULL_TREE;
2458 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2459 Return true if the operand will become an address when STMT
2460 is expanded and record the associated address use if so. */
2462 static bool
2463 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2464 struct iv *iv)
2466 /* Fail if base object of this memory reference is unknown. */
2467 if (iv->base_object == NULL_TREE)
2468 return false;
2470 tree mem_type = NULL_TREE;
2471 if (gcall *call = dyn_cast <gcall *> (stmt))
2472 if (gimple_call_internal_p (call))
2473 mem_type = get_mem_type_for_internal_fn (call, op_p);
2474 if (mem_type)
2476 iv = alloc_iv (data, iv->base, iv->step);
2477 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2478 return true;
2480 return false;
2483 /* Finds interesting uses of induction variables in the statement STMT. */
2485 static void
2486 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2488 struct iv *iv;
2489 tree op, *lhs, *rhs;
2490 ssa_op_iter iter;
2491 use_operand_p use_p;
2492 enum tree_code code;
2494 find_invariants_stmt (data, stmt);
2496 if (gimple_code (stmt) == GIMPLE_COND)
2498 find_interesting_uses_cond (data, stmt);
2499 return;
2502 if (is_gimple_assign (stmt))
2504 lhs = gimple_assign_lhs_ptr (stmt);
2505 rhs = gimple_assign_rhs1_ptr (stmt);
2507 if (TREE_CODE (*lhs) == SSA_NAME)
2509 /* If the statement defines an induction variable, the uses are not
2510 interesting by themselves. */
2512 iv = get_iv (data, *lhs);
2514 if (iv && !integer_zerop (iv->step))
2515 return;
2518 code = gimple_assign_rhs_code (stmt);
2519 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2520 && (REFERENCE_CLASS_P (*rhs)
2521 || is_gimple_val (*rhs)))
2523 if (REFERENCE_CLASS_P (*rhs))
2524 find_interesting_uses_address (data, stmt, rhs);
2525 else
2526 find_interesting_uses_op (data, *rhs);
2528 if (REFERENCE_CLASS_P (*lhs))
2529 find_interesting_uses_address (data, stmt, lhs);
2530 return;
2532 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2534 find_interesting_uses_cond (data, stmt);
2535 return;
2538 /* TODO -- we should also handle address uses of type
2540 memory = call (whatever);
2544 call (memory). */
2547 if (gimple_code (stmt) == GIMPLE_PHI
2548 && gimple_bb (stmt) == data->current_loop->header)
2550 iv = get_iv (data, PHI_RESULT (stmt));
2552 if (iv && !integer_zerop (iv->step))
2553 return;
2556 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2558 op = USE_FROM_PTR (use_p);
2560 if (TREE_CODE (op) != SSA_NAME)
2561 continue;
2563 iv = get_iv (data, op);
2564 if (!iv)
2565 continue;
2567 if (!find_address_like_use (data, stmt, use_p->use, iv))
2568 find_interesting_uses_op (data, op);
2572 /* Finds interesting uses of induction variables outside of loops
2573 on loop exit edge EXIT. */
2575 static void
2576 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2578 gphi *phi;
2579 gphi_iterator psi;
2580 tree def;
2582 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2584 phi = psi.phi ();
2585 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2586 if (!virtual_operand_p (def))
2587 find_interesting_uses_op (data, def);
2591 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2592 mode for memory reference represented by USE. */
2594 static GTY (()) vec<rtx, va_gc> *addr_list;
2596 static bool
2597 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2599 rtx reg, addr;
2600 unsigned list_index;
2601 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2602 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2604 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2605 if (list_index >= vec_safe_length (addr_list))
2606 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2608 addr = (*addr_list)[list_index];
2609 if (!addr)
2611 addr_mode = targetm.addr_space.address_mode (as);
2612 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2613 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2614 (*addr_list)[list_index] = addr;
2616 else
2617 addr_mode = GET_MODE (addr);
2619 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2620 return (memory_address_addr_space_p (mem_mode, addr, as));
2623 /* Comparison function to sort group in ascending order of addr_offset. */
2625 static int
2626 group_compare_offset (const void *a, const void *b)
2628 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2629 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2631 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2634 /* Check if small groups should be split. Return true if no group
2635 contains more than two uses with distinct addr_offsets. Return
2636 false otherwise. We want to split such groups because:
2638 1) Small groups don't have much benefit and may interfer with
2639 general candidate selection.
2640 2) Size for problem with only small groups is usually small and
2641 general algorithm can handle it well.
2643 TODO -- Above claim may not hold when we want to merge memory
2644 accesses with conseuctive addresses. */
2646 static bool
2647 split_small_address_groups_p (struct ivopts_data *data)
2649 unsigned int i, j, distinct = 1;
2650 struct iv_use *pre;
2651 struct iv_group *group;
2653 for (i = 0; i < data->vgroups.length (); i++)
2655 group = data->vgroups[i];
2656 if (group->vuses.length () == 1)
2657 continue;
2659 gcc_assert (address_p (group->type));
2660 if (group->vuses.length () == 2)
2662 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2663 group->vuses[1]->addr_offset) > 0)
2664 std::swap (group->vuses[0], group->vuses[1]);
2666 else
2667 group->vuses.qsort (group_compare_offset);
2669 if (distinct > 2)
2670 continue;
2672 distinct = 1;
2673 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2675 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2677 pre = group->vuses[j];
2678 distinct++;
2681 if (distinct > 2)
2682 break;
2686 return (distinct <= 2);
2689 /* For each group of address type uses, this function further groups
2690 these uses according to the maximum offset supported by target's
2691 [base + offset] addressing mode. */
2693 static void
2694 split_address_groups (struct ivopts_data *data)
2696 unsigned int i, j;
2697 /* Always split group. */
2698 bool split_p = split_small_address_groups_p (data);
2700 for (i = 0; i < data->vgroups.length (); i++)
2702 struct iv_group *new_group = NULL;
2703 struct iv_group *group = data->vgroups[i];
2704 struct iv_use *use = group->vuses[0];
2706 use->id = 0;
2707 use->group_id = group->id;
2708 if (group->vuses.length () == 1)
2709 continue;
2711 gcc_assert (address_p (use->type));
2713 for (j = 1; j < group->vuses.length ();)
2715 struct iv_use *next = group->vuses[j];
2716 poly_int64 offset = next->addr_offset - use->addr_offset;
2718 /* Split group if aksed to, or the offset against the first
2719 use can't fit in offset part of addressing mode. IV uses
2720 having the same offset are still kept in one group. */
2721 if (maybe_ne (offset, 0)
2722 && (split_p || !addr_offset_valid_p (use, offset)))
2724 if (!new_group)
2725 new_group = record_group (data, group->type);
2726 group->vuses.ordered_remove (j);
2727 new_group->vuses.safe_push (next);
2728 continue;
2731 next->id = j;
2732 next->group_id = group->id;
2733 j++;
2738 /* Finds uses of the induction variables that are interesting. */
2740 static void
2741 find_interesting_uses (struct ivopts_data *data)
2743 basic_block bb;
2744 gimple_stmt_iterator bsi;
2745 basic_block *body = get_loop_body (data->current_loop);
2746 unsigned i;
2747 edge e;
2749 for (i = 0; i < data->current_loop->num_nodes; i++)
2751 edge_iterator ei;
2752 bb = body[i];
2754 FOR_EACH_EDGE (e, ei, bb->succs)
2755 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2756 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2757 find_interesting_uses_outside (data, e);
2759 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2760 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2761 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2762 if (!is_gimple_debug (gsi_stmt (bsi)))
2763 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765 free (body);
2767 split_address_groups (data);
2769 if (dump_file && (dump_flags & TDF_DETAILS))
2771 fprintf (dump_file, "\n<IV Groups>:\n");
2772 dump_groups (dump_file, data);
2773 fprintf (dump_file, "\n");
2777 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2778 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2779 we are at the top-level of the processed address. */
2781 static tree
2782 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2783 poly_int64 *offset)
2785 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2786 enum tree_code code;
2787 tree type, orig_type = TREE_TYPE (expr);
2788 poly_int64 off0, off1;
2789 HOST_WIDE_INT st;
2790 tree orig_expr = expr;
2792 STRIP_NOPS (expr);
2794 type = TREE_TYPE (expr);
2795 code = TREE_CODE (expr);
2796 *offset = 0;
2798 switch (code)
2800 case POINTER_PLUS_EXPR:
2801 case PLUS_EXPR:
2802 case MINUS_EXPR:
2803 op0 = TREE_OPERAND (expr, 0);
2804 op1 = TREE_OPERAND (expr, 1);
2806 op0 = strip_offset_1 (op0, false, false, &off0);
2807 op1 = strip_offset_1 (op1, false, false, &off1);
2809 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2810 if (op0 == TREE_OPERAND (expr, 0)
2811 && op1 == TREE_OPERAND (expr, 1))
2812 return orig_expr;
2814 if (integer_zerop (op1))
2815 expr = op0;
2816 else if (integer_zerop (op0))
2818 if (code == MINUS_EXPR)
2819 expr = fold_build1 (NEGATE_EXPR, type, op1);
2820 else
2821 expr = op1;
2823 else
2824 expr = fold_build2 (code, type, op0, op1);
2826 return fold_convert (orig_type, expr);
2828 case MULT_EXPR:
2829 op1 = TREE_OPERAND (expr, 1);
2830 if (!cst_and_fits_in_hwi (op1))
2831 return orig_expr;
2833 op0 = TREE_OPERAND (expr, 0);
2834 op0 = strip_offset_1 (op0, false, false, &off0);
2835 if (op0 == TREE_OPERAND (expr, 0))
2836 return orig_expr;
2838 *offset = off0 * int_cst_value (op1);
2839 if (integer_zerop (op0))
2840 expr = op0;
2841 else
2842 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2844 return fold_convert (orig_type, expr);
2846 case ARRAY_REF:
2847 case ARRAY_RANGE_REF:
2848 if (!inside_addr)
2849 return orig_expr;
2851 step = array_ref_element_size (expr);
2852 if (!cst_and_fits_in_hwi (step))
2853 break;
2855 st = int_cst_value (step);
2856 op1 = TREE_OPERAND (expr, 1);
2857 op1 = strip_offset_1 (op1, false, false, &off1);
2858 *offset = off1 * st;
2860 if (top_compref
2861 && integer_zerop (op1))
2863 /* Strip the component reference completely. */
2864 op0 = TREE_OPERAND (expr, 0);
2865 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2866 *offset += off0;
2867 return op0;
2869 break;
2871 case COMPONENT_REF:
2873 tree field;
2875 if (!inside_addr)
2876 return orig_expr;
2878 tmp = component_ref_field_offset (expr);
2879 field = TREE_OPERAND (expr, 1);
2880 if (top_compref
2881 && cst_and_fits_in_hwi (tmp)
2882 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2884 HOST_WIDE_INT boffset, abs_off;
2886 /* Strip the component reference completely. */
2887 op0 = TREE_OPERAND (expr, 0);
2888 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2889 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2890 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2891 if (boffset < 0)
2892 abs_off = -abs_off;
2894 *offset = off0 + int_cst_value (tmp) + abs_off;
2895 return op0;
2898 break;
2900 case ADDR_EXPR:
2901 op0 = TREE_OPERAND (expr, 0);
2902 op0 = strip_offset_1 (op0, true, true, &off0);
2903 *offset += off0;
2905 if (op0 == TREE_OPERAND (expr, 0))
2906 return orig_expr;
2908 expr = build_fold_addr_expr (op0);
2909 return fold_convert (orig_type, expr);
2911 case MEM_REF:
2912 /* ??? Offset operand? */
2913 inside_addr = false;
2914 break;
2916 default:
2917 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2918 return build_int_cst (orig_type, 0);
2919 return orig_expr;
2922 /* Default handling of expressions for that we want to recurse into
2923 the first operand. */
2924 op0 = TREE_OPERAND (expr, 0);
2925 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2926 *offset += off0;
2928 if (op0 == TREE_OPERAND (expr, 0)
2929 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2930 return orig_expr;
2932 expr = copy_node (expr);
2933 TREE_OPERAND (expr, 0) = op0;
2934 if (op1)
2935 TREE_OPERAND (expr, 1) = op1;
2937 /* Inside address, we might strip the top level component references,
2938 thus changing type of the expression. Handling of ADDR_EXPR
2939 will fix that. */
2940 expr = fold_convert (orig_type, expr);
2942 return expr;
2945 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2947 tree
2948 strip_offset (tree expr, poly_uint64_pod *offset)
2950 poly_int64 off;
2951 tree core = strip_offset_1 (expr, false, false, &off);
2952 *offset = off;
2953 return core;
2956 /* Returns variant of TYPE that can be used as base for different uses.
2957 We return unsigned type with the same precision, which avoids problems
2958 with overflows. */
2960 static tree
2961 generic_type_for (tree type)
2963 if (POINTER_TYPE_P (type))
2964 return unsigned_type_for (type);
2966 if (TYPE_UNSIGNED (type))
2967 return type;
2969 return unsigned_type_for (type);
2972 /* Private data for walk_tree. */
2974 struct walk_tree_data
2976 bitmap *inv_vars;
2977 struct ivopts_data *idata;
2980 /* Callback function for walk_tree, it records invariants and symbol
2981 reference in *EXPR_P. DATA is the structure storing result info. */
2983 static tree
2984 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2986 tree op = *expr_p;
2987 struct version_info *info;
2988 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2990 if (TREE_CODE (op) != SSA_NAME)
2991 return NULL_TREE;
2993 info = name_info (wdata->idata, op);
2994 /* Because we expand simple operations when finding IVs, loop invariant
2995 variable that isn't referred by the original loop could be used now.
2996 Record such invariant variables here. */
2997 if (!info->iv)
2999 struct ivopts_data *idata = wdata->idata;
3000 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3002 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3004 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3005 record_invariant (idata, op, false);
3008 if (!info->inv_id || info->has_nonlin_use)
3009 return NULL_TREE;
3011 if (!*wdata->inv_vars)
3012 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3013 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3015 return NULL_TREE;
3018 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
3019 store it. */
3021 static inline void
3022 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3024 struct walk_tree_data wdata;
3026 if (!inv_vars)
3027 return;
3029 wdata.idata = data;
3030 wdata.inv_vars = inv_vars;
3031 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3034 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3035 will be recorded if it doesn't exist yet. Given below two exprs:
3036 inv_expr + cst1, inv_expr + cst2
3037 It's hard to make decision whether constant part should be stripped
3038 or not. We choose to not strip based on below facts:
3039 1) We need to count ADD cost for constant part if it's stripped,
3040 which is't always trivial where this functions is called.
3041 2) Stripping constant away may be conflict with following loop
3042 invariant hoisting pass.
3043 3) Not stripping constant away results in more invariant exprs,
3044 which usually leads to decision preferring lower reg pressure. */
3046 static iv_inv_expr_ent *
3047 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3049 STRIP_NOPS (inv_expr);
3051 if (poly_int_tree_p (inv_expr)
3052 || TREE_CODE (inv_expr) == SSA_NAME)
3053 return NULL;
3055 /* Don't strip constant part away as we used to. */
3057 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3058 struct iv_inv_expr_ent ent;
3059 ent.expr = inv_expr;
3060 ent.hash = iterative_hash_expr (inv_expr, 0);
3061 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3063 if (!*slot)
3065 *slot = XNEW (struct iv_inv_expr_ent);
3066 (*slot)->expr = inv_expr;
3067 (*slot)->hash = ent.hash;
3068 (*slot)->id = ++data->max_inv_expr_id;
3071 return *slot;
3074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3075 position to POS. If USE is not NULL, the candidate is set as related to
3076 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3077 replacement of the final value of the iv by a direct computation. */
3079 static struct iv_cand *
3080 add_candidate_1 (struct ivopts_data *data,
3081 tree base, tree step, bool important, enum iv_position pos,
3082 struct iv_use *use, gimple *incremented_at,
3083 struct iv *orig_iv = NULL)
3085 unsigned i;
3086 struct iv_cand *cand = NULL;
3087 tree type, orig_type;
3089 gcc_assert (base && step);
3091 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3092 live, but the ivopts code may replace a real pointer with one
3093 pointing before or after the memory block that is then adjusted
3094 into the memory block during the loop. FIXME: It would likely be
3095 better to actually force the pointer live and still use ivopts;
3096 for example, it would be enough to write the pointer into memory
3097 and keep it there until after the loop. */
3098 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3099 return NULL;
3101 /* For non-original variables, make sure their values are computed in a type
3102 that does not invoke undefined behavior on overflows (since in general,
3103 we cannot prove that these induction variables are non-wrapping). */
3104 if (pos != IP_ORIGINAL)
3106 orig_type = TREE_TYPE (base);
3107 type = generic_type_for (orig_type);
3108 if (type != orig_type)
3110 base = fold_convert (type, base);
3111 step = fold_convert (type, step);
3115 for (i = 0; i < data->vcands.length (); i++)
3117 cand = data->vcands[i];
3119 if (cand->pos != pos)
3120 continue;
3122 if (cand->incremented_at != incremented_at
3123 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3124 && cand->ainc_use != use))
3125 continue;
3127 if (operand_equal_p (base, cand->iv->base, 0)
3128 && operand_equal_p (step, cand->iv->step, 0)
3129 && (TYPE_PRECISION (TREE_TYPE (base))
3130 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3131 break;
3134 if (i == data->vcands.length ())
3136 cand = XCNEW (struct iv_cand);
3137 cand->id = i;
3138 cand->iv = alloc_iv (data, base, step);
3139 cand->pos = pos;
3140 if (pos != IP_ORIGINAL)
3142 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3143 cand->var_after = cand->var_before;
3145 cand->important = important;
3146 cand->incremented_at = incremented_at;
3147 data->vcands.safe_push (cand);
3149 if (!poly_int_tree_p (step))
3151 find_inv_vars (data, &step, &cand->inv_vars);
3153 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3154 /* Share bitmap between inv_vars and inv_exprs for cand. */
3155 if (inv_expr != NULL)
3157 cand->inv_exprs = cand->inv_vars;
3158 cand->inv_vars = NULL;
3159 if (cand->inv_exprs)
3160 bitmap_clear (cand->inv_exprs);
3161 else
3162 cand->inv_exprs = BITMAP_ALLOC (NULL);
3164 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3168 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3169 cand->ainc_use = use;
3170 else
3171 cand->ainc_use = NULL;
3173 cand->orig_iv = orig_iv;
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 dump_cand (dump_file, cand);
3178 cand->important |= important;
3180 /* Relate candidate to the group for which it is added. */
3181 if (use)
3182 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3184 return cand;
3187 /* Returns true if incrementing the induction variable at the end of the LOOP
3188 is allowed.
3190 The purpose is to avoid splitting latch edge with a biv increment, thus
3191 creating a jump, possibly confusing other optimization passes and leaving
3192 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3193 available (so we do not have a better alternative), or if the latch edge
3194 is already nonempty. */
3196 static bool
3197 allow_ip_end_pos_p (struct loop *loop)
3199 if (!ip_normal_pos (loop))
3200 return true;
3202 if (!empty_block_p (ip_end_pos (loop)))
3203 return true;
3205 return false;
3208 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3209 Important field is set to IMPORTANT. */
3211 static void
3212 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3213 bool important, struct iv_use *use)
3215 basic_block use_bb = gimple_bb (use->stmt);
3216 machine_mode mem_mode;
3217 unsigned HOST_WIDE_INT cstepi;
3219 /* If we insert the increment in any position other than the standard
3220 ones, we must ensure that it is incremented once per iteration.
3221 It must not be in an inner nested loop, or one side of an if
3222 statement. */
3223 if (use_bb->loop_father != data->current_loop
3224 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3225 || stmt_can_throw_internal (use->stmt)
3226 || !cst_and_fits_in_hwi (step))
3227 return;
3229 cstepi = int_cst_value (step);
3231 mem_mode = TYPE_MODE (use->mem_type);
3232 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3233 || USE_STORE_PRE_INCREMENT (mem_mode))
3234 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3235 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3236 || USE_STORE_PRE_DECREMENT (mem_mode))
3237 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3239 enum tree_code code = MINUS_EXPR;
3240 tree new_base;
3241 tree new_step = step;
3243 if (POINTER_TYPE_P (TREE_TYPE (base)))
3245 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3246 code = POINTER_PLUS_EXPR;
3248 else
3249 new_step = fold_convert (TREE_TYPE (base), new_step);
3250 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3251 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3252 use->stmt);
3254 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3255 || USE_STORE_POST_INCREMENT (mem_mode))
3256 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3257 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3258 || USE_STORE_POST_DECREMENT (mem_mode))
3259 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3261 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3262 use->stmt);
3266 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3267 position to POS. If USE is not NULL, the candidate is set as related to
3268 it. The candidate computation is scheduled before exit condition and at
3269 the end of loop. */
3271 static void
3272 add_candidate (struct ivopts_data *data,
3273 tree base, tree step, bool important, struct iv_use *use,
3274 struct iv *orig_iv = NULL)
3276 if (ip_normal_pos (data->current_loop))
3277 add_candidate_1 (data, base, step, important,
3278 IP_NORMAL, use, NULL, orig_iv);
3279 if (ip_end_pos (data->current_loop)
3280 && allow_ip_end_pos_p (data->current_loop))
3281 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3284 /* Adds standard iv candidates. */
3286 static void
3287 add_standard_iv_candidates (struct ivopts_data *data)
3289 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3291 /* The same for a double-integer type if it is still fast enough. */
3292 if (TYPE_PRECISION
3293 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3294 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3295 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3296 build_int_cst (long_integer_type_node, 1), true, NULL);
3298 /* The same for a double-integer type if it is still fast enough. */
3299 if (TYPE_PRECISION
3300 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3301 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3302 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3303 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3307 /* Adds candidates bases on the old induction variable IV. */
3309 static void
3310 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3312 gimple *phi;
3313 tree def;
3314 struct iv_cand *cand;
3316 /* Check if this biv is used in address type use. */
3317 if (iv->no_overflow && iv->have_address_use
3318 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3319 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3321 tree base = fold_convert (sizetype, iv->base);
3322 tree step = fold_convert (sizetype, iv->step);
3324 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3325 add_candidate (data, base, step, true, NULL, iv);
3326 /* Add iv cand of the original type only if it has nonlinear use. */
3327 if (iv->nonlin_use)
3328 add_candidate (data, iv->base, iv->step, true, NULL);
3330 else
3331 add_candidate (data, iv->base, iv->step, true, NULL);
3333 /* The same, but with initial value zero. */
3334 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3335 add_candidate (data, size_int (0), iv->step, true, NULL);
3336 else
3337 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3338 iv->step, true, NULL);
3340 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3341 if (gimple_code (phi) == GIMPLE_PHI)
3343 /* Additionally record the possibility of leaving the original iv
3344 untouched. */
3345 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3346 /* Don't add candidate if it's from another PHI node because
3347 it's an affine iv appearing in the form of PEELED_CHREC. */
3348 phi = SSA_NAME_DEF_STMT (def);
3349 if (gimple_code (phi) != GIMPLE_PHI)
3351 cand = add_candidate_1 (data,
3352 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3353 SSA_NAME_DEF_STMT (def));
3354 if (cand)
3356 cand->var_before = iv->ssa_name;
3357 cand->var_after = def;
3360 else
3361 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3365 /* Adds candidates based on the old induction variables. */
3367 static void
3368 add_iv_candidate_for_bivs (struct ivopts_data *data)
3370 unsigned i;
3371 struct iv *iv;
3372 bitmap_iterator bi;
3374 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3376 iv = ver_info (data, i)->iv;
3377 if (iv && iv->biv_p && !integer_zerop (iv->step))
3378 add_iv_candidate_for_biv (data, iv);
3382 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3384 static void
3385 record_common_cand (struct ivopts_data *data, tree base,
3386 tree step, struct iv_use *use)
3388 struct iv_common_cand ent;
3389 struct iv_common_cand **slot;
3391 ent.base = base;
3392 ent.step = step;
3393 ent.hash = iterative_hash_expr (base, 0);
3394 ent.hash = iterative_hash_expr (step, ent.hash);
3396 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3397 if (*slot == NULL)
3399 *slot = new iv_common_cand ();
3400 (*slot)->base = base;
3401 (*slot)->step = step;
3402 (*slot)->uses.create (8);
3403 (*slot)->hash = ent.hash;
3404 data->iv_common_cands.safe_push ((*slot));
3407 gcc_assert (use != NULL);
3408 (*slot)->uses.safe_push (use);
3409 return;
3412 /* Comparison function used to sort common candidates. */
3414 static int
3415 common_cand_cmp (const void *p1, const void *p2)
3417 unsigned n1, n2;
3418 const struct iv_common_cand *const *const ccand1
3419 = (const struct iv_common_cand *const *)p1;
3420 const struct iv_common_cand *const *const ccand2
3421 = (const struct iv_common_cand *const *)p2;
3423 n1 = (*ccand1)->uses.length ();
3424 n2 = (*ccand2)->uses.length ();
3425 return n2 - n1;
3428 /* Adds IV candidates based on common candidated recorded. */
3430 static void
3431 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3433 unsigned i, j;
3434 struct iv_cand *cand_1, *cand_2;
3436 data->iv_common_cands.qsort (common_cand_cmp);
3437 for (i = 0; i < data->iv_common_cands.length (); i++)
3439 struct iv_common_cand *ptr = data->iv_common_cands[i];
3441 /* Only add IV candidate if it's derived from multiple uses. */
3442 if (ptr->uses.length () <= 1)
3443 break;
3445 cand_1 = NULL;
3446 cand_2 = NULL;
3447 if (ip_normal_pos (data->current_loop))
3448 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3449 false, IP_NORMAL, NULL, NULL);
3451 if (ip_end_pos (data->current_loop)
3452 && allow_ip_end_pos_p (data->current_loop))
3453 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3454 false, IP_END, NULL, NULL);
3456 /* Bind deriving uses and the new candidates. */
3457 for (j = 0; j < ptr->uses.length (); j++)
3459 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3460 if (cand_1)
3461 bitmap_set_bit (group->related_cands, cand_1->id);
3462 if (cand_2)
3463 bitmap_set_bit (group->related_cands, cand_2->id);
3467 /* Release data since it is useless from this point. */
3468 data->iv_common_cand_tab->empty ();
3469 data->iv_common_cands.truncate (0);
3472 /* Adds candidates based on the value of USE's iv. */
3474 static void
3475 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3477 poly_uint64 offset;
3478 tree base;
3479 tree basetype;
3480 struct iv *iv = use->iv;
3482 add_candidate (data, iv->base, iv->step, false, use);
3484 /* Record common candidate for use in case it can be shared by others. */
3485 record_common_cand (data, iv->base, iv->step, use);
3487 /* Record common candidate with initial value zero. */
3488 basetype = TREE_TYPE (iv->base);
3489 if (POINTER_TYPE_P (basetype))
3490 basetype = sizetype;
3491 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3493 /* Record common candidate with constant offset stripped in base.
3494 Like the use itself, we also add candidate directly for it. */
3495 base = strip_offset (iv->base, &offset);
3496 if (maybe_ne (offset, 0U) || base != iv->base)
3498 record_common_cand (data, base, iv->step, use);
3499 add_candidate (data, base, iv->step, false, use);
3502 /* Record common candidate with base_object removed in base. */
3503 base = iv->base;
3504 STRIP_NOPS (base);
3505 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3507 tree step = iv->step;
3509 STRIP_NOPS (step);
3510 base = TREE_OPERAND (base, 1);
3511 step = fold_convert (sizetype, step);
3512 record_common_cand (data, base, step, use);
3513 /* Also record common candidate with offset stripped. */
3514 base = strip_offset (base, &offset);
3515 if (maybe_ne (offset, 0U))
3516 record_common_cand (data, base, step, use);
3519 /* At last, add auto-incremental candidates. Make such variables
3520 important since other iv uses with same base object may be based
3521 on it. */
3522 if (use != NULL && address_p (use->type))
3523 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3526 /* Adds candidates based on the uses. */
3528 static void
3529 add_iv_candidate_for_groups (struct ivopts_data *data)
3531 unsigned i;
3533 /* Only add candidate for the first use in group. */
3534 for (i = 0; i < data->vgroups.length (); i++)
3536 struct iv_group *group = data->vgroups[i];
3538 gcc_assert (group->vuses[0] != NULL);
3539 add_iv_candidate_for_use (data, group->vuses[0]);
3541 add_iv_candidate_derived_from_uses (data);
3544 /* Record important candidates and add them to related_cands bitmaps. */
3546 static void
3547 record_important_candidates (struct ivopts_data *data)
3549 unsigned i;
3550 struct iv_group *group;
3552 for (i = 0; i < data->vcands.length (); i++)
3554 struct iv_cand *cand = data->vcands[i];
3556 if (cand->important)
3557 bitmap_set_bit (data->important_candidates, i);
3560 data->consider_all_candidates = (data->vcands.length ()
3561 <= CONSIDER_ALL_CANDIDATES_BOUND);
3563 /* Add important candidates to groups' related_cands bitmaps. */
3564 for (i = 0; i < data->vgroups.length (); i++)
3566 group = data->vgroups[i];
3567 bitmap_ior_into (group->related_cands, data->important_candidates);
3571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3572 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3573 we allocate a simple list to every use. */
3575 static void
3576 alloc_use_cost_map (struct ivopts_data *data)
3578 unsigned i, size, s;
3580 for (i = 0; i < data->vgroups.length (); i++)
3582 struct iv_group *group = data->vgroups[i];
3584 if (data->consider_all_candidates)
3585 size = data->vcands.length ();
3586 else
3588 s = bitmap_count_bits (group->related_cands);
3590 /* Round up to the power of two, so that moduling by it is fast. */
3591 size = s ? (1 << ceil_log2 (s)) : 1;
3594 group->n_map_members = size;
3595 group->cost_map = XCNEWVEC (struct cost_pair, size);
3599 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3600 on invariants INV_VARS and that the value used in expressing it is
3601 VALUE, and in case of iv elimination the comparison operator is COMP. */
3603 static void
3604 set_group_iv_cost (struct ivopts_data *data,
3605 struct iv_group *group, struct iv_cand *cand,
3606 comp_cost cost, bitmap inv_vars, tree value,
3607 enum tree_code comp, bitmap inv_exprs)
3609 unsigned i, s;
3611 if (cost.infinite_cost_p ())
3613 BITMAP_FREE (inv_vars);
3614 BITMAP_FREE (inv_exprs);
3615 return;
3618 if (data->consider_all_candidates)
3620 group->cost_map[cand->id].cand = cand;
3621 group->cost_map[cand->id].cost = cost;
3622 group->cost_map[cand->id].inv_vars = inv_vars;
3623 group->cost_map[cand->id].inv_exprs = inv_exprs;
3624 group->cost_map[cand->id].value = value;
3625 group->cost_map[cand->id].comp = comp;
3626 return;
3629 /* n_map_members is a power of two, so this computes modulo. */
3630 s = cand->id & (group->n_map_members - 1);
3631 for (i = s; i < group->n_map_members; i++)
3632 if (!group->cost_map[i].cand)
3633 goto found;
3634 for (i = 0; i < s; i++)
3635 if (!group->cost_map[i].cand)
3636 goto found;
3638 gcc_unreachable ();
3640 found:
3641 group->cost_map[i].cand = cand;
3642 group->cost_map[i].cost = cost;
3643 group->cost_map[i].inv_vars = inv_vars;
3644 group->cost_map[i].inv_exprs = inv_exprs;
3645 group->cost_map[i].value = value;
3646 group->cost_map[i].comp = comp;
3649 /* Gets cost of (GROUP, CAND) pair. */
3651 static struct cost_pair *
3652 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3653 struct iv_cand *cand)
3655 unsigned i, s;
3656 struct cost_pair *ret;
3658 if (!cand)
3659 return NULL;
3661 if (data->consider_all_candidates)
3663 ret = group->cost_map + cand->id;
3664 if (!ret->cand)
3665 return NULL;
3667 return ret;
3670 /* n_map_members is a power of two, so this computes modulo. */
3671 s = cand->id & (group->n_map_members - 1);
3672 for (i = s; i < group->n_map_members; i++)
3673 if (group->cost_map[i].cand == cand)
3674 return group->cost_map + i;
3675 else if (group->cost_map[i].cand == NULL)
3676 return NULL;
3677 for (i = 0; i < s; i++)
3678 if (group->cost_map[i].cand == cand)
3679 return group->cost_map + i;
3680 else if (group->cost_map[i].cand == NULL)
3681 return NULL;
3683 return NULL;
3686 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3687 static rtx
3688 produce_memory_decl_rtl (tree obj, int *regno)
3690 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3691 machine_mode address_mode = targetm.addr_space.address_mode (as);
3692 rtx x;
3694 gcc_assert (obj);
3695 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3697 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3698 x = gen_rtx_SYMBOL_REF (address_mode, name);
3699 SET_SYMBOL_REF_DECL (x, obj);
3700 x = gen_rtx_MEM (DECL_MODE (obj), x);
3701 set_mem_addr_space (x, as);
3702 targetm.encode_section_info (obj, x, true);
3704 else
3706 x = gen_raw_REG (address_mode, (*regno)++);
3707 x = gen_rtx_MEM (DECL_MODE (obj), x);
3708 set_mem_addr_space (x, as);
3711 return x;
3714 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3715 walk_tree. DATA contains the actual fake register number. */
3717 static tree
3718 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3720 tree obj = NULL_TREE;
3721 rtx x = NULL_RTX;
3722 int *regno = (int *) data;
3724 switch (TREE_CODE (*expr_p))
3726 case ADDR_EXPR:
3727 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3728 handled_component_p (*expr_p);
3729 expr_p = &TREE_OPERAND (*expr_p, 0))
3730 continue;
3731 obj = *expr_p;
3732 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3733 x = produce_memory_decl_rtl (obj, regno);
3734 break;
3736 case SSA_NAME:
3737 *ws = 0;
3738 obj = SSA_NAME_VAR (*expr_p);
3739 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3740 if (!obj)
3741 return NULL_TREE;
3742 if (!DECL_RTL_SET_P (obj))
3743 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3744 break;
3746 case VAR_DECL:
3747 case PARM_DECL:
3748 case RESULT_DECL:
3749 *ws = 0;
3750 obj = *expr_p;
3752 if (DECL_RTL_SET_P (obj))
3753 break;
3755 if (DECL_MODE (obj) == BLKmode)
3756 x = produce_memory_decl_rtl (obj, regno);
3757 else
3758 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3760 break;
3762 default:
3763 break;
3766 if (x)
3768 decl_rtl_to_reset.safe_push (obj);
3769 SET_DECL_RTL (obj, x);
3772 return NULL_TREE;
3775 /* Determines cost of the computation of EXPR. */
3777 static unsigned
3778 computation_cost (tree expr, bool speed)
3780 rtx_insn *seq;
3781 rtx rslt;
3782 tree type = TREE_TYPE (expr);
3783 unsigned cost;
3784 /* Avoid using hard regs in ways which may be unsupported. */
3785 int regno = LAST_VIRTUAL_REGISTER + 1;
3786 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3787 enum node_frequency real_frequency = node->frequency;
3789 node->frequency = NODE_FREQUENCY_NORMAL;
3790 crtl->maybe_hot_insn_p = speed;
3791 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3792 start_sequence ();
3793 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3794 seq = get_insns ();
3795 end_sequence ();
3796 default_rtl_profile ();
3797 node->frequency = real_frequency;
3799 cost = seq_cost (seq, speed);
3800 if (MEM_P (rslt))
3801 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3802 TYPE_ADDR_SPACE (type), speed);
3803 else if (!REG_P (rslt))
3804 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3806 return cost;
3809 /* Returns variable containing the value of candidate CAND at statement AT. */
3811 static tree
3812 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3814 if (stmt_after_increment (loop, cand, stmt))
3815 return cand->var_after;
3816 else
3817 return cand->var_before;
3820 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3821 same precision that is at least as wide as the precision of TYPE, stores
3822 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3823 type of A and B. */
3825 static tree
3826 determine_common_wider_type (tree *a, tree *b)
3828 tree wider_type = NULL;
3829 tree suba, subb;
3830 tree atype = TREE_TYPE (*a);
3832 if (CONVERT_EXPR_P (*a))
3834 suba = TREE_OPERAND (*a, 0);
3835 wider_type = TREE_TYPE (suba);
3836 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3837 return atype;
3839 else
3840 return atype;
3842 if (CONVERT_EXPR_P (*b))
3844 subb = TREE_OPERAND (*b, 0);
3845 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3846 return atype;
3848 else
3849 return atype;
3851 *a = suba;
3852 *b = subb;
3853 return wider_type;
3856 /* Determines the expression by that USE is expressed from induction variable
3857 CAND at statement AT in LOOP. The expression is stored in two parts in a
3858 decomposed form. The invariant part is stored in AFF_INV; while variant
3859 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3860 non-null. Returns false if USE cannot be expressed using CAND. */
3862 static bool
3863 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3864 struct iv_cand *cand, struct aff_tree *aff_inv,
3865 struct aff_tree *aff_var, widest_int *prat = NULL)
3867 tree ubase = use->iv->base, ustep = use->iv->step;
3868 tree cbase = cand->iv->base, cstep = cand->iv->step;
3869 tree common_type, uutype, var, cstep_common;
3870 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3871 aff_tree aff_cbase;
3872 widest_int rat;
3874 /* We must have a precision to express the values of use. */
3875 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3876 return false;
3878 var = var_at_stmt (loop, cand, at);
3879 uutype = unsigned_type_for (utype);
3881 /* If the conversion is not noop, perform it. */
3882 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3884 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3885 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3887 tree inner_base, inner_step, inner_type;
3888 inner_base = TREE_OPERAND (cbase, 0);
3889 if (CONVERT_EXPR_P (cstep))
3890 inner_step = TREE_OPERAND (cstep, 0);
3891 else
3892 inner_step = cstep;
3894 inner_type = TREE_TYPE (inner_base);
3895 /* If candidate is added from a biv whose type is smaller than
3896 ctype, we know both candidate and the biv won't overflow.
3897 In this case, it's safe to skip the convertion in candidate.
3898 As an example, (unsigned short)((unsigned long)A) equals to
3899 (unsigned short)A, if A has a type no larger than short. */
3900 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3902 cbase = inner_base;
3903 cstep = inner_step;
3906 cbase = fold_convert (uutype, cbase);
3907 cstep = fold_convert (uutype, cstep);
3908 var = fold_convert (uutype, var);
3911 /* Ratio is 1 when computing the value of biv cand by itself.
3912 We can't rely on constant_multiple_of in this case because the
3913 use is created after the original biv is selected. The call
3914 could fail because of inconsistent fold behavior. See PR68021
3915 for more information. */
3916 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3918 gcc_assert (is_gimple_assign (use->stmt));
3919 gcc_assert (use->iv->ssa_name == cand->var_after);
3920 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3921 rat = 1;
3923 else if (!constant_multiple_of (ustep, cstep, &rat))
3924 return false;
3926 if (prat)
3927 *prat = rat;
3929 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3930 type, we achieve better folding by computing their difference in this
3931 wider type, and cast the result to UUTYPE. We do not need to worry about
3932 overflows, as all the arithmetics will in the end be performed in UUTYPE
3933 anyway. */
3934 common_type = determine_common_wider_type (&ubase, &cbase);
3936 /* use = ubase - ratio * cbase + ratio * var. */
3937 tree_to_aff_combination (ubase, common_type, aff_inv);
3938 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3939 tree_to_aff_combination (var, uutype, aff_var);
3941 /* We need to shift the value if we are after the increment. */
3942 if (stmt_after_increment (loop, cand, at))
3944 aff_tree cstep_aff;
3946 if (common_type != uutype)
3947 cstep_common = fold_convert (common_type, cstep);
3948 else
3949 cstep_common = cstep;
3951 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3952 aff_combination_add (&aff_cbase, &cstep_aff);
3955 aff_combination_scale (&aff_cbase, -rat);
3956 aff_combination_add (aff_inv, &aff_cbase);
3957 if (common_type != uutype)
3958 aff_combination_convert (aff_inv, uutype);
3960 aff_combination_scale (aff_var, rat);
3961 return true;
3964 /* Determines the expression by that USE is expressed from induction variable
3965 CAND at statement AT in LOOP. The expression is stored in a decomposed
3966 form into AFF. Returns false if USE cannot be expressed using CAND. */
3968 static bool
3969 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3970 struct iv_cand *cand, struct aff_tree *aff)
3972 aff_tree aff_var;
3974 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3975 return false;
3977 aff_combination_add (aff, &aff_var);
3978 return true;
3981 /* Return the type of USE. */
3983 static tree
3984 get_use_type (struct iv_use *use)
3986 tree base_type = TREE_TYPE (use->iv->base);
3987 tree type;
3989 if (use->type == USE_REF_ADDRESS)
3991 /* The base_type may be a void pointer. Create a pointer type based on
3992 the mem_ref instead. */
3993 type = build_pointer_type (TREE_TYPE (*use->op_p));
3994 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3995 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3997 else
3998 type = base_type;
4000 return type;
4003 /* Determines the expression by that USE is expressed from induction variable
4004 CAND at statement AT in LOOP. The computation is unshared. */
4006 static tree
4007 get_computation_at (struct loop *loop, gimple *at,
4008 struct iv_use *use, struct iv_cand *cand)
4010 aff_tree aff;
4011 tree type = get_use_type (use);
4013 if (!get_computation_aff (loop, at, use, cand, &aff))
4014 return NULL_TREE;
4015 unshare_aff_combination (&aff);
4016 return fold_convert (type, aff_combination_to_tree (&aff));
4019 /* Adjust the cost COST for being in loop setup rather than loop body.
4020 If we're optimizing for space, the loop setup overhead is constant;
4021 if we're optimizing for speed, amortize it over the per-iteration cost.
4022 If ROUND_UP_P is true, the result is round up rather than to zero when
4023 optimizing for speed. */
4024 static unsigned
4025 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4026 bool round_up_p = false)
4028 if (cost == INFTY)
4029 return cost;
4030 else if (optimize_loop_for_speed_p (data->current_loop))
4032 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4033 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4035 else
4036 return cost;
4039 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4040 EXPR operand holding the shift. COST0 and COST1 are the costs for
4041 calculating the operands of EXPR. Returns true if successful, and returns
4042 the cost in COST. */
4044 static bool
4045 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4046 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4048 comp_cost res;
4049 tree op1 = TREE_OPERAND (expr, 1);
4050 tree cst = TREE_OPERAND (mult, 1);
4051 tree multop = TREE_OPERAND (mult, 0);
4052 int m = exact_log2 (int_cst_value (cst));
4053 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4054 int as_cost, sa_cost;
4055 bool mult_in_op1;
4057 if (!(m >= 0 && m < maxm))
4058 return false;
4060 STRIP_NOPS (op1);
4061 mult_in_op1 = operand_equal_p (op1, mult, 0);
4063 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4065 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4066 use that in preference to a shift insn followed by an add insn. */
4067 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4068 ? shiftadd_cost (speed, mode, m)
4069 : (mult_in_op1
4070 ? shiftsub1_cost (speed, mode, m)
4071 : shiftsub0_cost (speed, mode, m)));
4073 res = comp_cost (MIN (as_cost, sa_cost), 0);
4074 res += (mult_in_op1 ? cost0 : cost1);
4076 STRIP_NOPS (multop);
4077 if (!is_gimple_val (multop))
4078 res += force_expr_to_var_cost (multop, speed);
4080 *cost = res;
4081 return true;
4084 /* Estimates cost of forcing expression EXPR into a variable. */
4086 static comp_cost
4087 force_expr_to_var_cost (tree expr, bool speed)
4089 static bool costs_initialized = false;
4090 static unsigned integer_cost [2];
4091 static unsigned symbol_cost [2];
4092 static unsigned address_cost [2];
4093 tree op0, op1;
4094 comp_cost cost0, cost1, cost;
4095 machine_mode mode;
4096 scalar_int_mode int_mode;
4098 if (!costs_initialized)
4100 tree type = build_pointer_type (integer_type_node);
4101 tree var, addr;
4102 rtx x;
4103 int i;
4105 var = create_tmp_var_raw (integer_type_node, "test_var");
4106 TREE_STATIC (var) = 1;
4107 x = produce_memory_decl_rtl (var, NULL);
4108 SET_DECL_RTL (var, x);
4110 addr = build1 (ADDR_EXPR, type, var);
4113 for (i = 0; i < 2; i++)
4115 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4116 2000), i);
4118 symbol_cost[i] = computation_cost (addr, i) + 1;
4120 address_cost[i]
4121 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4122 if (dump_file && (dump_flags & TDF_DETAILS))
4124 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4125 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4126 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4127 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4128 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4129 fprintf (dump_file, "\n");
4133 costs_initialized = true;
4136 STRIP_NOPS (expr);
4138 if (SSA_VAR_P (expr))
4139 return no_cost;
4141 if (is_gimple_min_invariant (expr))
4143 if (poly_int_tree_p (expr))
4144 return comp_cost (integer_cost [speed], 0);
4146 if (TREE_CODE (expr) == ADDR_EXPR)
4148 tree obj = TREE_OPERAND (expr, 0);
4150 if (VAR_P (obj)
4151 || TREE_CODE (obj) == PARM_DECL
4152 || TREE_CODE (obj) == RESULT_DECL)
4153 return comp_cost (symbol_cost [speed], 0);
4156 return comp_cost (address_cost [speed], 0);
4159 switch (TREE_CODE (expr))
4161 case POINTER_PLUS_EXPR:
4162 case PLUS_EXPR:
4163 case MINUS_EXPR:
4164 case MULT_EXPR:
4165 case TRUNC_DIV_EXPR:
4166 case BIT_AND_EXPR:
4167 case BIT_IOR_EXPR:
4168 case LSHIFT_EXPR:
4169 case RSHIFT_EXPR:
4170 op0 = TREE_OPERAND (expr, 0);
4171 op1 = TREE_OPERAND (expr, 1);
4172 STRIP_NOPS (op0);
4173 STRIP_NOPS (op1);
4174 break;
4176 CASE_CONVERT:
4177 case NEGATE_EXPR:
4178 case BIT_NOT_EXPR:
4179 op0 = TREE_OPERAND (expr, 0);
4180 STRIP_NOPS (op0);
4181 op1 = NULL_TREE;
4182 break;
4184 default:
4185 /* Just an arbitrary value, FIXME. */
4186 return comp_cost (target_spill_cost[speed], 0);
4189 if (op0 == NULL_TREE
4190 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4191 cost0 = no_cost;
4192 else
4193 cost0 = force_expr_to_var_cost (op0, speed);
4195 if (op1 == NULL_TREE
4196 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4197 cost1 = no_cost;
4198 else
4199 cost1 = force_expr_to_var_cost (op1, speed);
4201 mode = TYPE_MODE (TREE_TYPE (expr));
4202 switch (TREE_CODE (expr))
4204 case POINTER_PLUS_EXPR:
4205 case PLUS_EXPR:
4206 case MINUS_EXPR:
4207 case NEGATE_EXPR:
4208 cost = comp_cost (add_cost (speed, mode), 0);
4209 if (TREE_CODE (expr) != NEGATE_EXPR)
4211 tree mult = NULL_TREE;
4212 comp_cost sa_cost;
4213 if (TREE_CODE (op1) == MULT_EXPR)
4214 mult = op1;
4215 else if (TREE_CODE (op0) == MULT_EXPR)
4216 mult = op0;
4218 if (mult != NULL_TREE
4219 && is_a <scalar_int_mode> (mode, &int_mode)
4220 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4221 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4222 speed, &sa_cost))
4223 return sa_cost;
4225 break;
4227 CASE_CONVERT:
4229 tree inner_mode, outer_mode;
4230 outer_mode = TREE_TYPE (expr);
4231 inner_mode = TREE_TYPE (op0);
4232 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4233 TYPE_MODE (inner_mode), speed), 0);
4235 break;
4237 case MULT_EXPR:
4238 if (cst_and_fits_in_hwi (op0))
4239 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4240 mode, speed), 0);
4241 else if (cst_and_fits_in_hwi (op1))
4242 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4243 mode, speed), 0);
4244 else
4245 return comp_cost (target_spill_cost [speed], 0);
4246 break;
4248 case TRUNC_DIV_EXPR:
4249 /* Division by power of two is usually cheap, so we allow it. Forbid
4250 anything else. */
4251 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4252 cost = comp_cost (add_cost (speed, mode), 0);
4253 else
4254 cost = comp_cost (target_spill_cost[speed], 0);
4255 break;
4257 case BIT_AND_EXPR:
4258 case BIT_IOR_EXPR:
4259 case BIT_NOT_EXPR:
4260 case LSHIFT_EXPR:
4261 case RSHIFT_EXPR:
4262 cost = comp_cost (add_cost (speed, mode), 0);
4263 break;
4265 default:
4266 gcc_unreachable ();
4269 cost += cost0;
4270 cost += cost1;
4271 return cost;
4274 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4275 invariants the computation depends on. */
4277 static comp_cost
4278 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4280 if (!expr)
4281 return no_cost;
4283 find_inv_vars (data, &expr, inv_vars);
4284 return force_expr_to_var_cost (expr, data->speed);
4287 /* Returns cost of auto-modifying address expression in shape base + offset.
4288 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4289 address expression. The address expression has ADDR_MODE in addr space
4290 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4291 speed or size. */
4293 enum ainc_type
4295 AINC_PRE_INC, /* Pre increment. */
4296 AINC_PRE_DEC, /* Pre decrement. */
4297 AINC_POST_INC, /* Post increment. */
4298 AINC_POST_DEC, /* Post decrement. */
4299 AINC_NONE /* Also the number of auto increment types. */
4302 struct ainc_cost_data
4304 unsigned costs[AINC_NONE];
4307 static comp_cost
4308 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4309 machine_mode addr_mode, machine_mode mem_mode,
4310 addr_space_t as, bool speed)
4312 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4313 && !USE_STORE_PRE_DECREMENT (mem_mode)
4314 && !USE_LOAD_POST_DECREMENT (mem_mode)
4315 && !USE_STORE_POST_DECREMENT (mem_mode)
4316 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4317 && !USE_STORE_PRE_INCREMENT (mem_mode)
4318 && !USE_LOAD_POST_INCREMENT (mem_mode)
4319 && !USE_STORE_POST_INCREMENT (mem_mode))
4320 return infinite_cost;
4322 static vec<ainc_cost_data *> ainc_cost_data_list;
4323 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4324 if (idx >= ainc_cost_data_list.length ())
4326 unsigned nsize = ((unsigned) as + 1) *MAX_MACHINE_MODE;
4328 gcc_assert (nsize > idx);
4329 ainc_cost_data_list.safe_grow_cleared (nsize);
4332 ainc_cost_data *data = ainc_cost_data_list[idx];
4333 if (data == NULL)
4335 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4337 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4338 data->costs[AINC_PRE_DEC] = INFTY;
4339 data->costs[AINC_POST_DEC] = INFTY;
4340 data->costs[AINC_PRE_INC] = INFTY;
4341 data->costs[AINC_POST_INC] = INFTY;
4342 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4343 || USE_STORE_PRE_DECREMENT (mem_mode))
4345 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4347 if (memory_address_addr_space_p (mem_mode, addr, as))
4348 data->costs[AINC_PRE_DEC]
4349 = address_cost (addr, mem_mode, as, speed);
4351 if (USE_LOAD_POST_DECREMENT (mem_mode)
4352 || USE_STORE_POST_DECREMENT (mem_mode))
4354 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4356 if (memory_address_addr_space_p (mem_mode, addr, as))
4357 data->costs[AINC_POST_DEC]
4358 = address_cost (addr, mem_mode, as, speed);
4360 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4361 || USE_STORE_PRE_INCREMENT (mem_mode))
4363 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4365 if (memory_address_addr_space_p (mem_mode, addr, as))
4366 data->costs[AINC_PRE_INC]
4367 = address_cost (addr, mem_mode, as, speed);
4369 if (USE_LOAD_POST_INCREMENT (mem_mode)
4370 || USE_STORE_POST_INCREMENT (mem_mode))
4372 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4374 if (memory_address_addr_space_p (mem_mode, addr, as))
4375 data->costs[AINC_POST_INC]
4376 = address_cost (addr, mem_mode, as, speed);
4378 ainc_cost_data_list[idx] = data;
4381 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4382 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4383 return comp_cost (data->costs[AINC_POST_INC], 0);
4384 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4385 return comp_cost (data->costs[AINC_POST_DEC], 0);
4386 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4387 return comp_cost (data->costs[AINC_PRE_INC], 0);
4388 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4389 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4391 return infinite_cost;
4394 /* Return cost of computing USE's address expression by using CAND.
4395 AFF_INV and AFF_VAR represent invariant and variant parts of the
4396 address expression, respectively. If AFF_INV is simple, store
4397 the loop invariant variables which are depended by it in INV_VARS;
4398 if AFF_INV is complicated, handle it as a new invariant expression
4399 and record it in INV_EXPR. RATIO indicates multiple times between
4400 steps of USE and CAND. If CAN_AUTOINC is nonNULL, store boolean
4401 value to it indicating if this is an auto-increment address. */
4403 static comp_cost
4404 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4405 struct iv_cand *cand, aff_tree *aff_inv,
4406 aff_tree *aff_var, HOST_WIDE_INT ratio,
4407 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4408 bool *can_autoinc, bool speed)
4410 rtx addr;
4411 bool simple_inv = true;
4412 tree comp_inv = NULL_TREE, type = aff_var->type;
4413 comp_cost var_cost = no_cost, cost = no_cost;
4414 struct mem_address parts = {NULL_TREE, integer_one_node,
4415 NULL_TREE, NULL_TREE, NULL_TREE};
4416 machine_mode addr_mode = TYPE_MODE (type);
4417 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4418 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4419 /* Only true if ratio != 1. */
4420 bool ok_with_ratio_p = false;
4421 bool ok_without_ratio_p = false;
4423 if (!aff_combination_const_p (aff_inv))
4425 parts.index = integer_one_node;
4426 /* Addressing mode "base + index". */
4427 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4428 if (ratio != 1)
4430 parts.step = wide_int_to_tree (type, ratio);
4431 /* Addressing mode "base + index << scale". */
4432 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4433 if (!ok_with_ratio_p)
4434 parts.step = NULL_TREE;
4436 if (ok_with_ratio_p || ok_without_ratio_p)
4438 if (maybe_ne (aff_inv->offset, 0))
4440 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4441 /* Addressing mode "base + index [<< scale] + offset". */
4442 if (!valid_mem_ref_p (mem_mode, as, &parts))
4443 parts.offset = NULL_TREE;
4444 else
4445 aff_inv->offset = 0;
4448 move_fixed_address_to_symbol (&parts, aff_inv);
4449 /* Base is fixed address and is moved to symbol part. */
4450 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4451 parts.base = NULL_TREE;
4453 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4454 if (parts.symbol != NULL_TREE
4455 && !valid_mem_ref_p (mem_mode, as, &parts))
4457 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4458 parts.symbol = NULL_TREE;
4459 /* Reset SIMPLE_INV since symbol address needs to be computed
4460 outside of address expression in this case. */
4461 simple_inv = false;
4462 /* Symbol part is moved back to base part, it can't be NULL. */
4463 parts.base = integer_one_node;
4466 else
4467 parts.index = NULL_TREE;
4469 else
4471 poly_int64 ainc_step;
4472 if (can_autoinc
4473 && ratio == 1
4474 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4476 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4478 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4479 ainc_offset += ainc_step;
4480 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4481 addr_mode, mem_mode, as, speed);
4482 if (!cost.infinite_cost_p ())
4484 *can_autoinc = true;
4485 return cost;
4487 cost = no_cost;
4489 if (!aff_combination_zero_p (aff_inv))
4491 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4492 /* Addressing mode "base + offset". */
4493 if (!valid_mem_ref_p (mem_mode, as, &parts))
4494 parts.offset = NULL_TREE;
4495 else
4496 aff_inv->offset = 0;
4500 if (simple_inv)
4501 simple_inv = (aff_inv == NULL
4502 || aff_combination_const_p (aff_inv)
4503 || aff_combination_singleton_var_p (aff_inv));
4504 if (!aff_combination_zero_p (aff_inv))
4505 comp_inv = aff_combination_to_tree (aff_inv);
4506 if (comp_inv != NULL_TREE)
4507 cost = force_var_cost (data, comp_inv, inv_vars);
4508 if (ratio != 1 && parts.step == NULL_TREE)
4509 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4510 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4511 var_cost += add_cost (speed, addr_mode);
4513 if (comp_inv && inv_expr && !simple_inv)
4515 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4516 /* Clear depends on. */
4517 if (*inv_expr != NULL && inv_vars && *inv_vars)
4518 bitmap_clear (*inv_vars);
4520 /* Cost of small invariant expression adjusted against loop niters
4521 is usually zero, which makes it difficult to be differentiated
4522 from candidate based on loop invariant variables. Secondly, the
4523 generated invariant expression may not be hoisted out of loop by
4524 following pass. We penalize the cost by rounding up in order to
4525 neutralize such effects. */
4526 cost.cost = adjust_setup_cost (data, cost.cost, true);
4527 cost.scratch = cost.cost;
4530 cost += var_cost;
4531 addr = addr_for_mem_ref (&parts, as, false);
4532 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4533 cost += address_cost (addr, mem_mode, as, speed);
4535 if (parts.symbol != NULL_TREE)
4536 cost.complexity += 1;
4537 /* Don't increase the complexity of adding a scaled index if it's
4538 the only kind of index that the target allows. */
4539 if (parts.step != NULL_TREE && ok_without_ratio_p)
4540 cost.complexity += 1;
4541 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4542 cost.complexity += 1;
4543 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4544 cost.complexity += 1;
4546 return cost;
4549 /* Scale (multiply) the computed COST (except scratch part that should be
4550 hoisted out a loop) by header->frequency / AT->frequency, which makes
4551 expected cost more accurate. */
4553 static comp_cost
4554 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4556 int loop_freq = data->current_loop->header->count.to_frequency (cfun);
4557 int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
4558 if (loop_freq != 0)
4560 gcc_assert (cost.scratch <= cost.cost);
4561 int scaled_cost
4562 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4564 if (dump_file && (dump_flags & TDF_DETAILS))
4565 fprintf (dump_file, "Scaling cost based on bb prob "
4566 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4567 1.0f * bb_freq / loop_freq, cost.cost,
4568 cost.scratch, scaled_cost, bb_freq, loop_freq);
4570 cost.cost = scaled_cost;
4573 return cost;
4576 /* Determines the cost of the computation by that USE is expressed
4577 from induction variable CAND. If ADDRESS_P is true, we just need
4578 to create an address from it, otherwise we want to get it into
4579 register. A set of invariants we depend on is stored in INV_VARS.
4580 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4581 addressing is likely. If INV_EXPR is nonnull, record invariant
4582 expr entry in it. */
4584 static comp_cost
4585 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4586 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4587 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4589 gimple *at = use->stmt;
4590 tree ubase = use->iv->base, cbase = cand->iv->base;
4591 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4592 tree comp_inv = NULL_TREE;
4593 HOST_WIDE_INT ratio, aratio;
4594 comp_cost cost;
4595 widest_int rat;
4596 aff_tree aff_inv, aff_var;
4597 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4599 if (inv_vars)
4600 *inv_vars = NULL;
4601 if (can_autoinc)
4602 *can_autoinc = false;
4603 if (inv_expr)
4604 *inv_expr = NULL;
4606 /* Check if we have enough precision to express the values of use. */
4607 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4608 return infinite_cost;
4610 if (address_p
4611 || (use->iv->base_object
4612 && cand->iv->base_object
4613 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4614 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4616 /* Do not try to express address of an object with computation based
4617 on address of a different object. This may cause problems in rtl
4618 level alias analysis (that does not expect this to be happening,
4619 as this is illegal in C), and would be unlikely to be useful
4620 anyway. */
4621 if (use->iv->base_object
4622 && cand->iv->base_object
4623 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4624 return infinite_cost;
4627 if (!get_computation_aff_1 (data->current_loop, at, use,
4628 cand, &aff_inv, &aff_var, &rat)
4629 || !wi::fits_shwi_p (rat))
4630 return infinite_cost;
4632 ratio = rat.to_shwi ();
4633 if (address_p)
4635 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4636 inv_vars, inv_expr, can_autoinc, speed);
4637 return get_scaled_computation_cost_at (data, at, cost);
4640 bool simple_inv = (aff_combination_const_p (&aff_inv)
4641 || aff_combination_singleton_var_p (&aff_inv));
4642 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4643 aff_combination_convert (&aff_inv, signed_type);
4644 if (!aff_combination_zero_p (&aff_inv))
4645 comp_inv = aff_combination_to_tree (&aff_inv);
4647 cost = force_var_cost (data, comp_inv, inv_vars);
4648 if (comp_inv && inv_expr && !simple_inv)
4650 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4651 /* Clear depends on. */
4652 if (*inv_expr != NULL && inv_vars && *inv_vars)
4653 bitmap_clear (*inv_vars);
4655 cost.cost = adjust_setup_cost (data, cost.cost);
4656 /* Record setup cost in scratch field. */
4657 cost.scratch = cost.cost;
4659 /* Cost of constant integer can be covered when adding invariant part to
4660 variant part. */
4661 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4662 cost = no_cost;
4664 /* Need type narrowing to represent use with cand. */
4665 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4667 machine_mode outer_mode = TYPE_MODE (utype);
4668 machine_mode inner_mode = TYPE_MODE (ctype);
4669 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4672 /* Turn a + i * (-c) into a - i * c. */
4673 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4674 aratio = -ratio;
4675 else
4676 aratio = ratio;
4678 if (ratio != 1)
4679 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4681 /* TODO: We may also need to check if we can compute a + i * 4 in one
4682 instruction. */
4683 /* Need to add up the invariant and variant parts. */
4684 if (comp_inv && !integer_zerop (comp_inv))
4685 cost += add_cost (speed, TYPE_MODE (utype));
4687 return get_scaled_computation_cost_at (data, at, cost);
4690 /* Determines cost of computing the use in GROUP with CAND in a generic
4691 expression. */
4693 static bool
4694 determine_group_iv_cost_generic (struct ivopts_data *data,
4695 struct iv_group *group, struct iv_cand *cand)
4697 comp_cost cost;
4698 iv_inv_expr_ent *inv_expr = NULL;
4699 bitmap inv_vars = NULL, inv_exprs = NULL;
4700 struct iv_use *use = group->vuses[0];
4702 /* The simple case first -- if we need to express value of the preserved
4703 original biv, the cost is 0. This also prevents us from counting the
4704 cost of increment twice -- once at this use and once in the cost of
4705 the candidate. */
4706 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4707 cost = no_cost;
4708 else
4709 cost = get_computation_cost (data, use, cand, false,
4710 &inv_vars, NULL, &inv_expr);
4712 if (inv_expr)
4714 inv_exprs = BITMAP_ALLOC (NULL);
4715 bitmap_set_bit (inv_exprs, inv_expr->id);
4717 set_group_iv_cost (data, group, cand, cost, inv_vars,
4718 NULL_TREE, ERROR_MARK, inv_exprs);
4719 return !cost.infinite_cost_p ();
4722 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4724 static bool
4725 determine_group_iv_cost_address (struct ivopts_data *data,
4726 struct iv_group *group, struct iv_cand *cand)
4728 unsigned i;
4729 bitmap inv_vars = NULL, inv_exprs = NULL;
4730 bool can_autoinc;
4731 iv_inv_expr_ent *inv_expr = NULL;
4732 struct iv_use *use = group->vuses[0];
4733 comp_cost sum_cost = no_cost, cost;
4735 cost = get_computation_cost (data, use, cand, true,
4736 &inv_vars, &can_autoinc, &inv_expr);
4738 if (inv_expr)
4740 inv_exprs = BITMAP_ALLOC (NULL);
4741 bitmap_set_bit (inv_exprs, inv_expr->id);
4743 sum_cost = cost;
4744 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4746 if (can_autoinc)
4747 sum_cost -= cand->cost_step;
4748 /* If we generated the candidate solely for exploiting autoincrement
4749 opportunities, and it turns out it can't be used, set the cost to
4750 infinity to make sure we ignore it. */
4751 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4752 sum_cost = infinite_cost;
4755 /* Uses in a group can share setup code, so only add setup cost once. */
4756 cost -= cost.scratch;
4757 /* Compute and add costs for rest uses of this group. */
4758 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4760 struct iv_use *next = group->vuses[i];
4762 /* TODO: We could skip computing cost for sub iv_use when it has the
4763 same cost as the first iv_use, but the cost really depends on the
4764 offset and where the iv_use is. */
4765 cost = get_computation_cost (data, next, cand, true,
4766 NULL, &can_autoinc, &inv_expr);
4767 if (inv_expr)
4769 if (!inv_exprs)
4770 inv_exprs = BITMAP_ALLOC (NULL);
4772 bitmap_set_bit (inv_exprs, inv_expr->id);
4774 sum_cost += cost;
4776 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4777 NULL_TREE, ERROR_MARK, inv_exprs);
4779 return !sum_cost.infinite_cost_p ();
4782 /* Computes value of candidate CAND at position AT in iteration NITER, and
4783 stores it to VAL. */
4785 static void
4786 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4787 aff_tree *val)
4789 aff_tree step, delta, nit;
4790 struct iv *iv = cand->iv;
4791 tree type = TREE_TYPE (iv->base);
4792 tree steptype;
4793 if (POINTER_TYPE_P (type))
4794 steptype = sizetype;
4795 else
4796 steptype = unsigned_type_for (type);
4798 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4799 aff_combination_convert (&step, steptype);
4800 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4801 aff_combination_convert (&nit, steptype);
4802 aff_combination_mult (&nit, &step, &delta);
4803 if (stmt_after_increment (loop, cand, at))
4804 aff_combination_add (&delta, &step);
4806 tree_to_aff_combination (iv->base, type, val);
4807 if (!POINTER_TYPE_P (type))
4808 aff_combination_convert (val, steptype);
4809 aff_combination_add (val, &delta);
4812 /* Returns period of induction variable iv. */
4814 static tree
4815 iv_period (struct iv *iv)
4817 tree step = iv->step, period, type;
4818 tree pow2div;
4820 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4822 type = unsigned_type_for (TREE_TYPE (step));
4823 /* Period of the iv is lcm (step, type_range)/step -1,
4824 i.e., N*type_range/step - 1. Since type range is power
4825 of two, N == (step >> num_of_ending_zeros_binary (step),
4826 so the final result is
4828 (type_range >> num_of_ending_zeros_binary (step)) - 1
4831 pow2div = num_ending_zeros (step);
4833 period = build_low_bits_mask (type,
4834 (TYPE_PRECISION (type)
4835 - tree_to_uhwi (pow2div)));
4837 return period;
4840 /* Returns the comparison operator used when eliminating the iv USE. */
4842 static enum tree_code
4843 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4845 struct loop *loop = data->current_loop;
4846 basic_block ex_bb;
4847 edge exit;
4849 ex_bb = gimple_bb (use->stmt);
4850 exit = EDGE_SUCC (ex_bb, 0);
4851 if (flow_bb_inside_loop_p (loop, exit->dest))
4852 exit = EDGE_SUCC (ex_bb, 1);
4854 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4857 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4858 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4859 calculation is performed in non-wrapping type.
4861 TODO: More generally, we could test for the situation that
4862 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4863 This would require knowing the sign of OFFSET. */
4865 static bool
4866 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4868 enum tree_code code;
4869 tree e1, e2;
4870 aff_tree aff_e1, aff_e2, aff_offset;
4872 if (!nowrap_type_p (TREE_TYPE (base)))
4873 return false;
4875 base = expand_simple_operations (base);
4877 if (TREE_CODE (base) == SSA_NAME)
4879 gimple *stmt = SSA_NAME_DEF_STMT (base);
4881 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4882 return false;
4884 code = gimple_assign_rhs_code (stmt);
4885 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4886 return false;
4888 e1 = gimple_assign_rhs1 (stmt);
4889 e2 = gimple_assign_rhs2 (stmt);
4891 else
4893 code = TREE_CODE (base);
4894 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4895 return false;
4896 e1 = TREE_OPERAND (base, 0);
4897 e2 = TREE_OPERAND (base, 1);
4900 /* Use affine expansion as deeper inspection to prove the equality. */
4901 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4902 &aff_e2, &data->name_expansion_cache);
4903 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4904 &aff_offset, &data->name_expansion_cache);
4905 aff_combination_scale (&aff_offset, -1);
4906 switch (code)
4908 case PLUS_EXPR:
4909 aff_combination_add (&aff_e2, &aff_offset);
4910 if (aff_combination_zero_p (&aff_e2))
4911 return true;
4913 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4914 &aff_e1, &data->name_expansion_cache);
4915 aff_combination_add (&aff_e1, &aff_offset);
4916 return aff_combination_zero_p (&aff_e1);
4918 case POINTER_PLUS_EXPR:
4919 aff_combination_add (&aff_e2, &aff_offset);
4920 return aff_combination_zero_p (&aff_e2);
4922 default:
4923 return false;
4927 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4928 comparison with CAND. NITER describes the number of iterations of
4929 the loops. If successful, the comparison in COMP_P is altered accordingly.
4931 We aim to handle the following situation:
4933 sometype *base, *p;
4934 int a, b, i;
4936 i = a;
4937 p = p_0 = base + a;
4941 bla (*p);
4942 p++;
4943 i++;
4945 while (i < b);
4947 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4948 We aim to optimize this to
4950 p = p_0 = base + a;
4953 bla (*p);
4954 p++;
4956 while (p < p_0 - a + b);
4958 This preserves the correctness, since the pointer arithmetics does not
4959 overflow. More precisely:
4961 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4962 overflow in computing it or the values of p.
4963 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4964 overflow. To prove this, we use the fact that p_0 = base + a. */
4966 static bool
4967 iv_elimination_compare_lt (struct ivopts_data *data,
4968 struct iv_cand *cand, enum tree_code *comp_p,
4969 struct tree_niter_desc *niter)
4971 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4972 struct aff_tree nit, tmpa, tmpb;
4973 enum tree_code comp;
4974 HOST_WIDE_INT step;
4976 /* We need to know that the candidate induction variable does not overflow.
4977 While more complex analysis may be used to prove this, for now just
4978 check that the variable appears in the original program and that it
4979 is computed in a type that guarantees no overflows. */
4980 cand_type = TREE_TYPE (cand->iv->base);
4981 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4982 return false;
4984 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4985 the calculation of the BOUND could overflow, making the comparison
4986 invalid. */
4987 if (!data->loop_single_exit_p)
4988 return false;
4990 /* We need to be able to decide whether candidate is increasing or decreasing
4991 in order to choose the right comparison operator. */
4992 if (!cst_and_fits_in_hwi (cand->iv->step))
4993 return false;
4994 step = int_cst_value (cand->iv->step);
4996 /* Check that the number of iterations matches the expected pattern:
4997 a + 1 > b ? 0 : b - a - 1. */
4998 mbz = niter->may_be_zero;
4999 if (TREE_CODE (mbz) == GT_EXPR)
5001 /* Handle a + 1 > b. */
5002 tree op0 = TREE_OPERAND (mbz, 0);
5003 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5005 a = TREE_OPERAND (op0, 0);
5006 b = TREE_OPERAND (mbz, 1);
5008 else
5009 return false;
5011 else if (TREE_CODE (mbz) == LT_EXPR)
5013 tree op1 = TREE_OPERAND (mbz, 1);
5015 /* Handle b < a + 1. */
5016 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5018 a = TREE_OPERAND (op1, 0);
5019 b = TREE_OPERAND (mbz, 0);
5021 else
5022 return false;
5024 else
5025 return false;
5027 /* Expected number of iterations is B - A - 1. Check that it matches
5028 the actual number, i.e., that B - A - NITER = 1. */
5029 tree_to_aff_combination (niter->niter, nit_type, &nit);
5030 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5031 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5032 aff_combination_scale (&nit, -1);
5033 aff_combination_scale (&tmpa, -1);
5034 aff_combination_add (&tmpb, &tmpa);
5035 aff_combination_add (&tmpb, &nit);
5036 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5037 return false;
5039 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5040 overflow. */
5041 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5042 cand->iv->step,
5043 fold_convert (TREE_TYPE (cand->iv->step), a));
5044 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5045 return false;
5047 /* Determine the new comparison operator. */
5048 comp = step < 0 ? GT_EXPR : LT_EXPR;
5049 if (*comp_p == NE_EXPR)
5050 *comp_p = comp;
5051 else if (*comp_p == EQ_EXPR)
5052 *comp_p = invert_tree_comparison (comp, false);
5053 else
5054 gcc_unreachable ();
5056 return true;
5059 /* Check whether it is possible to express the condition in USE by comparison
5060 of candidate CAND. If so, store the value compared with to BOUND, and the
5061 comparison operator to COMP. */
5063 static bool
5064 may_eliminate_iv (struct ivopts_data *data,
5065 struct iv_use *use, struct iv_cand *cand, tree *bound,
5066 enum tree_code *comp)
5068 basic_block ex_bb;
5069 edge exit;
5070 tree period;
5071 struct loop *loop = data->current_loop;
5072 aff_tree bnd;
5073 struct tree_niter_desc *desc = NULL;
5075 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5076 return false;
5078 /* For now works only for exits that dominate the loop latch.
5079 TODO: extend to other conditions inside loop body. */
5080 ex_bb = gimple_bb (use->stmt);
5081 if (use->stmt != last_stmt (ex_bb)
5082 || gimple_code (use->stmt) != GIMPLE_COND
5083 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5084 return false;
5086 exit = EDGE_SUCC (ex_bb, 0);
5087 if (flow_bb_inside_loop_p (loop, exit->dest))
5088 exit = EDGE_SUCC (ex_bb, 1);
5089 if (flow_bb_inside_loop_p (loop, exit->dest))
5090 return false;
5092 desc = niter_for_exit (data, exit);
5093 if (!desc)
5094 return false;
5096 /* Determine whether we can use the variable to test the exit condition.
5097 This is the case iff the period of the induction variable is greater
5098 than the number of iterations for which the exit condition is true. */
5099 period = iv_period (cand->iv);
5101 /* If the number of iterations is constant, compare against it directly. */
5102 if (TREE_CODE (desc->niter) == INTEGER_CST)
5104 /* See cand_value_at. */
5105 if (stmt_after_increment (loop, cand, use->stmt))
5107 if (!tree_int_cst_lt (desc->niter, period))
5108 return false;
5110 else
5112 if (tree_int_cst_lt (period, desc->niter))
5113 return false;
5117 /* If not, and if this is the only possible exit of the loop, see whether
5118 we can get a conservative estimate on the number of iterations of the
5119 entire loop and compare against that instead. */
5120 else
5122 widest_int period_value, max_niter;
5124 max_niter = desc->max;
5125 if (stmt_after_increment (loop, cand, use->stmt))
5126 max_niter += 1;
5127 period_value = wi::to_widest (period);
5128 if (wi::gtu_p (max_niter, period_value))
5130 /* See if we can take advantage of inferred loop bound
5131 information. */
5132 if (data->loop_single_exit_p)
5134 if (!max_loop_iterations (loop, &max_niter))
5135 return false;
5136 /* The loop bound is already adjusted by adding 1. */
5137 if (wi::gtu_p (max_niter, period_value))
5138 return false;
5140 else
5141 return false;
5145 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5147 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5148 aff_combination_to_tree (&bnd));
5149 *comp = iv_elimination_compare (data, use);
5151 /* It is unlikely that computing the number of iterations using division
5152 would be more profitable than keeping the original induction variable. */
5153 if (expression_expensive_p (*bound))
5154 return false;
5156 /* Sometimes, it is possible to handle the situation that the number of
5157 iterations may be zero unless additional assumptions by using <
5158 instead of != in the exit condition.
5160 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5161 base the exit condition on it. However, that is often too
5162 expensive. */
5163 if (!integer_zerop (desc->may_be_zero))
5164 return iv_elimination_compare_lt (data, cand, comp, desc);
5166 return true;
5169 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5170 be copied, if it is used in the loop body and DATA->body_includes_call. */
5172 static int
5173 parm_decl_cost (struct ivopts_data *data, tree bound)
5175 tree sbound = bound;
5176 STRIP_NOPS (sbound);
5178 if (TREE_CODE (sbound) == SSA_NAME
5179 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5180 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5181 && data->body_includes_call)
5182 return COSTS_N_INSNS (1);
5184 return 0;
5187 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5189 static bool
5190 determine_group_iv_cost_cond (struct ivopts_data *data,
5191 struct iv_group *group, struct iv_cand *cand)
5193 tree bound = NULL_TREE;
5194 struct iv *cmp_iv;
5195 bitmap inv_exprs = NULL;
5196 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5197 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5198 enum comp_iv_rewrite rewrite_type;
5199 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5200 tree *control_var, *bound_cst;
5201 enum tree_code comp = ERROR_MARK;
5202 struct iv_use *use = group->vuses[0];
5204 /* Extract condition operands. */
5205 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5206 &bound_cst, NULL, &cmp_iv);
5207 gcc_assert (rewrite_type != COMP_IV_NA);
5209 /* Try iv elimination. */
5210 if (rewrite_type == COMP_IV_ELIM
5211 && may_eliminate_iv (data, use, cand, &bound, &comp))
5213 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5214 if (elim_cost.cost == 0)
5215 elim_cost.cost = parm_decl_cost (data, bound);
5216 else if (TREE_CODE (bound) == INTEGER_CST)
5217 elim_cost.cost = 0;
5218 /* If we replace a loop condition 'i < n' with 'p < base + n',
5219 inv_vars_elim will have 'base' and 'n' set, which implies that both
5220 'base' and 'n' will be live during the loop. More likely,
5221 'base + n' will be loop invariant, resulting in only one live value
5222 during the loop. So in that case we clear inv_vars_elim and set
5223 inv_expr_elim instead. */
5224 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5226 inv_expr_elim = get_loop_invariant_expr (data, bound);
5227 bitmap_clear (inv_vars_elim);
5229 /* The bound is a loop invariant, so it will be only computed
5230 once. */
5231 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5234 /* When the condition is a comparison of the candidate IV against
5235 zero, prefer this IV.
5237 TODO: The constant that we're subtracting from the cost should
5238 be target-dependent. This information should be added to the
5239 target costs for each backend. */
5240 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5241 && integer_zerop (*bound_cst)
5242 && (operand_equal_p (*control_var, cand->var_after, 0)
5243 || operand_equal_p (*control_var, cand->var_before, 0)))
5244 elim_cost -= 1;
5246 express_cost = get_computation_cost (data, use, cand, false,
5247 &inv_vars_express, NULL,
5248 &inv_expr_express);
5249 if (cmp_iv != NULL)
5250 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5252 /* Count the cost of the original bound as well. */
5253 bound_cost = force_var_cost (data, *bound_cst, NULL);
5254 if (bound_cost.cost == 0)
5255 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5256 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5257 bound_cost.cost = 0;
5258 express_cost += bound_cost;
5260 /* Choose the better approach, preferring the eliminated IV. */
5261 if (elim_cost <= express_cost)
5263 cost = elim_cost;
5264 inv_vars = inv_vars_elim;
5265 inv_vars_elim = NULL;
5266 inv_expr = inv_expr_elim;
5268 else
5270 cost = express_cost;
5271 inv_vars = inv_vars_express;
5272 inv_vars_express = NULL;
5273 bound = NULL_TREE;
5274 comp = ERROR_MARK;
5275 inv_expr = inv_expr_express;
5278 if (inv_expr)
5280 inv_exprs = BITMAP_ALLOC (NULL);
5281 bitmap_set_bit (inv_exprs, inv_expr->id);
5283 set_group_iv_cost (data, group, cand, cost,
5284 inv_vars, bound, comp, inv_exprs);
5286 if (inv_vars_elim)
5287 BITMAP_FREE (inv_vars_elim);
5288 if (inv_vars_express)
5289 BITMAP_FREE (inv_vars_express);
5291 return !cost.infinite_cost_p ();
5294 /* Determines cost of computing uses in GROUP with CAND. Returns false
5295 if USE cannot be represented with CAND. */
5297 static bool
5298 determine_group_iv_cost (struct ivopts_data *data,
5299 struct iv_group *group, struct iv_cand *cand)
5301 switch (group->type)
5303 case USE_NONLINEAR_EXPR:
5304 return determine_group_iv_cost_generic (data, group, cand);
5306 case USE_REF_ADDRESS:
5307 case USE_PTR_ADDRESS:
5308 return determine_group_iv_cost_address (data, group, cand);
5310 case USE_COMPARE:
5311 return determine_group_iv_cost_cond (data, group, cand);
5313 default:
5314 gcc_unreachable ();
5318 /* Return true if get_computation_cost indicates that autoincrement is
5319 a possibility for the pair of USE and CAND, false otherwise. */
5321 static bool
5322 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5323 struct iv_cand *cand)
5325 if (!address_p (use->type))
5326 return false;
5328 bool can_autoinc = false;
5329 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5330 return can_autoinc;
5333 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5334 use that allows autoincrement, and set their AINC_USE if possible. */
5336 static void
5337 set_autoinc_for_original_candidates (struct ivopts_data *data)
5339 unsigned i, j;
5341 for (i = 0; i < data->vcands.length (); i++)
5343 struct iv_cand *cand = data->vcands[i];
5344 struct iv_use *closest_before = NULL;
5345 struct iv_use *closest_after = NULL;
5346 if (cand->pos != IP_ORIGINAL)
5347 continue;
5349 for (j = 0; j < data->vgroups.length (); j++)
5351 struct iv_group *group = data->vgroups[j];
5352 struct iv_use *use = group->vuses[0];
5353 unsigned uid = gimple_uid (use->stmt);
5355 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5356 continue;
5358 if (uid < gimple_uid (cand->incremented_at)
5359 && (closest_before == NULL
5360 || uid > gimple_uid (closest_before->stmt)))
5361 closest_before = use;
5363 if (uid > gimple_uid (cand->incremented_at)
5364 && (closest_after == NULL
5365 || uid < gimple_uid (closest_after->stmt)))
5366 closest_after = use;
5369 if (closest_before != NULL
5370 && autoinc_possible_for_pair (data, closest_before, cand))
5371 cand->ainc_use = closest_before;
5372 else if (closest_after != NULL
5373 && autoinc_possible_for_pair (data, closest_after, cand))
5374 cand->ainc_use = closest_after;
5378 /* Relate compare use with all candidates. */
5380 static void
5381 relate_compare_use_with_all_cands (struct ivopts_data *data)
5383 unsigned i, count = data->vcands.length ();
5384 for (i = 0; i < data->vgroups.length (); i++)
5386 struct iv_group *group = data->vgroups[i];
5388 if (group->type == USE_COMPARE)
5389 bitmap_set_range (group->related_cands, 0, count);
5393 /* Finds the candidates for the induction variables. */
5395 static void
5396 find_iv_candidates (struct ivopts_data *data)
5398 /* Add commonly used ivs. */
5399 add_standard_iv_candidates (data);
5401 /* Add old induction variables. */
5402 add_iv_candidate_for_bivs (data);
5404 /* Add induction variables derived from uses. */
5405 add_iv_candidate_for_groups (data);
5407 set_autoinc_for_original_candidates (data);
5409 /* Record the important candidates. */
5410 record_important_candidates (data);
5412 /* Relate compare iv_use with all candidates. */
5413 if (!data->consider_all_candidates)
5414 relate_compare_use_with_all_cands (data);
5416 if (dump_file && (dump_flags & TDF_DETAILS))
5418 unsigned i;
5420 fprintf (dump_file, "\n<Important Candidates>:\t");
5421 for (i = 0; i < data->vcands.length (); i++)
5422 if (data->vcands[i]->important)
5423 fprintf (dump_file, " %d,", data->vcands[i]->id);
5424 fprintf (dump_file, "\n");
5426 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5427 for (i = 0; i < data->vgroups.length (); i++)
5429 struct iv_group *group = data->vgroups[i];
5431 if (group->related_cands)
5433 fprintf (dump_file, " Group %d:\t", group->id);
5434 dump_bitmap (dump_file, group->related_cands);
5437 fprintf (dump_file, "\n");
5441 /* Determines costs of computing use of iv with an iv candidate. */
5443 static void
5444 determine_group_iv_costs (struct ivopts_data *data)
5446 unsigned i, j;
5447 struct iv_cand *cand;
5448 struct iv_group *group;
5449 bitmap to_clear = BITMAP_ALLOC (NULL);
5451 alloc_use_cost_map (data);
5453 for (i = 0; i < data->vgroups.length (); i++)
5455 group = data->vgroups[i];
5457 if (data->consider_all_candidates)
5459 for (j = 0; j < data->vcands.length (); j++)
5461 cand = data->vcands[j];
5462 determine_group_iv_cost (data, group, cand);
5465 else
5467 bitmap_iterator bi;
5469 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5471 cand = data->vcands[j];
5472 if (!determine_group_iv_cost (data, group, cand))
5473 bitmap_set_bit (to_clear, j);
5476 /* Remove the candidates for that the cost is infinite from
5477 the list of related candidates. */
5478 bitmap_and_compl_into (group->related_cands, to_clear);
5479 bitmap_clear (to_clear);
5483 BITMAP_FREE (to_clear);
5485 if (dump_file && (dump_flags & TDF_DETAILS))
5487 bitmap_iterator bi;
5489 /* Dump invariant variables. */
5490 fprintf (dump_file, "\n<Invariant Vars>:\n");
5491 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5493 struct version_info *info = ver_info (data, i);
5494 if (info->inv_id)
5496 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5497 print_generic_expr (dump_file, info->name, TDF_SLIM);
5498 fprintf (dump_file, "%s\n",
5499 info->has_nonlin_use ? "" : "\t(eliminable)");
5503 /* Dump invariant expressions. */
5504 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5505 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5507 for (hash_table<iv_inv_expr_hasher>::iterator it
5508 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5509 ++it)
5510 list.safe_push (*it);
5512 list.qsort (sort_iv_inv_expr_ent);
5514 for (i = 0; i < list.length (); ++i)
5516 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5517 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5518 fprintf (dump_file, "\n");
5521 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5523 for (i = 0; i < data->vgroups.length (); i++)
5525 group = data->vgroups[i];
5527 fprintf (dump_file, "Group %d:\n", i);
5528 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5529 for (j = 0; j < group->n_map_members; j++)
5531 if (!group->cost_map[j].cand
5532 || group->cost_map[j].cost.infinite_cost_p ())
5533 continue;
5535 fprintf (dump_file, " %d\t%d\t%d\t",
5536 group->cost_map[j].cand->id,
5537 group->cost_map[j].cost.cost,
5538 group->cost_map[j].cost.complexity);
5539 if (!group->cost_map[j].inv_exprs
5540 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5541 fprintf (dump_file, "NIL;\t");
5542 else
5543 bitmap_print (dump_file,
5544 group->cost_map[j].inv_exprs, "", ";\t");
5545 if (!group->cost_map[j].inv_vars
5546 || bitmap_empty_p (group->cost_map[j].inv_vars))
5547 fprintf (dump_file, "NIL;\n");
5548 else
5549 bitmap_print (dump_file,
5550 group->cost_map[j].inv_vars, "", "\n");
5553 fprintf (dump_file, "\n");
5555 fprintf (dump_file, "\n");
5559 /* Determines cost of the candidate CAND. */
5561 static void
5562 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5564 comp_cost cost_base;
5565 unsigned cost, cost_step;
5566 tree base;
5568 gcc_assert (cand->iv != NULL);
5570 /* There are two costs associated with the candidate -- its increment
5571 and its initialization. The second is almost negligible for any loop
5572 that rolls enough, so we take it just very little into account. */
5574 base = cand->iv->base;
5575 cost_base = force_var_cost (data, base, NULL);
5576 /* It will be exceptional that the iv register happens to be initialized with
5577 the proper value at no cost. In general, there will at least be a regcopy
5578 or a const set. */
5579 if (cost_base.cost == 0)
5580 cost_base.cost = COSTS_N_INSNS (1);
5581 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5583 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5585 /* Prefer the original ivs unless we may gain something by replacing it.
5586 The reason is to make debugging simpler; so this is not relevant for
5587 artificial ivs created by other optimization passes. */
5588 if (cand->pos != IP_ORIGINAL
5589 || !SSA_NAME_VAR (cand->var_before)
5590 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5591 cost++;
5593 /* Prefer not to insert statements into latch unless there are some
5594 already (so that we do not create unnecessary jumps). */
5595 if (cand->pos == IP_END
5596 && empty_block_p (ip_end_pos (data->current_loop)))
5597 cost++;
5599 cand->cost = cost;
5600 cand->cost_step = cost_step;
5603 /* Determines costs of computation of the candidates. */
5605 static void
5606 determine_iv_costs (struct ivopts_data *data)
5608 unsigned i;
5610 if (dump_file && (dump_flags & TDF_DETAILS))
5612 fprintf (dump_file, "<Candidate Costs>:\n");
5613 fprintf (dump_file, " cand\tcost\n");
5616 for (i = 0; i < data->vcands.length (); i++)
5618 struct iv_cand *cand = data->vcands[i];
5620 determine_iv_cost (data, cand);
5622 if (dump_file && (dump_flags & TDF_DETAILS))
5623 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5626 if (dump_file && (dump_flags & TDF_DETAILS))
5627 fprintf (dump_file, "\n");
5630 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5631 induction variables. Note N_INVS includes both invariant variables and
5632 invariant expressions. */
5634 static unsigned
5635 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5636 unsigned n_cands)
5638 unsigned cost;
5639 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5640 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5641 bool speed = data->speed;
5643 /* If there is a call in the loop body, the call-clobbered registers
5644 are not available for loop invariants. */
5645 if (data->body_includes_call)
5646 available_regs = available_regs - target_clobbered_regs;
5648 /* If we have enough registers. */
5649 if (regs_needed + target_res_regs < available_regs)
5650 cost = n_new;
5651 /* If close to running out of registers, try to preserve them. */
5652 else if (regs_needed <= available_regs)
5653 cost = target_reg_cost [speed] * regs_needed;
5654 /* If we run out of available registers but the number of candidates
5655 does not, we penalize extra registers using target_spill_cost. */
5656 else if (n_cands <= available_regs)
5657 cost = target_reg_cost [speed] * available_regs
5658 + target_spill_cost [speed] * (regs_needed - available_regs);
5659 /* If the number of candidates runs out available registers, we penalize
5660 extra candidate registers using target_spill_cost * 2. Because it is
5661 more expensive to spill induction variable than invariant. */
5662 else
5663 cost = target_reg_cost [speed] * available_regs
5664 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5665 + target_spill_cost [speed] * (regs_needed - n_cands);
5667 /* Finally, add the number of candidates, so that we prefer eliminating
5668 induction variables if possible. */
5669 return cost + n_cands;
5672 /* For each size of the induction variable set determine the penalty. */
5674 static void
5675 determine_set_costs (struct ivopts_data *data)
5677 unsigned j, n;
5678 gphi *phi;
5679 gphi_iterator psi;
5680 tree op;
5681 struct loop *loop = data->current_loop;
5682 bitmap_iterator bi;
5684 if (dump_file && (dump_flags & TDF_DETAILS))
5686 fprintf (dump_file, "<Global Costs>:\n");
5687 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5688 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5689 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5690 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5693 n = 0;
5694 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5696 phi = psi.phi ();
5697 op = PHI_RESULT (phi);
5699 if (virtual_operand_p (op))
5700 continue;
5702 if (get_iv (data, op))
5703 continue;
5705 if (!POINTER_TYPE_P (TREE_TYPE (op))
5706 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5707 continue;
5709 n++;
5712 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5714 struct version_info *info = ver_info (data, j);
5716 if (info->inv_id && info->has_nonlin_use)
5717 n++;
5720 data->regs_used = n;
5721 if (dump_file && (dump_flags & TDF_DETAILS))
5722 fprintf (dump_file, " regs_used %d\n", n);
5724 if (dump_file && (dump_flags & TDF_DETAILS))
5726 fprintf (dump_file, " cost for size:\n");
5727 fprintf (dump_file, " ivs\tcost\n");
5728 for (j = 0; j <= 2 * target_avail_regs; j++)
5729 fprintf (dump_file, " %d\t%d\n", j,
5730 ivopts_estimate_reg_pressure (data, 0, j));
5731 fprintf (dump_file, "\n");
5735 /* Returns true if A is a cheaper cost pair than B. */
5737 static bool
5738 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5740 if (!a)
5741 return false;
5743 if (!b)
5744 return true;
5746 if (a->cost < b->cost)
5747 return true;
5749 if (b->cost < a->cost)
5750 return false;
5752 /* In case the costs are the same, prefer the cheaper candidate. */
5753 if (a->cand->cost < b->cand->cost)
5754 return true;
5756 return false;
5759 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5760 for more expensive, equal and cheaper respectively. */
5762 static int
5763 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5765 if (cheaper_cost_pair (a, b))
5766 return -1;
5767 if (cheaper_cost_pair (b, a))
5768 return 1;
5770 return 0;
5773 /* Returns candidate by that USE is expressed in IVS. */
5775 static struct cost_pair *
5776 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5778 return ivs->cand_for_group[group->id];
5781 /* Computes the cost field of IVS structure. */
5783 static void
5784 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5786 comp_cost cost = ivs->cand_use_cost;
5788 cost += ivs->cand_cost;
5789 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5790 ivs->cost = cost;
5793 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5794 and IVS. */
5796 static void
5797 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5799 bitmap_iterator bi;
5800 unsigned iid;
5802 if (!invs)
5803 return;
5805 gcc_assert (n_inv_uses != NULL);
5806 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5808 n_inv_uses[iid]--;
5809 if (n_inv_uses[iid] == 0)
5810 ivs->n_invs--;
5814 /* Set USE not to be expressed by any candidate in IVS. */
5816 static void
5817 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5818 struct iv_group *group)
5820 unsigned gid = group->id, cid;
5821 struct cost_pair *cp;
5823 cp = ivs->cand_for_group[gid];
5824 if (!cp)
5825 return;
5826 cid = cp->cand->id;
5828 ivs->bad_groups++;
5829 ivs->cand_for_group[gid] = NULL;
5830 ivs->n_cand_uses[cid]--;
5832 if (ivs->n_cand_uses[cid] == 0)
5834 bitmap_clear_bit (ivs->cands, cid);
5835 ivs->n_cands--;
5836 ivs->cand_cost -= cp->cand->cost;
5837 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5838 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5841 ivs->cand_use_cost -= cp->cost;
5842 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5843 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5844 iv_ca_recount_cost (data, ivs);
5847 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5848 IVS. */
5850 static void
5851 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5853 bitmap_iterator bi;
5854 unsigned iid;
5856 if (!invs)
5857 return;
5859 gcc_assert (n_inv_uses != NULL);
5860 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5862 n_inv_uses[iid]++;
5863 if (n_inv_uses[iid] == 1)
5864 ivs->n_invs++;
5868 /* Set cost pair for GROUP in set IVS to CP. */
5870 static void
5871 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5872 struct iv_group *group, struct cost_pair *cp)
5874 unsigned gid = group->id, cid;
5876 if (ivs->cand_for_group[gid] == cp)
5877 return;
5879 if (ivs->cand_for_group[gid])
5880 iv_ca_set_no_cp (data, ivs, group);
5882 if (cp)
5884 cid = cp->cand->id;
5886 ivs->bad_groups--;
5887 ivs->cand_for_group[gid] = cp;
5888 ivs->n_cand_uses[cid]++;
5889 if (ivs->n_cand_uses[cid] == 1)
5891 bitmap_set_bit (ivs->cands, cid);
5892 ivs->n_cands++;
5893 ivs->cand_cost += cp->cand->cost;
5894 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5895 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5898 ivs->cand_use_cost += cp->cost;
5899 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5900 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5901 iv_ca_recount_cost (data, ivs);
5905 /* Extend set IVS by expressing USE by some of the candidates in it
5906 if possible. Consider all important candidates if candidates in
5907 set IVS don't give any result. */
5909 static void
5910 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5911 struct iv_group *group)
5913 struct cost_pair *best_cp = NULL, *cp;
5914 bitmap_iterator bi;
5915 unsigned i;
5916 struct iv_cand *cand;
5918 gcc_assert (ivs->upto >= group->id);
5919 ivs->upto++;
5920 ivs->bad_groups++;
5922 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5924 cand = data->vcands[i];
5925 cp = get_group_iv_cost (data, group, cand);
5926 if (cheaper_cost_pair (cp, best_cp))
5927 best_cp = cp;
5930 if (best_cp == NULL)
5932 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5934 cand = data->vcands[i];
5935 cp = get_group_iv_cost (data, group, cand);
5936 if (cheaper_cost_pair (cp, best_cp))
5937 best_cp = cp;
5941 iv_ca_set_cp (data, ivs, group, best_cp);
5944 /* Get cost for assignment IVS. */
5946 static comp_cost
5947 iv_ca_cost (struct iv_ca *ivs)
5949 /* This was a conditional expression but it triggered a bug in
5950 Sun C 5.5. */
5951 if (ivs->bad_groups)
5952 return infinite_cost;
5953 else
5954 return ivs->cost;
5957 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5958 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5959 respectively. */
5961 static int
5962 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5963 struct iv_group *group, struct cost_pair *old_cp,
5964 struct cost_pair *new_cp)
5966 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5967 unsigned old_n_invs = ivs->n_invs;
5968 iv_ca_set_cp (data, ivs, group, new_cp);
5969 unsigned new_n_invs = ivs->n_invs;
5970 iv_ca_set_cp (data, ivs, group, old_cp);
5972 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5975 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5976 it before NEXT. */
5978 static struct iv_ca_delta *
5979 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5980 struct cost_pair *new_cp, struct iv_ca_delta *next)
5982 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5984 change->group = group;
5985 change->old_cp = old_cp;
5986 change->new_cp = new_cp;
5987 change->next = next;
5989 return change;
5992 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5993 are rewritten. */
5995 static struct iv_ca_delta *
5996 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5998 struct iv_ca_delta *last;
6000 if (!l2)
6001 return l1;
6003 if (!l1)
6004 return l2;
6006 for (last = l1; last->next; last = last->next)
6007 continue;
6008 last->next = l2;
6010 return l1;
6013 /* Reverse the list of changes DELTA, forming the inverse to it. */
6015 static struct iv_ca_delta *
6016 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6018 struct iv_ca_delta *act, *next, *prev = NULL;
6020 for (act = delta; act; act = next)
6022 next = act->next;
6023 act->next = prev;
6024 prev = act;
6026 std::swap (act->old_cp, act->new_cp);
6029 return prev;
6032 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6033 reverted instead. */
6035 static void
6036 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6037 struct iv_ca_delta *delta, bool forward)
6039 struct cost_pair *from, *to;
6040 struct iv_ca_delta *act;
6042 if (!forward)
6043 delta = iv_ca_delta_reverse (delta);
6045 for (act = delta; act; act = act->next)
6047 from = act->old_cp;
6048 to = act->new_cp;
6049 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6050 iv_ca_set_cp (data, ivs, act->group, to);
6053 if (!forward)
6054 iv_ca_delta_reverse (delta);
6057 /* Returns true if CAND is used in IVS. */
6059 static bool
6060 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6062 return ivs->n_cand_uses[cand->id] > 0;
6065 /* Returns number of induction variable candidates in the set IVS. */
6067 static unsigned
6068 iv_ca_n_cands (struct iv_ca *ivs)
6070 return ivs->n_cands;
6073 /* Free the list of changes DELTA. */
6075 static void
6076 iv_ca_delta_free (struct iv_ca_delta **delta)
6078 struct iv_ca_delta *act, *next;
6080 for (act = *delta; act; act = next)
6082 next = act->next;
6083 free (act);
6086 *delta = NULL;
6089 /* Allocates new iv candidates assignment. */
6091 static struct iv_ca *
6092 iv_ca_new (struct ivopts_data *data)
6094 struct iv_ca *nw = XNEW (struct iv_ca);
6096 nw->upto = 0;
6097 nw->bad_groups = 0;
6098 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6099 data->vgroups.length ());
6100 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6101 nw->cands = BITMAP_ALLOC (NULL);
6102 nw->n_cands = 0;
6103 nw->n_invs = 0;
6104 nw->cand_use_cost = no_cost;
6105 nw->cand_cost = 0;
6106 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6107 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6108 nw->cost = no_cost;
6110 return nw;
6113 /* Free memory occupied by the set IVS. */
6115 static void
6116 iv_ca_free (struct iv_ca **ivs)
6118 free ((*ivs)->cand_for_group);
6119 free ((*ivs)->n_cand_uses);
6120 BITMAP_FREE ((*ivs)->cands);
6121 free ((*ivs)->n_inv_var_uses);
6122 free ((*ivs)->n_inv_expr_uses);
6123 free (*ivs);
6124 *ivs = NULL;
6127 /* Dumps IVS to FILE. */
6129 static void
6130 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6132 unsigned i;
6133 comp_cost cost = iv_ca_cost (ivs);
6135 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6136 cost.complexity);
6137 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6138 ivs->cand_cost, ivs->cand_use_cost.cost,
6139 ivs->cand_use_cost.complexity);
6140 bitmap_print (file, ivs->cands, " candidates: ","\n");
6142 for (i = 0; i < ivs->upto; i++)
6144 struct iv_group *group = data->vgroups[i];
6145 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6146 if (cp)
6147 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6148 group->id, cp->cand->id, cp->cost.cost,
6149 cp->cost.complexity);
6150 else
6151 fprintf (file, " group:%d --> ??\n", group->id);
6154 const char *pref = "";
6155 fprintf (file, " invariant variables: ");
6156 for (i = 1; i <= data->max_inv_var_id; i++)
6157 if (ivs->n_inv_var_uses[i])
6159 fprintf (file, "%s%d", pref, i);
6160 pref = ", ";
6163 pref = "";
6164 fprintf (file, "\n invariant expressions: ");
6165 for (i = 1; i <= data->max_inv_expr_id; i++)
6166 if (ivs->n_inv_expr_uses[i])
6168 fprintf (file, "%s%d", pref, i);
6169 pref = ", ";
6172 fprintf (file, "\n\n");
6175 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6176 new set, and store differences in DELTA. Number of induction variables
6177 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6178 the function will try to find a solution with mimimal iv candidates. */
6180 static comp_cost
6181 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6182 struct iv_cand *cand, struct iv_ca_delta **delta,
6183 unsigned *n_ivs, bool min_ncand)
6185 unsigned i;
6186 comp_cost cost;
6187 struct iv_group *group;
6188 struct cost_pair *old_cp, *new_cp;
6190 *delta = NULL;
6191 for (i = 0; i < ivs->upto; i++)
6193 group = data->vgroups[i];
6194 old_cp = iv_ca_cand_for_group (ivs, group);
6196 if (old_cp
6197 && old_cp->cand == cand)
6198 continue;
6200 new_cp = get_group_iv_cost (data, group, cand);
6201 if (!new_cp)
6202 continue;
6204 if (!min_ncand)
6206 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6207 /* Skip if new_cp depends on more invariants. */
6208 if (cmp_invs > 0)
6209 continue;
6211 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6212 /* Skip if new_cp is not cheaper. */
6213 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6214 continue;
6217 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6220 iv_ca_delta_commit (data, ivs, *delta, true);
6221 cost = iv_ca_cost (ivs);
6222 if (n_ivs)
6223 *n_ivs = iv_ca_n_cands (ivs);
6224 iv_ca_delta_commit (data, ivs, *delta, false);
6226 return cost;
6229 /* Try narrowing set IVS by removing CAND. Return the cost of
6230 the new set and store the differences in DELTA. START is
6231 the candidate with which we start narrowing. */
6233 static comp_cost
6234 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6235 struct iv_cand *cand, struct iv_cand *start,
6236 struct iv_ca_delta **delta)
6238 unsigned i, ci;
6239 struct iv_group *group;
6240 struct cost_pair *old_cp, *new_cp, *cp;
6241 bitmap_iterator bi;
6242 struct iv_cand *cnd;
6243 comp_cost cost, best_cost, acost;
6245 *delta = NULL;
6246 for (i = 0; i < data->vgroups.length (); i++)
6248 group = data->vgroups[i];
6250 old_cp = iv_ca_cand_for_group (ivs, group);
6251 if (old_cp->cand != cand)
6252 continue;
6254 best_cost = iv_ca_cost (ivs);
6255 /* Start narrowing with START. */
6256 new_cp = get_group_iv_cost (data, group, start);
6258 if (data->consider_all_candidates)
6260 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6262 if (ci == cand->id || (start && ci == start->id))
6263 continue;
6265 cnd = data->vcands[ci];
6267 cp = get_group_iv_cost (data, group, cnd);
6268 if (!cp)
6269 continue;
6271 iv_ca_set_cp (data, ivs, group, cp);
6272 acost = iv_ca_cost (ivs);
6274 if (acost < best_cost)
6276 best_cost = acost;
6277 new_cp = cp;
6281 else
6283 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6285 if (ci == cand->id || (start && ci == start->id))
6286 continue;
6288 cnd = data->vcands[ci];
6290 cp = get_group_iv_cost (data, group, cnd);
6291 if (!cp)
6292 continue;
6294 iv_ca_set_cp (data, ivs, group, cp);
6295 acost = iv_ca_cost (ivs);
6297 if (acost < best_cost)
6299 best_cost = acost;
6300 new_cp = cp;
6304 /* Restore to old cp for use. */
6305 iv_ca_set_cp (data, ivs, group, old_cp);
6307 if (!new_cp)
6309 iv_ca_delta_free (delta);
6310 return infinite_cost;
6313 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6316 iv_ca_delta_commit (data, ivs, *delta, true);
6317 cost = iv_ca_cost (ivs);
6318 iv_ca_delta_commit (data, ivs, *delta, false);
6320 return cost;
6323 /* Try optimizing the set of candidates IVS by removing candidates different
6324 from to EXCEPT_CAND from it. Return cost of the new set, and store
6325 differences in DELTA. */
6327 static comp_cost
6328 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6329 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6331 bitmap_iterator bi;
6332 struct iv_ca_delta *act_delta, *best_delta;
6333 unsigned i;
6334 comp_cost best_cost, acost;
6335 struct iv_cand *cand;
6337 best_delta = NULL;
6338 best_cost = iv_ca_cost (ivs);
6340 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6342 cand = data->vcands[i];
6344 if (cand == except_cand)
6345 continue;
6347 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6349 if (acost < best_cost)
6351 best_cost = acost;
6352 iv_ca_delta_free (&best_delta);
6353 best_delta = act_delta;
6355 else
6356 iv_ca_delta_free (&act_delta);
6359 if (!best_delta)
6361 *delta = NULL;
6362 return best_cost;
6365 /* Recurse to possibly remove other unnecessary ivs. */
6366 iv_ca_delta_commit (data, ivs, best_delta, true);
6367 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6368 iv_ca_delta_commit (data, ivs, best_delta, false);
6369 *delta = iv_ca_delta_join (best_delta, *delta);
6370 return best_cost;
6373 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6374 cheaper local cost for GROUP than BEST_CP. Return pointer to
6375 the corresponding cost_pair, otherwise just return BEST_CP. */
6377 static struct cost_pair*
6378 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6379 unsigned int cand_idx, struct iv_cand *old_cand,
6380 struct cost_pair *best_cp)
6382 struct iv_cand *cand;
6383 struct cost_pair *cp;
6385 gcc_assert (old_cand != NULL && best_cp != NULL);
6386 if (cand_idx == old_cand->id)
6387 return best_cp;
6389 cand = data->vcands[cand_idx];
6390 cp = get_group_iv_cost (data, group, cand);
6391 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6392 return cp;
6394 return best_cp;
6397 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6398 which are used by more than one iv uses. For each of those candidates,
6399 this function tries to represent iv uses under that candidate using
6400 other ones with lower local cost, then tries to prune the new set.
6401 If the new set has lower cost, It returns the new cost after recording
6402 candidate replacement in list DELTA. */
6404 static comp_cost
6405 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6406 struct iv_ca_delta **delta)
6408 bitmap_iterator bi, bj;
6409 unsigned int i, j, k;
6410 struct iv_cand *cand;
6411 comp_cost orig_cost, acost;
6412 struct iv_ca_delta *act_delta, *tmp_delta;
6413 struct cost_pair *old_cp, *best_cp = NULL;
6415 *delta = NULL;
6416 orig_cost = iv_ca_cost (ivs);
6418 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6420 if (ivs->n_cand_uses[i] == 1
6421 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6422 continue;
6424 cand = data->vcands[i];
6426 act_delta = NULL;
6427 /* Represent uses under current candidate using other ones with
6428 lower local cost. */
6429 for (j = 0; j < ivs->upto; j++)
6431 struct iv_group *group = data->vgroups[j];
6432 old_cp = iv_ca_cand_for_group (ivs, group);
6434 if (old_cp->cand != cand)
6435 continue;
6437 best_cp = old_cp;
6438 if (data->consider_all_candidates)
6439 for (k = 0; k < data->vcands.length (); k++)
6440 best_cp = cheaper_cost_with_cand (data, group, k,
6441 old_cp->cand, best_cp);
6442 else
6443 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6444 best_cp = cheaper_cost_with_cand (data, group, k,
6445 old_cp->cand, best_cp);
6447 if (best_cp == old_cp)
6448 continue;
6450 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6452 /* No need for further prune. */
6453 if (!act_delta)
6454 continue;
6456 /* Prune the new candidate set. */
6457 iv_ca_delta_commit (data, ivs, act_delta, true);
6458 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6459 iv_ca_delta_commit (data, ivs, act_delta, false);
6460 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6462 if (acost < orig_cost)
6464 *delta = act_delta;
6465 return acost;
6467 else
6468 iv_ca_delta_free (&act_delta);
6471 return orig_cost;
6474 /* Tries to extend the sets IVS in the best possible way in order to
6475 express the GROUP. If ORIGINALP is true, prefer candidates from
6476 the original set of IVs, otherwise favor important candidates not
6477 based on any memory object. */
6479 static bool
6480 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6481 struct iv_group *group, bool originalp)
6483 comp_cost best_cost, act_cost;
6484 unsigned i;
6485 bitmap_iterator bi;
6486 struct iv_cand *cand;
6487 struct iv_ca_delta *best_delta = NULL, *act_delta;
6488 struct cost_pair *cp;
6490 iv_ca_add_group (data, ivs, group);
6491 best_cost = iv_ca_cost (ivs);
6492 cp = iv_ca_cand_for_group (ivs, group);
6493 if (cp)
6495 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6496 iv_ca_set_no_cp (data, ivs, group);
6499 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6500 first try important candidates not based on any memory object. Only if
6501 this fails, try the specific ones. Rationale -- in loops with many
6502 variables the best choice often is to use just one generic biv. If we
6503 added here many ivs specific to the uses, the optimization algorithm later
6504 would be likely to get stuck in a local minimum, thus causing us to create
6505 too many ivs. The approach from few ivs to more seems more likely to be
6506 successful -- starting from few ivs, replacing an expensive use by a
6507 specific iv should always be a win. */
6508 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6510 cand = data->vcands[i];
6512 if (originalp && cand->pos !=IP_ORIGINAL)
6513 continue;
6515 if (!originalp && cand->iv->base_object != NULL_TREE)
6516 continue;
6518 if (iv_ca_cand_used_p (ivs, cand))
6519 continue;
6521 cp = get_group_iv_cost (data, group, cand);
6522 if (!cp)
6523 continue;
6525 iv_ca_set_cp (data, ivs, group, cp);
6526 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6527 true);
6528 iv_ca_set_no_cp (data, ivs, group);
6529 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6531 if (act_cost < best_cost)
6533 best_cost = act_cost;
6535 iv_ca_delta_free (&best_delta);
6536 best_delta = act_delta;
6538 else
6539 iv_ca_delta_free (&act_delta);
6542 if (best_cost.infinite_cost_p ())
6544 for (i = 0; i < group->n_map_members; i++)
6546 cp = group->cost_map + i;
6547 cand = cp->cand;
6548 if (!cand)
6549 continue;
6551 /* Already tried this. */
6552 if (cand->important)
6554 if (originalp && cand->pos == IP_ORIGINAL)
6555 continue;
6556 if (!originalp && cand->iv->base_object == NULL_TREE)
6557 continue;
6560 if (iv_ca_cand_used_p (ivs, cand))
6561 continue;
6563 act_delta = NULL;
6564 iv_ca_set_cp (data, ivs, group, cp);
6565 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6566 iv_ca_set_no_cp (data, ivs, group);
6567 act_delta = iv_ca_delta_add (group,
6568 iv_ca_cand_for_group (ivs, group),
6569 cp, act_delta);
6571 if (act_cost < best_cost)
6573 best_cost = act_cost;
6575 if (best_delta)
6576 iv_ca_delta_free (&best_delta);
6577 best_delta = act_delta;
6579 else
6580 iv_ca_delta_free (&act_delta);
6584 iv_ca_delta_commit (data, ivs, best_delta, true);
6585 iv_ca_delta_free (&best_delta);
6587 return !best_cost.infinite_cost_p ();
6590 /* Finds an initial assignment of candidates to uses. */
6592 static struct iv_ca *
6593 get_initial_solution (struct ivopts_data *data, bool originalp)
6595 unsigned i;
6596 struct iv_ca *ivs = iv_ca_new (data);
6598 for (i = 0; i < data->vgroups.length (); i++)
6599 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6601 iv_ca_free (&ivs);
6602 return NULL;
6605 return ivs;
6608 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6609 points to a bool variable, this function tries to break local
6610 optimal fixed-point by replacing candidates in IVS if it's true. */
6612 static bool
6613 try_improve_iv_set (struct ivopts_data *data,
6614 struct iv_ca *ivs, bool *try_replace_p)
6616 unsigned i, n_ivs;
6617 comp_cost acost, best_cost = iv_ca_cost (ivs);
6618 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6619 struct iv_cand *cand;
6621 /* Try extending the set of induction variables by one. */
6622 for (i = 0; i < data->vcands.length (); i++)
6624 cand = data->vcands[i];
6626 if (iv_ca_cand_used_p (ivs, cand))
6627 continue;
6629 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6630 if (!act_delta)
6631 continue;
6633 /* If we successfully added the candidate and the set is small enough,
6634 try optimizing it by removing other candidates. */
6635 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6637 iv_ca_delta_commit (data, ivs, act_delta, true);
6638 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6639 iv_ca_delta_commit (data, ivs, act_delta, false);
6640 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6643 if (acost < best_cost)
6645 best_cost = acost;
6646 iv_ca_delta_free (&best_delta);
6647 best_delta = act_delta;
6649 else
6650 iv_ca_delta_free (&act_delta);
6653 if (!best_delta)
6655 /* Try removing the candidates from the set instead. */
6656 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6658 if (!best_delta && *try_replace_p)
6660 *try_replace_p = false;
6661 /* So far candidate selecting algorithm tends to choose fewer IVs
6662 so that it can handle cases in which loops have many variables
6663 but the best choice is often to use only one general biv. One
6664 weakness is it can't handle opposite cases, in which different
6665 candidates should be chosen with respect to each use. To solve
6666 the problem, we replace candidates in a manner described by the
6667 comments of iv_ca_replace, thus give general algorithm a chance
6668 to break local optimal fixed-point in these cases. */
6669 best_cost = iv_ca_replace (data, ivs, &best_delta);
6672 if (!best_delta)
6673 return false;
6676 iv_ca_delta_commit (data, ivs, best_delta, true);
6677 gcc_assert (best_cost == iv_ca_cost (ivs));
6678 iv_ca_delta_free (&best_delta);
6679 return true;
6682 /* Attempts to find the optimal set of induction variables. We do simple
6683 greedy heuristic -- we try to replace at most one candidate in the selected
6684 solution and remove the unused ivs while this improves the cost. */
6686 static struct iv_ca *
6687 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6689 struct iv_ca *set;
6690 bool try_replace_p = true;
6692 /* Get the initial solution. */
6693 set = get_initial_solution (data, originalp);
6694 if (!set)
6696 if (dump_file && (dump_flags & TDF_DETAILS))
6697 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6698 return NULL;
6701 if (dump_file && (dump_flags & TDF_DETAILS))
6703 fprintf (dump_file, "Initial set of candidates:\n");
6704 iv_ca_dump (data, dump_file, set);
6707 while (try_improve_iv_set (data, set, &try_replace_p))
6709 if (dump_file && (dump_flags & TDF_DETAILS))
6711 fprintf (dump_file, "Improved to:\n");
6712 iv_ca_dump (data, dump_file, set);
6716 return set;
6719 static struct iv_ca *
6720 find_optimal_iv_set (struct ivopts_data *data)
6722 unsigned i;
6723 comp_cost cost, origcost;
6724 struct iv_ca *set, *origset;
6726 /* Determine the cost based on a strategy that starts with original IVs,
6727 and try again using a strategy that prefers candidates not based
6728 on any IVs. */
6729 origset = find_optimal_iv_set_1 (data, true);
6730 set = find_optimal_iv_set_1 (data, false);
6732 if (!origset && !set)
6733 return NULL;
6735 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6736 cost = set ? iv_ca_cost (set) : infinite_cost;
6738 if (dump_file && (dump_flags & TDF_DETAILS))
6740 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6741 origcost.cost, origcost.complexity);
6742 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6743 cost.cost, cost.complexity);
6746 /* Choose the one with the best cost. */
6747 if (origcost <= cost)
6749 if (set)
6750 iv_ca_free (&set);
6751 set = origset;
6753 else if (origset)
6754 iv_ca_free (&origset);
6756 for (i = 0; i < data->vgroups.length (); i++)
6758 struct iv_group *group = data->vgroups[i];
6759 group->selected = iv_ca_cand_for_group (set, group)->cand;
6762 return set;
6765 /* Creates a new induction variable corresponding to CAND. */
6767 static void
6768 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6770 gimple_stmt_iterator incr_pos;
6771 tree base;
6772 struct iv_use *use;
6773 struct iv_group *group;
6774 bool after = false;
6776 gcc_assert (cand->iv != NULL);
6778 switch (cand->pos)
6780 case IP_NORMAL:
6781 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6782 break;
6784 case IP_END:
6785 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6786 after = true;
6787 break;
6789 case IP_AFTER_USE:
6790 after = true;
6791 /* fall through */
6792 case IP_BEFORE_USE:
6793 incr_pos = gsi_for_stmt (cand->incremented_at);
6794 break;
6796 case IP_ORIGINAL:
6797 /* Mark that the iv is preserved. */
6798 name_info (data, cand->var_before)->preserve_biv = true;
6799 name_info (data, cand->var_after)->preserve_biv = true;
6801 /* Rewrite the increment so that it uses var_before directly. */
6802 use = find_interesting_uses_op (data, cand->var_after);
6803 group = data->vgroups[use->group_id];
6804 group->selected = cand;
6805 return;
6808 gimple_add_tmp_var (cand->var_before);
6810 base = unshare_expr (cand->iv->base);
6812 create_iv (base, unshare_expr (cand->iv->step),
6813 cand->var_before, data->current_loop,
6814 &incr_pos, after, &cand->var_before, &cand->var_after);
6817 /* Creates new induction variables described in SET. */
6819 static void
6820 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6822 unsigned i;
6823 struct iv_cand *cand;
6824 bitmap_iterator bi;
6826 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6828 cand = data->vcands[i];
6829 create_new_iv (data, cand);
6832 if (dump_file && (dump_flags & TDF_DETAILS))
6834 fprintf (dump_file, "Selected IV set for loop %d",
6835 data->current_loop->num);
6836 if (data->loop_loc != UNKNOWN_LOCATION)
6837 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6838 LOCATION_LINE (data->loop_loc));
6839 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6840 avg_loop_niter (data->current_loop));
6841 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6842 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6844 cand = data->vcands[i];
6845 dump_cand (dump_file, cand);
6847 fprintf (dump_file, "\n");
6851 /* Rewrites USE (definition of iv used in a nonlinear expression)
6852 using candidate CAND. */
6854 static void
6855 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6856 struct iv_use *use, struct iv_cand *cand)
6858 gassign *ass;
6859 gimple_stmt_iterator bsi;
6860 tree comp, type = get_use_type (use), tgt;
6862 /* An important special case -- if we are asked to express value of
6863 the original iv by itself, just exit; there is no need to
6864 introduce a new computation (that might also need casting the
6865 variable to unsigned and back). */
6866 if (cand->pos == IP_ORIGINAL
6867 && cand->incremented_at == use->stmt)
6869 tree op = NULL_TREE;
6870 enum tree_code stmt_code;
6872 gcc_assert (is_gimple_assign (use->stmt));
6873 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6875 /* Check whether we may leave the computation unchanged.
6876 This is the case only if it does not rely on other
6877 computations in the loop -- otherwise, the computation
6878 we rely upon may be removed in remove_unused_ivs,
6879 thus leading to ICE. */
6880 stmt_code = gimple_assign_rhs_code (use->stmt);
6881 if (stmt_code == PLUS_EXPR
6882 || stmt_code == MINUS_EXPR
6883 || stmt_code == POINTER_PLUS_EXPR)
6885 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6886 op = gimple_assign_rhs2 (use->stmt);
6887 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6888 op = gimple_assign_rhs1 (use->stmt);
6891 if (op != NULL_TREE)
6893 if (expr_invariant_in_loop_p (data->current_loop, op))
6894 return;
6895 if (TREE_CODE (op) == SSA_NAME)
6897 struct iv *iv = get_iv (data, op);
6898 if (iv != NULL && integer_zerop (iv->step))
6899 return;
6904 switch (gimple_code (use->stmt))
6906 case GIMPLE_PHI:
6907 tgt = PHI_RESULT (use->stmt);
6909 /* If we should keep the biv, do not replace it. */
6910 if (name_info (data, tgt)->preserve_biv)
6911 return;
6913 bsi = gsi_after_labels (gimple_bb (use->stmt));
6914 break;
6916 case GIMPLE_ASSIGN:
6917 tgt = gimple_assign_lhs (use->stmt);
6918 bsi = gsi_for_stmt (use->stmt);
6919 break;
6921 default:
6922 gcc_unreachable ();
6925 aff_tree aff_inv, aff_var;
6926 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6927 use, cand, &aff_inv, &aff_var))
6928 gcc_unreachable ();
6930 unshare_aff_combination (&aff_inv);
6931 unshare_aff_combination (&aff_var);
6932 /* Prefer CSE opportunity than loop invariant by adding offset at last
6933 so that iv_uses have different offsets can be CSEed. */
6934 poly_widest_int offset = aff_inv.offset;
6935 aff_inv.offset = 0;
6937 gimple_seq stmt_list = NULL, seq = NULL;
6938 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6939 tree comp_op2 = aff_combination_to_tree (&aff_var);
6940 gcc_assert (comp_op1 && comp_op2);
6942 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6943 gimple_seq_add_seq (&stmt_list, seq);
6944 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6945 gimple_seq_add_seq (&stmt_list, seq);
6947 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6948 std::swap (comp_op1, comp_op2);
6950 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6952 comp = fold_build_pointer_plus (comp_op1,
6953 fold_convert (sizetype, comp_op2));
6954 comp = fold_build_pointer_plus (comp,
6955 wide_int_to_tree (sizetype, offset));
6957 else
6959 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6960 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6961 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6962 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6965 comp = fold_convert (type, comp);
6966 if (!valid_gimple_rhs_p (comp)
6967 || (gimple_code (use->stmt) != GIMPLE_PHI
6968 /* We can't allow re-allocating the stmt as it might be pointed
6969 to still. */
6970 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6971 >= gimple_num_ops (gsi_stmt (bsi)))))
6973 comp = force_gimple_operand (comp, &seq, true, NULL);
6974 gimple_seq_add_seq (&stmt_list, seq);
6975 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6977 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6978 /* As this isn't a plain copy we have to reset alignment
6979 information. */
6980 if (SSA_NAME_PTR_INFO (comp))
6981 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6985 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
6986 if (gimple_code (use->stmt) == GIMPLE_PHI)
6988 ass = gimple_build_assign (tgt, comp);
6989 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6991 bsi = gsi_for_stmt (use->stmt);
6992 remove_phi_node (&bsi, false);
6994 else
6996 gimple_assign_set_rhs_from_tree (&bsi, comp);
6997 use->stmt = gsi_stmt (bsi);
7001 /* Performs a peephole optimization to reorder the iv update statement with
7002 a mem ref to enable instruction combining in later phases. The mem ref uses
7003 the iv value before the update, so the reordering transformation requires
7004 adjustment of the offset. CAND is the selected IV_CAND.
7006 Example:
7008 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7009 iv2 = iv1 + 1;
7011 if (t < val) (1)
7012 goto L;
7013 goto Head;
7016 directly propagating t over to (1) will introduce overlapping live range
7017 thus increase register pressure. This peephole transform it into:
7020 iv2 = iv1 + 1;
7021 t = MEM_REF (base, iv2, 8, 8);
7022 if (t < val)
7023 goto L;
7024 goto Head;
7027 static void
7028 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7030 tree var_after;
7031 gimple *iv_update, *stmt;
7032 basic_block bb;
7033 gimple_stmt_iterator gsi, gsi_iv;
7035 if (cand->pos != IP_NORMAL)
7036 return;
7038 var_after = cand->var_after;
7039 iv_update = SSA_NAME_DEF_STMT (var_after);
7041 bb = gimple_bb (iv_update);
7042 gsi = gsi_last_nondebug_bb (bb);
7043 stmt = gsi_stmt (gsi);
7045 /* Only handle conditional statement for now. */
7046 if (gimple_code (stmt) != GIMPLE_COND)
7047 return;
7049 gsi_prev_nondebug (&gsi);
7050 stmt = gsi_stmt (gsi);
7051 if (stmt != iv_update)
7052 return;
7054 gsi_prev_nondebug (&gsi);
7055 if (gsi_end_p (gsi))
7056 return;
7058 stmt = gsi_stmt (gsi);
7059 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7060 return;
7062 if (stmt != use->stmt)
7063 return;
7065 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7066 return;
7068 if (dump_file && (dump_flags & TDF_DETAILS))
7070 fprintf (dump_file, "Reordering \n");
7071 print_gimple_stmt (dump_file, iv_update, 0);
7072 print_gimple_stmt (dump_file, use->stmt, 0);
7073 fprintf (dump_file, "\n");
7076 gsi = gsi_for_stmt (use->stmt);
7077 gsi_iv = gsi_for_stmt (iv_update);
7078 gsi_move_before (&gsi_iv, &gsi);
7080 cand->pos = IP_BEFORE_USE;
7081 cand->incremented_at = use->stmt;
7084 /* Return the alias pointer type that should be used for a MEM_REF
7085 associated with USE, which has type USE_PTR_ADDRESS. */
7087 static tree
7088 get_alias_ptr_type_for_ptr_address (iv_use *use)
7090 gcall *call = as_a <gcall *> (use->stmt);
7091 switch (gimple_call_internal_fn (call))
7093 case IFN_MASK_LOAD:
7094 case IFN_MASK_STORE:
7095 /* The second argument contains the correct alias type. */
7096 gcc_assert (use->op_p = gimple_call_arg_ptr (call, 0));
7097 return TREE_TYPE (gimple_call_arg (call, 1));
7099 default:
7100 gcc_unreachable ();
7105 /* Rewrites USE (address that is an iv) using candidate CAND. */
7107 static void
7108 rewrite_use_address (struct ivopts_data *data,
7109 struct iv_use *use, struct iv_cand *cand)
7111 aff_tree aff;
7112 bool ok;
7114 adjust_iv_update_pos (cand, use);
7115 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7116 gcc_assert (ok);
7117 unshare_aff_combination (&aff);
7119 /* To avoid undefined overflow problems, all IV candidates use unsigned
7120 integer types. The drawback is that this makes it impossible for
7121 create_mem_ref to distinguish an IV that is based on a memory object
7122 from one that represents simply an offset.
7124 To work around this problem, we pass a hint to create_mem_ref that
7125 indicates which variable (if any) in aff is an IV based on a memory
7126 object. Note that we only consider the candidate. If this is not
7127 based on an object, the base of the reference is in some subexpression
7128 of the use -- but these will use pointer types, so they are recognized
7129 by the create_mem_ref heuristics anyway. */
7130 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7131 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7132 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7133 tree type = use->mem_type;
7134 tree alias_ptr_type;
7135 if (use->type == USE_PTR_ADDRESS)
7136 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7137 else
7139 gcc_assert (type == TREE_TYPE (*use->op_p));
7140 unsigned int align = get_object_alignment (*use->op_p);
7141 if (align != TYPE_ALIGN (type))
7142 type = build_aligned_type (type, align);
7143 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7145 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7146 iv, base_hint, data->speed);
7148 if (use->type == USE_PTR_ADDRESS)
7150 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7151 ref = fold_convert (get_use_type (use), ref);
7152 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7153 true, GSI_SAME_STMT);
7155 else
7156 copy_ref_info (ref, *use->op_p);
7158 *use->op_p = ref;
7161 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7162 candidate CAND. */
7164 static void
7165 rewrite_use_compare (struct ivopts_data *data,
7166 struct iv_use *use, struct iv_cand *cand)
7168 tree comp, op, bound;
7169 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7170 enum tree_code compare;
7171 struct iv_group *group = data->vgroups[use->group_id];
7172 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7174 bound = cp->value;
7175 if (bound)
7177 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7178 tree var_type = TREE_TYPE (var);
7179 gimple_seq stmts;
7181 if (dump_file && (dump_flags & TDF_DETAILS))
7183 fprintf (dump_file, "Replacing exit test: ");
7184 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7186 compare = cp->comp;
7187 bound = unshare_expr (fold_convert (var_type, bound));
7188 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7189 if (stmts)
7190 gsi_insert_seq_on_edge_immediate (
7191 loop_preheader_edge (data->current_loop),
7192 stmts);
7194 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7195 gimple_cond_set_lhs (cond_stmt, var);
7196 gimple_cond_set_code (cond_stmt, compare);
7197 gimple_cond_set_rhs (cond_stmt, op);
7198 return;
7201 /* The induction variable elimination failed; just express the original
7202 giv. */
7203 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7204 gcc_assert (comp != NULL_TREE);
7205 gcc_assert (use->op_p != NULL);
7206 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7207 SSA_NAME_VAR (*use->op_p),
7208 true, GSI_SAME_STMT);
7211 /* Rewrite the groups using the selected induction variables. */
7213 static void
7214 rewrite_groups (struct ivopts_data *data)
7216 unsigned i, j;
7218 for (i = 0; i < data->vgroups.length (); i++)
7220 struct iv_group *group = data->vgroups[i];
7221 struct iv_cand *cand = group->selected;
7223 gcc_assert (cand);
7225 if (group->type == USE_NONLINEAR_EXPR)
7227 for (j = 0; j < group->vuses.length (); j++)
7229 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7230 update_stmt (group->vuses[j]->stmt);
7233 else if (address_p (group->type))
7235 for (j = 0; j < group->vuses.length (); j++)
7237 rewrite_use_address (data, group->vuses[j], cand);
7238 update_stmt (group->vuses[j]->stmt);
7241 else
7243 gcc_assert (group->type == USE_COMPARE);
7245 for (j = 0; j < group->vuses.length (); j++)
7247 rewrite_use_compare (data, group->vuses[j], cand);
7248 update_stmt (group->vuses[j]->stmt);
7254 /* Removes the ivs that are not used after rewriting. */
7256 static void
7257 remove_unused_ivs (struct ivopts_data *data)
7259 unsigned j;
7260 bitmap_iterator bi;
7261 bitmap toremove = BITMAP_ALLOC (NULL);
7263 /* Figure out an order in which to release SSA DEFs so that we don't
7264 release something that we'd have to propagate into a debug stmt
7265 afterwards. */
7266 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7268 struct version_info *info;
7270 info = ver_info (data, j);
7271 if (info->iv
7272 && !integer_zerop (info->iv->step)
7273 && !info->inv_id
7274 && !info->iv->nonlin_use
7275 && !info->preserve_biv)
7277 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7279 tree def = info->iv->ssa_name;
7281 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7283 imm_use_iterator imm_iter;
7284 use_operand_p use_p;
7285 gimple *stmt;
7286 int count = 0;
7288 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7290 if (!gimple_debug_bind_p (stmt))
7291 continue;
7293 /* We just want to determine whether to do nothing
7294 (count == 0), to substitute the computed
7295 expression into a single use of the SSA DEF by
7296 itself (count == 1), or to use a debug temp
7297 because the SSA DEF is used multiple times or as
7298 part of a larger expression (count > 1). */
7299 count++;
7300 if (gimple_debug_bind_get_value (stmt) != def)
7301 count++;
7303 if (count > 1)
7304 BREAK_FROM_IMM_USE_STMT (imm_iter);
7307 if (!count)
7308 continue;
7310 struct iv_use dummy_use;
7311 struct iv_cand *best_cand = NULL, *cand;
7312 unsigned i, best_pref = 0, cand_pref;
7314 memset (&dummy_use, 0, sizeof (dummy_use));
7315 dummy_use.iv = info->iv;
7316 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7318 cand = data->vgroups[i]->selected;
7319 if (cand == best_cand)
7320 continue;
7321 cand_pref = operand_equal_p (cand->iv->step,
7322 info->iv->step, 0)
7323 ? 4 : 0;
7324 cand_pref
7325 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7326 == TYPE_MODE (TREE_TYPE (info->iv->base))
7327 ? 2 : 0;
7328 cand_pref
7329 += TREE_CODE (cand->iv->base) == INTEGER_CST
7330 ? 1 : 0;
7331 if (best_cand == NULL || best_pref < cand_pref)
7333 best_cand = cand;
7334 best_pref = cand_pref;
7338 if (!best_cand)
7339 continue;
7341 tree comp = get_computation_at (data->current_loop,
7342 SSA_NAME_DEF_STMT (def),
7343 &dummy_use, best_cand);
7344 if (!comp)
7345 continue;
7347 if (count > 1)
7349 tree vexpr = make_node (DEBUG_EXPR_DECL);
7350 DECL_ARTIFICIAL (vexpr) = 1;
7351 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7352 if (SSA_NAME_VAR (def))
7353 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7354 else
7355 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7356 gdebug *def_temp
7357 = gimple_build_debug_bind (vexpr, comp, NULL);
7358 gimple_stmt_iterator gsi;
7360 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7361 gsi = gsi_after_labels (gimple_bb
7362 (SSA_NAME_DEF_STMT (def)));
7363 else
7364 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7366 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7367 comp = vexpr;
7370 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7372 if (!gimple_debug_bind_p (stmt))
7373 continue;
7375 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7376 SET_USE (use_p, comp);
7378 update_stmt (stmt);
7384 release_defs_bitset (toremove);
7386 BITMAP_FREE (toremove);
7389 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7390 for hash_map::traverse. */
7392 bool
7393 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7395 free (value);
7396 return true;
7399 /* Frees data allocated by the optimization of a single loop. */
7401 static void
7402 free_loop_data (struct ivopts_data *data)
7404 unsigned i, j;
7405 bitmap_iterator bi;
7406 tree obj;
7408 if (data->niters)
7410 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7411 delete data->niters;
7412 data->niters = NULL;
7415 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7417 struct version_info *info;
7419 info = ver_info (data, i);
7420 info->iv = NULL;
7421 info->has_nonlin_use = false;
7422 info->preserve_biv = false;
7423 info->inv_id = 0;
7425 bitmap_clear (data->relevant);
7426 bitmap_clear (data->important_candidates);
7428 for (i = 0; i < data->vgroups.length (); i++)
7430 struct iv_group *group = data->vgroups[i];
7432 for (j = 0; j < group->vuses.length (); j++)
7433 free (group->vuses[j]);
7434 group->vuses.release ();
7436 BITMAP_FREE (group->related_cands);
7437 for (j = 0; j < group->n_map_members; j++)
7439 if (group->cost_map[j].inv_vars)
7440 BITMAP_FREE (group->cost_map[j].inv_vars);
7441 if (group->cost_map[j].inv_exprs)
7442 BITMAP_FREE (group->cost_map[j].inv_exprs);
7445 free (group->cost_map);
7446 free (group);
7448 data->vgroups.truncate (0);
7450 for (i = 0; i < data->vcands.length (); i++)
7452 struct iv_cand *cand = data->vcands[i];
7454 if (cand->inv_vars)
7455 BITMAP_FREE (cand->inv_vars);
7456 if (cand->inv_exprs)
7457 BITMAP_FREE (cand->inv_exprs);
7458 free (cand);
7460 data->vcands.truncate (0);
7462 if (data->version_info_size < num_ssa_names)
7464 data->version_info_size = 2 * num_ssa_names;
7465 free (data->version_info);
7466 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7469 data->max_inv_var_id = 0;
7470 data->max_inv_expr_id = 0;
7472 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7473 SET_DECL_RTL (obj, NULL_RTX);
7475 decl_rtl_to_reset.truncate (0);
7477 data->inv_expr_tab->empty ();
7479 data->iv_common_cand_tab->empty ();
7480 data->iv_common_cands.truncate (0);
7483 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7484 loop tree. */
7486 static void
7487 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7489 free_loop_data (data);
7490 free (data->version_info);
7491 BITMAP_FREE (data->relevant);
7492 BITMAP_FREE (data->important_candidates);
7494 decl_rtl_to_reset.release ();
7495 data->vgroups.release ();
7496 data->vcands.release ();
7497 delete data->inv_expr_tab;
7498 data->inv_expr_tab = NULL;
7499 free_affine_expand_cache (&data->name_expansion_cache);
7500 delete data->iv_common_cand_tab;
7501 data->iv_common_cand_tab = NULL;
7502 data->iv_common_cands.release ();
7503 obstack_free (&data->iv_obstack, NULL);
7506 /* Returns true if the loop body BODY includes any function calls. */
7508 static bool
7509 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7511 gimple_stmt_iterator gsi;
7512 unsigned i;
7514 for (i = 0; i < num_nodes; i++)
7515 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7517 gimple *stmt = gsi_stmt (gsi);
7518 if (is_gimple_call (stmt)
7519 && !gimple_call_internal_p (stmt)
7520 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7521 return true;
7523 return false;
7526 /* Optimizes the LOOP. Returns true if anything changed. */
7528 static bool
7529 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7531 bool changed = false;
7532 struct iv_ca *iv_ca;
7533 edge exit = single_dom_exit (loop);
7534 basic_block *body;
7536 gcc_assert (!data->niters);
7537 data->current_loop = loop;
7538 data->loop_loc = find_loop_location (loop).get_location_t ();
7539 data->speed = optimize_loop_for_speed_p (loop);
7541 if (dump_file && (dump_flags & TDF_DETAILS))
7543 fprintf (dump_file, "Processing loop %d", loop->num);
7544 if (data->loop_loc != UNKNOWN_LOCATION)
7545 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7546 LOCATION_LINE (data->loop_loc));
7547 fprintf (dump_file, "\n");
7549 if (exit)
7551 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7552 exit->src->index, exit->dest->index);
7553 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7554 fprintf (dump_file, "\n");
7557 fprintf (dump_file, "\n");
7560 body = get_loop_body (loop);
7561 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7562 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7563 free (body);
7565 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7567 /* For each ssa name determines whether it behaves as an induction variable
7568 in some loop. */
7569 if (!find_induction_variables (data))
7570 goto finish;
7572 /* Finds interesting uses (item 1). */
7573 find_interesting_uses (data);
7574 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7575 goto finish;
7577 /* Finds candidates for the induction variables (item 2). */
7578 find_iv_candidates (data);
7580 /* Calculates the costs (item 3, part 1). */
7581 determine_iv_costs (data);
7582 determine_group_iv_costs (data);
7583 determine_set_costs (data);
7585 /* Find the optimal set of induction variables (item 3, part 2). */
7586 iv_ca = find_optimal_iv_set (data);
7587 if (!iv_ca)
7588 goto finish;
7589 changed = true;
7591 /* Create the new induction variables (item 4, part 1). */
7592 create_new_ivs (data, iv_ca);
7593 iv_ca_free (&iv_ca);
7595 /* Rewrite the uses (item 4, part 2). */
7596 rewrite_groups (data);
7598 /* Remove the ivs that are unused after rewriting. */
7599 remove_unused_ivs (data);
7601 /* We have changed the structure of induction variables; it might happen
7602 that definitions in the scev database refer to some of them that were
7603 eliminated. */
7604 scev_reset ();
7606 finish:
7607 free_loop_data (data);
7609 return changed;
7612 /* Main entry point. Optimizes induction variables in loops. */
7614 void
7615 tree_ssa_iv_optimize (void)
7617 struct loop *loop;
7618 struct ivopts_data data;
7620 tree_ssa_iv_optimize_init (&data);
7622 /* Optimize the loops starting with the innermost ones. */
7623 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7625 if (dump_file && (dump_flags & TDF_DETAILS))
7626 flow_loop_dump (loop, dump_file, NULL, 1);
7628 tree_ssa_iv_optimize_loop (&data, loop);
7631 tree_ssa_iv_optimize_finalize (&data);
7634 #include "gt-tree-ssa-loop-ivopts.h"