Add option for whether ceil etc. can raise "inexact", adjust x86 conditions.
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob1e8d6377360c07ecb2fec78bf232d7ddeb54e454
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in group.
34 Generally, address type uses are grouped together if their iv bases
35 are different in constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible, it might give a better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "tm_p.h"
79 #include "ssa.h"
80 #include "expmed.h"
81 #include "insn-config.h"
82 #include "emit-rtl.h"
83 #include "recog.h"
84 #include "cgraph.h"
85 #include "gimple-pretty-print.h"
86 #include "alias.h"
87 #include "fold-const.h"
88 #include "stor-layout.h"
89 #include "tree-eh.h"
90 #include "gimplify.h"
91 #include "gimple-iterator.h"
92 #include "gimplify-me.h"
93 #include "tree-cfg.h"
94 #include "tree-ssa-loop-ivopts.h"
95 #include "tree-ssa-loop-manip.h"
96 #include "tree-ssa-loop-niter.h"
97 #include "tree-ssa-loop.h"
98 #include "explow.h"
99 #include "expr.h"
100 #include "tree-dfa.h"
101 #include "tree-ssa.h"
102 #include "cfgloop.h"
103 #include "tree-scalar-evolution.h"
104 #include "params.h"
105 #include "tree-affine.h"
106 #include "tree-ssa-propagate.h"
107 #include "tree-ssa-address.h"
108 #include "builtins.h"
109 #include "tree-vectorizer.h"
111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
112 cost of different addressing modes. This should be moved to a TBD
113 interface between the GIMPLE and RTL worlds. */
115 /* The infinite cost. */
116 #define INFTY 10000000
118 #define AVG_LOOP_NITER(LOOP) 5
120 /* Returns the expected number of loop iterations for LOOP.
121 The average trip count is computed from profile data if it
122 exists. */
124 static inline HOST_WIDE_INT
125 avg_loop_niter (struct loop *loop)
127 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
128 if (niter == -1)
130 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > AVG_LOOP_NITER (loop))
132 return AVG_LOOP_NITER (loop);
135 return niter;
138 struct iv_use;
140 /* Representation of the induction variable. */
141 struct iv
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to that the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
165 /* Types of uses. */
166 enum use_type
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
173 /* Cost of a computation. */
174 struct comp_cost
176 comp_cost (): cost (0), complexity (0), scratch (0)
179 comp_cost (int cost, unsigned complexity, int scratch = 0)
180 : cost (cost), complexity (complexity), scratch (scratch)
183 /* Returns true if COST is infinite. */
184 bool infinite_cost_p ();
186 /* Adds costs COST1 and COST2. */
187 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
189 /* Adds COST to the comp_cost. */
190 comp_cost operator+= (comp_cost cost);
192 /* Adds constant C to this comp_cost. */
193 comp_cost operator+= (HOST_WIDE_INT c);
195 /* Subtracts constant C to this comp_cost. */
196 comp_cost operator-= (HOST_WIDE_INT c);
198 /* Divide the comp_cost by constant C. */
199 comp_cost operator/= (HOST_WIDE_INT c);
201 /* Multiply the comp_cost by constant C. */
202 comp_cost operator*= (HOST_WIDE_INT c);
204 /* Subtracts costs COST1 and COST2. */
205 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
207 /* Subtracts COST from this comp_cost. */
208 comp_cost operator-= (comp_cost cost);
210 /* Returns true if COST1 is smaller than COST2. */
211 friend bool operator< (comp_cost cost1, comp_cost cost2);
213 /* Returns true if COST1 and COST2 are equal. */
214 friend bool operator== (comp_cost cost1, comp_cost cost2);
216 /* Returns true if COST1 is smaller or equal than COST2. */
217 friend bool operator<= (comp_cost cost1, comp_cost cost2);
219 int cost; /* The runtime cost. */
220 unsigned complexity; /* The estimate of the complexity of the code for
221 the computation (in no concrete units --
222 complexity field should be larger for more
223 complex expressions and addressing modes). */
224 int scratch; /* Scratch used during cost computation. */
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
230 bool
231 comp_cost::infinite_cost_p ()
233 return cost == INFTY;
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
245 return cost1;
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
254 gcc_assert (!cost2.infinite_cost_p ());
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
259 return cost1;
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
265 *this = *this + cost;
266 return *this;
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
272 if (infinite_cost_p ())
273 return *this;
275 this->cost += c;
277 return *this;
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
283 if (infinite_cost_p ())
284 return *this;
286 this->cost -= c;
288 return *this;
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
294 if (infinite_cost_p ())
295 return *this;
297 this->cost /= c;
299 return *this;
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
305 if (infinite_cost_p ())
306 return *this;
308 this->cost *= c;
310 return *this;
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
316 *this = *this - cost;
317 return *this;
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
326 return cost1.cost < cost2.cost;
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
339 return cost1 < cost2 || cost1 == cost2;
342 struct iv_inv_expr_ent;
344 /* The candidate - cost pair. */
345 struct cost_pair
347 struct iv_cand *cand; /* The candidate. */
348 comp_cost cost; /* The cost. */
349 bitmap depends_on; /* The list of invariants that have to be
350 preserved. */
351 tree value; /* For final value elimination, the expression for
352 the final value of the iv. For iv elimination,
353 the new bound to compare with. */
354 enum tree_code comp; /* For iv elimination, the comparison. */
355 iv_inv_expr_ent *inv_expr; /* Loop invariant expression. */
358 /* Use. */
359 struct iv_use
361 unsigned id; /* The id of the use. */
362 unsigned group_id; /* The group id the use belongs to. */
363 enum use_type type; /* Type of the use. */
364 struct iv *iv; /* The induction variable it is based on. */
365 gimple *stmt; /* Statement in that it occurs. */
366 tree *op_p; /* The place where it occurs. */
368 tree addr_base; /* Base address with const offset stripped. */
369 unsigned HOST_WIDE_INT addr_offset;
370 /* Const offset stripped from base address. */
373 /* Group of uses. */
374 struct iv_group
376 /* The id of the group. */
377 unsigned id;
378 /* Uses of the group are of the same type. */
379 enum use_type type;
380 /* The set of "related" IV candidates, plus the important ones. */
381 bitmap related_cands;
382 /* Number of IV candidates in the cost_map. */
383 unsigned n_map_members;
384 /* The costs wrto the iv candidates. */
385 struct cost_pair *cost_map;
386 /* The selected candidate for the group. */
387 struct iv_cand *selected;
388 /* Uses in the group. */
389 vec<struct iv_use *> vuses;
392 /* The position where the iv is computed. */
393 enum iv_position
395 IP_NORMAL, /* At the end, just before the exit condition. */
396 IP_END, /* At the end of the latch block. */
397 IP_BEFORE_USE, /* Immediately before a specific use. */
398 IP_AFTER_USE, /* Immediately after a specific use. */
399 IP_ORIGINAL /* The original biv. */
402 /* The induction variable candidate. */
403 struct iv_cand
405 unsigned id; /* The number of the candidate. */
406 bool important; /* Whether this is an "important" candidate, i.e. such
407 that it should be considered by all uses. */
408 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
409 gimple *incremented_at;/* For original biv, the statement where it is
410 incremented. */
411 tree var_before; /* The variable used for it before increment. */
412 tree var_after; /* The variable used for it after increment. */
413 struct iv *iv; /* The value of the candidate. NULL for
414 "pseudocandidate" used to indicate the possibility
415 to replace the final value of an iv by direct
416 computation of the value. */
417 unsigned cost; /* Cost of the candidate. */
418 unsigned cost_step; /* Cost of the candidate's increment operation. */
419 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
420 where it is incremented. */
421 bitmap depends_on; /* The list of invariants that are used in step of the
422 biv. */
423 struct iv *orig_iv; /* The original iv if this cand is added from biv with
424 smaller type. */
427 /* Hashtable entry for common candidate derived from iv uses. */
428 struct iv_common_cand
430 tree base;
431 tree step;
432 /* IV uses from which this common candidate is derived. */
433 auto_vec<struct iv_use *> uses;
434 hashval_t hash;
437 /* Hashtable helpers. */
439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
441 static inline hashval_t hash (const iv_common_cand *);
442 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
445 /* Hash function for possible common candidates. */
447 inline hashval_t
448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
450 return ccand->hash;
453 /* Hash table equality function for common candidates. */
455 inline bool
456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
457 const iv_common_cand *ccand2)
459 return (ccand1->hash == ccand2->hash
460 && operand_equal_p (ccand1->base, ccand2->base, 0)
461 && operand_equal_p (ccand1->step, ccand2->step, 0)
462 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
463 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
466 /* Loop invariant expression hashtable entry. */
468 struct iv_inv_expr_ent
470 /* Tree expression of the entry. */
471 tree expr;
472 /* Unique indentifier. */
473 int id;
474 /* Hash value. */
475 hashval_t hash;
478 /* Sort iv_inv_expr_ent pair A and B by id field. */
480 static int
481 sort_iv_inv_expr_ent (const void *a, const void *b)
483 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
484 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
486 unsigned id1 = (*e1)->id;
487 unsigned id2 = (*e2)->id;
489 if (id1 < id2)
490 return -1;
491 else if (id1 > id2)
492 return 1;
493 else
494 return 0;
497 /* Hashtable helpers. */
499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
501 static inline hashval_t hash (const iv_inv_expr_ent *);
502 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
505 /* Hash function for loop invariant expressions. */
507 inline hashval_t
508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
510 return expr->hash;
513 /* Hash table equality function for expressions. */
515 inline bool
516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
517 const iv_inv_expr_ent *expr2)
519 return expr1->hash == expr2->hash
520 && operand_equal_p (expr1->expr, expr2->expr, 0);
523 struct ivopts_data
525 /* The currently optimized loop. */
526 struct loop *current_loop;
527 source_location loop_loc;
529 /* Numbers of iterations for all exits of the current loop. */
530 hash_map<edge, tree_niter_desc *> *niters;
532 /* Number of registers used in it. */
533 unsigned regs_used;
535 /* The size of version_info array allocated. */
536 unsigned version_info_size;
538 /* The array of information for the ssa names. */
539 struct version_info *version_info;
541 /* The hashtable of loop invariant expressions created
542 by ivopt. */
543 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
545 /* Loop invariant expression id. */
546 int max_inv_expr_id;
548 /* The bitmap of indices in version_info whose value was changed. */
549 bitmap relevant;
551 /* The uses of induction variables. */
552 vec<iv_group *> vgroups;
554 /* The candidates. */
555 vec<iv_cand *> vcands;
557 /* A bitmap of important candidates. */
558 bitmap important_candidates;
560 /* Cache used by tree_to_aff_combination_expand. */
561 hash_map<tree, name_expansion *> *name_expansion_cache;
563 /* The hashtable of common candidates derived from iv uses. */
564 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
566 /* The common candidates. */
567 vec<iv_common_cand *> iv_common_cands;
569 /* The maximum invariant id. */
570 unsigned max_inv_id;
572 /* Number of no_overflow BIVs which are not used in memory address. */
573 unsigned bivs_not_used_in_addr;
575 /* Obstack for iv structure. */
576 struct obstack iv_obstack;
578 /* Whether to consider just related and important candidates when replacing a
579 use. */
580 bool consider_all_candidates;
582 /* Are we optimizing for speed? */
583 bool speed;
585 /* Whether the loop body includes any function calls. */
586 bool body_includes_call;
588 /* Whether the loop body can only be exited via single exit. */
589 bool loop_single_exit_p;
592 /* An assignment of iv candidates to uses. */
594 struct iv_ca
596 /* The number of uses covered by the assignment. */
597 unsigned upto;
599 /* Number of uses that cannot be expressed by the candidates in the set. */
600 unsigned bad_groups;
602 /* Candidate assigned to a use, together with the related costs. */
603 struct cost_pair **cand_for_group;
605 /* Number of times each candidate is used. */
606 unsigned *n_cand_uses;
608 /* The candidates used. */
609 bitmap cands;
611 /* The number of candidates in the set. */
612 unsigned n_cands;
614 /* Total number of registers needed. */
615 unsigned n_regs;
617 /* Total cost of expressing uses. */
618 comp_cost cand_use_cost;
620 /* Total cost of candidates. */
621 unsigned cand_cost;
623 /* Number of times each invariant is used. */
624 unsigned *n_invariant_uses;
626 /* Hash set with used invariant expression. */
627 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
629 /* Total cost of the assignment. */
630 comp_cost cost;
633 /* Difference of two iv candidate assignments. */
635 struct iv_ca_delta
637 /* Changed group. */
638 struct iv_group *group;
640 /* An old assignment (for rollback purposes). */
641 struct cost_pair *old_cp;
643 /* A new assignment. */
644 struct cost_pair *new_cp;
646 /* Next change in the list. */
647 struct iv_ca_delta *next;
650 /* Bound on number of candidates below that all candidates are considered. */
652 #define CONSIDER_ALL_CANDIDATES_BOUND \
653 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
656 optimizing such a loop would help, and it would take ages). */
658 #define MAX_CONSIDERED_GROUPS \
659 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
661 /* If there are at most this number of ivs in the set, try removing unnecessary
662 ivs from the set always. */
664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
665 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
667 /* The list of trees for that the decl_rtl field must be reset is stored
668 here. */
670 static vec<tree> decl_rtl_to_reset;
672 static comp_cost force_expr_to_var_cost (tree, bool);
674 /* The single loop exit if it dominates the latch, NULL otherwise. */
676 edge
677 single_dom_exit (struct loop *loop)
679 edge exit = single_exit (loop);
681 if (!exit)
682 return NULL;
684 if (!just_once_each_iteration_p (loop, exit->src))
685 return NULL;
687 return exit;
690 /* Dumps information about the induction variable IV to FILE. Don't dump
691 variable's name if DUMP_NAME is FALSE. The information is dumped with
692 preceding spaces indicated by INDENT_LEVEL. */
694 void
695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
697 const char *p;
698 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
700 if (indent_level > 4)
701 indent_level = 4;
702 p = spaces + 8 - (indent_level << 1);
704 fprintf (file, "%sIV struct:\n", p);
705 if (iv->ssa_name && dump_name)
707 fprintf (file, "%s SSA_NAME:\t", p);
708 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
709 fprintf (file, "\n");
712 fprintf (file, "%s Type:\t", p);
713 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
714 fprintf (file, "\n");
716 fprintf (file, "%s Base:\t", p);
717 print_generic_expr (file, iv->base, TDF_SLIM);
718 fprintf (file, "\n");
720 fprintf (file, "%s Step:\t", p);
721 print_generic_expr (file, iv->step, TDF_SLIM);
722 fprintf (file, "\n");
724 if (iv->base_object)
726 fprintf (file, "%s Object:\t", p);
727 print_generic_expr (file, iv->base_object, TDF_SLIM);
728 fprintf (file, "\n");
731 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
733 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
734 p, iv->no_overflow ? "No-overflow" : "Overflow");
737 /* Dumps information about the USE to FILE. */
739 void
740 dump_use (FILE *file, struct iv_use *use)
742 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
743 fprintf (file, " At stmt:\t");
744 print_gimple_stmt (file, use->stmt, 0, 0);
745 fprintf (file, " At pos:\t");
746 if (use->op_p)
747 print_generic_expr (file, *use->op_p, TDF_SLIM);
748 fprintf (file, "\n");
749 dump_iv (file, use->iv, false, 2);
752 /* Dumps information about the uses to FILE. */
754 void
755 dump_groups (FILE *file, struct ivopts_data *data)
757 unsigned i, j;
758 struct iv_group *group;
760 for (i = 0; i < data->vgroups.length (); i++)
762 group = data->vgroups[i];
763 fprintf (file, "Group %d:\n", group->id);
764 if (group->type == USE_NONLINEAR_EXPR)
765 fprintf (file, " Type:\tGENERIC\n");
766 else if (group->type == USE_ADDRESS)
767 fprintf (file, " Type:\tADDRESS\n");
768 else
770 gcc_assert (group->type == USE_COMPARE);
771 fprintf (file, " Type:\tCOMPARE\n");
773 for (j = 0; j < group->vuses.length (); j++)
774 dump_use (file, group->vuses[j]);
778 /* Dumps information about induction variable candidate CAND to FILE. */
780 void
781 dump_cand (FILE *file, struct iv_cand *cand)
783 struct iv *iv = cand->iv;
785 fprintf (file, "Candidate %d:\n", cand->id);
786 if (cand->depends_on)
788 fprintf (file, " Depend on: ");
789 dump_bitmap (file, cand->depends_on);
792 if (cand->var_before)
794 fprintf (file, " Var befor: ");
795 print_generic_expr (file, cand->var_before, TDF_SLIM);
796 fprintf (file, "\n");
798 if (cand->var_after)
800 fprintf (file, " Var after: ");
801 print_generic_expr (file, cand->var_after, TDF_SLIM);
802 fprintf (file, "\n");
805 switch (cand->pos)
807 case IP_NORMAL:
808 fprintf (file, " Incr POS: before exit test\n");
809 break;
811 case IP_BEFORE_USE:
812 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
813 break;
815 case IP_AFTER_USE:
816 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
817 break;
819 case IP_END:
820 fprintf (file, " Incr POS: at end\n");
821 break;
823 case IP_ORIGINAL:
824 fprintf (file, " Incr POS: orig biv\n");
825 break;
828 dump_iv (file, iv, false, 1);
831 /* Returns the info for ssa version VER. */
833 static inline struct version_info *
834 ver_info (struct ivopts_data *data, unsigned ver)
836 return data->version_info + ver;
839 /* Returns the info for ssa name NAME. */
841 static inline struct version_info *
842 name_info (struct ivopts_data *data, tree name)
844 return ver_info (data, SSA_NAME_VERSION (name));
847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
848 emitted in LOOP. */
850 static bool
851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
853 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
855 gcc_assert (bb);
857 if (sbb == loop->latch)
858 return true;
860 if (sbb != bb)
861 return false;
863 return stmt == last_stmt (bb);
866 /* Returns true if STMT if after the place where the original induction
867 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
868 if the positions are identical. */
870 static bool
871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
873 basic_block cand_bb = gimple_bb (cand->incremented_at);
874 basic_block stmt_bb = gimple_bb (stmt);
876 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
877 return false;
879 if (stmt_bb != cand_bb)
880 return true;
882 if (true_if_equal
883 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
884 return true;
885 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
888 /* Returns true if STMT if after the place where the induction variable
889 CAND is incremented in LOOP. */
891 static bool
892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
894 switch (cand->pos)
896 case IP_END:
897 return false;
899 case IP_NORMAL:
900 return stmt_after_ip_normal_pos (loop, stmt);
902 case IP_ORIGINAL:
903 case IP_AFTER_USE:
904 return stmt_after_inc_pos (cand, stmt, false);
906 case IP_BEFORE_USE:
907 return stmt_after_inc_pos (cand, stmt, true);
909 default:
910 gcc_unreachable ();
914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
916 static bool
917 abnormal_ssa_name_p (tree exp)
919 if (!exp)
920 return false;
922 if (TREE_CODE (exp) != SSA_NAME)
923 return false;
925 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
929 abnormal phi node. Callback for for_each_index. */
931 static bool
932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
933 void *data ATTRIBUTE_UNUSED)
935 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
937 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
938 return false;
939 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
940 return false;
943 return !abnormal_ssa_name_p (*index);
946 /* Returns true if EXPR contains a ssa name that occurs in an
947 abnormal phi node. */
949 bool
950 contains_abnormal_ssa_name_p (tree expr)
952 enum tree_code code;
953 enum tree_code_class codeclass;
955 if (!expr)
956 return false;
958 code = TREE_CODE (expr);
959 codeclass = TREE_CODE_CLASS (code);
961 if (code == SSA_NAME)
962 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
964 if (code == INTEGER_CST
965 || is_gimple_min_invariant (expr))
966 return false;
968 if (code == ADDR_EXPR)
969 return !for_each_index (&TREE_OPERAND (expr, 0),
970 idx_contains_abnormal_ssa_name_p,
971 NULL);
973 if (code == COND_EXPR)
974 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
975 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
976 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
978 switch (codeclass)
980 case tcc_binary:
981 case tcc_comparison:
982 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
983 return true;
985 /* Fallthru. */
986 case tcc_unary:
987 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
988 return true;
990 break;
992 default:
993 gcc_unreachable ();
996 return false;
999 /* Returns the structure describing number of iterations determined from
1000 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1005 struct tree_niter_desc *desc;
1006 tree_niter_desc **slot;
1008 if (!data->niters)
1010 data->niters = new hash_map<edge, tree_niter_desc *>;
1011 slot = NULL;
1013 else
1014 slot = data->niters->get (exit);
1016 if (!slot)
1018 /* Try to determine number of iterations. We cannot safely work with ssa
1019 names that appear in phi nodes on abnormal edges, so that we do not
1020 create overlapping life ranges for them (PR 27283). */
1021 desc = XNEW (struct tree_niter_desc);
1022 if (!number_of_iterations_exit (data->current_loop,
1023 exit, desc, true)
1024 || contains_abnormal_ssa_name_p (desc->niter))
1026 XDELETE (desc);
1027 desc = NULL;
1029 data->niters->put (exit, desc);
1031 else
1032 desc = *slot;
1034 return desc;
1037 /* Returns the structure describing number of iterations determined from
1038 single dominating exit of DATA->current_loop, or NULL if something
1039 goes wrong. */
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1044 edge exit = single_dom_exit (data->current_loop);
1046 if (!exit)
1047 return NULL;
1049 return niter_for_exit (data, exit);
1052 /* Initializes data structures used by the iv optimization pass, stored
1053 in DATA. */
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1058 data->version_info_size = 2 * num_ssa_names;
1059 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060 data->relevant = BITMAP_ALLOC (NULL);
1061 data->important_candidates = BITMAP_ALLOC (NULL);
1062 data->max_inv_id = 0;
1063 data->niters = NULL;
1064 data->vgroups.create (20);
1065 data->vcands.create (20);
1066 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067 data->max_inv_expr_id = 0;
1068 data->name_expansion_cache = NULL;
1069 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070 data->iv_common_cands.create (20);
1071 decl_rtl_to_reset.create (20);
1072 gcc_obstack_init (&data->iv_obstack);
1075 /* Returns a memory object to that EXPR points. In case we are able to
1076 determine that it does not point to any such object, NULL is returned. */
1078 static tree
1079 determine_base_object (tree expr)
1081 enum tree_code code = TREE_CODE (expr);
1082 tree base, obj;
1084 /* If this is a pointer casted to any type, we need to determine
1085 the base object for the pointer; so handle conversions before
1086 throwing away non-pointer expressions. */
1087 if (CONVERT_EXPR_P (expr))
1088 return determine_base_object (TREE_OPERAND (expr, 0));
1090 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091 return NULL_TREE;
1093 switch (code)
1095 case INTEGER_CST:
1096 return NULL_TREE;
1098 case ADDR_EXPR:
1099 obj = TREE_OPERAND (expr, 0);
1100 base = get_base_address (obj);
1102 if (!base)
1103 return expr;
1105 if (TREE_CODE (base) == MEM_REF)
1106 return determine_base_object (TREE_OPERAND (base, 0));
1108 return fold_convert (ptr_type_node,
1109 build_fold_addr_expr (base));
1111 case POINTER_PLUS_EXPR:
1112 return determine_base_object (TREE_OPERAND (expr, 0));
1114 case PLUS_EXPR:
1115 case MINUS_EXPR:
1116 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1117 gcc_unreachable ();
1119 default:
1120 return fold_convert (ptr_type_node, expr);
1124 /* Return true if address expression with non-DECL_P operand appears
1125 in EXPR. */
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1130 bool res = false;
1132 STRIP_NOPS (expr);
1133 switch (TREE_CODE (expr))
1135 case POINTER_PLUS_EXPR:
1136 case PLUS_EXPR:
1137 case MINUS_EXPR:
1138 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140 break;
1142 case ADDR_EXPR:
1143 return (!DECL_P (TREE_OPERAND (expr, 0)));
1145 default:
1146 return false;
1149 return res;
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157 bool no_overflow = false)
1159 tree expr = base;
1160 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161 sizeof (struct iv));
1162 gcc_assert (step != NULL_TREE);
1164 /* Lower address expression in base except ones with DECL_P as operand.
1165 By doing this:
1166 1) More accurate cost can be computed for address expressions;
1167 2) Duplicate candidates won't be created for bases in different
1168 forms, like &a[0] and &a. */
1169 STRIP_NOPS (expr);
1170 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171 || contain_complex_addr_expr (expr))
1173 aff_tree comb;
1174 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178 iv->base = base;
1179 iv->base_object = determine_base_object (base);
1180 iv->step = step;
1181 iv->biv_p = false;
1182 iv->nonlin_use = NULL;
1183 iv->ssa_name = NULL_TREE;
1184 iv->no_overflow = no_overflow;
1185 iv->have_address_use = false;
1187 return iv;
1190 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1191 doesn't overflow. */
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195 bool no_overflow)
1197 struct version_info *info = name_info (data, iv);
1199 gcc_assert (!info->iv);
1201 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202 info->iv = alloc_iv (data, base, step, no_overflow);
1203 info->iv->ssa_name = iv;
1206 /* Finds induction variable declaration for VAR. */
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1211 basic_block bb;
1212 tree type = TREE_TYPE (var);
1214 if (!POINTER_TYPE_P (type)
1215 && !INTEGRAL_TYPE_P (type))
1216 return NULL;
1218 if (!name_info (data, var)->iv)
1220 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1222 if (!bb
1223 || !flow_bb_inside_loop_p (data->current_loop, bb))
1224 set_iv (data, var, var, build_int_cst (type, 0), true);
1227 return name_info (data, var)->iv;
1230 /* Return the first non-invariant ssa var found in EXPR. */
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1235 int i, n;
1236 tree tmp;
1237 enum tree_code code;
1239 if (!expr || is_gimple_min_invariant (expr))
1240 return NULL;
1242 code = TREE_CODE (expr);
1243 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1245 n = TREE_OPERAND_LENGTH (expr);
1246 for (i = 0; i < n; i++)
1248 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1250 if (tmp)
1251 return tmp;
1254 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1257 /* Finds basic ivs. */
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1262 gphi *phi;
1263 affine_iv iv;
1264 tree step, type, base, stop;
1265 bool found = false;
1266 struct loop *loop = data->current_loop;
1267 gphi_iterator psi;
1269 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1271 phi = psi.phi ();
1273 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274 continue;
1276 if (virtual_operand_p (PHI_RESULT (phi)))
1277 continue;
1279 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280 continue;
1282 if (integer_zerop (iv.step))
1283 continue;
1285 step = iv.step;
1286 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287 /* Stop expanding iv base at the first ssa var referred by iv step.
1288 Ideally we should stop at any ssa var, because that's expensive
1289 and unusual to happen, we just do it on the first one.
1291 See PR64705 for the rationale. */
1292 stop = extract_single_var_from_expr (step);
1293 base = expand_simple_operations (base, stop);
1294 if (contains_abnormal_ssa_name_p (base)
1295 || contains_abnormal_ssa_name_p (step))
1296 continue;
1298 type = TREE_TYPE (PHI_RESULT (phi));
1299 base = fold_convert (type, base);
1300 if (step)
1302 if (POINTER_TYPE_P (type))
1303 step = convert_to_ptrofftype (step);
1304 else
1305 step = fold_convert (type, step);
1308 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309 found = true;
1312 return found;
1315 /* Marks basic ivs. */
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1320 gphi *phi;
1321 gimple *def;
1322 tree var;
1323 struct iv *iv, *incr_iv;
1324 struct loop *loop = data->current_loop;
1325 basic_block incr_bb;
1326 gphi_iterator psi;
1328 data->bivs_not_used_in_addr = 0;
1329 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1331 phi = psi.phi ();
1333 iv = get_iv (data, PHI_RESULT (phi));
1334 if (!iv)
1335 continue;
1337 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338 def = SSA_NAME_DEF_STMT (var);
1339 /* Don't mark iv peeled from other one as biv. */
1340 if (def
1341 && gimple_code (def) == GIMPLE_PHI
1342 && gimple_bb (def) == loop->header)
1343 continue;
1345 incr_iv = get_iv (data, var);
1346 if (!incr_iv)
1347 continue;
1349 /* If the increment is in the subloop, ignore it. */
1350 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351 if (incr_bb->loop_father != data->current_loop
1352 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353 continue;
1355 iv->biv_p = true;
1356 incr_iv->biv_p = true;
1357 if (iv->no_overflow)
1358 data->bivs_not_used_in_addr++;
1359 if (incr_iv->no_overflow)
1360 data->bivs_not_used_in_addr++;
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365 parameters to IV. */
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1370 tree lhs, stop;
1371 struct loop *loop = data->current_loop;
1373 iv->base = NULL_TREE;
1374 iv->step = NULL_TREE;
1376 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377 return false;
1379 lhs = gimple_assign_lhs (stmt);
1380 if (TREE_CODE (lhs) != SSA_NAME)
1381 return false;
1383 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384 return false;
1386 /* Stop expanding iv base at the first ssa var referred by iv step.
1387 Ideally we should stop at any ssa var, because that's expensive
1388 and unusual to happen, we just do it on the first one.
1390 See PR64705 for the rationale. */
1391 stop = extract_single_var_from_expr (iv->step);
1392 iv->base = expand_simple_operations (iv->base, stop);
1393 if (contains_abnormal_ssa_name_p (iv->base)
1394 || contains_abnormal_ssa_name_p (iv->step))
1395 return false;
1397 /* If STMT could throw, then do not consider STMT as defining a GIV.
1398 While this will suppress optimizations, we can not safely delete this
1399 GIV and associated statements, even if it appears it is not used. */
1400 if (stmt_could_throw_p (stmt))
1401 return false;
1403 return true;
1406 /* Finds general ivs in statement STMT. */
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1411 affine_iv iv;
1413 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414 return;
1416 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1419 /* Finds general ivs in basic block BB. */
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1424 gimple_stmt_iterator bsi;
1426 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427 find_givs_in_stmt (data, gsi_stmt (bsi));
1430 /* Finds general ivs. */
1432 static void
1433 find_givs (struct ivopts_data *data)
1435 struct loop *loop = data->current_loop;
1436 basic_block *body = get_loop_body_in_dom_order (loop);
1437 unsigned i;
1439 for (i = 0; i < loop->num_nodes; i++)
1440 find_givs_in_bb (data, body[i]);
1441 free (body);
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445 variable and if so, its initial value and step. */
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1450 unsigned i;
1451 bitmap_iterator bi;
1453 if (!find_bivs (data))
1454 return false;
1456 find_givs (data);
1457 mark_bivs (data);
1459 if (dump_file && (dump_flags & TDF_DETAILS))
1461 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1463 if (niter)
1465 fprintf (dump_file, " number of iterations ");
1466 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467 if (!integer_zerop (niter->may_be_zero))
1469 fprintf (dump_file, "; zero if ");
1470 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1472 fprintf (dump_file, "\n");
1475 fprintf (dump_file, "\n<Induction Vars>:\n");
1476 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1478 struct version_info *info = ver_info (data, i);
1479 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1484 return true;
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489 is the const offset stripped from IV base; for other types use, both
1490 are zero by default. */
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494 gimple *stmt, enum use_type type, tree addr_base,
1495 unsigned HOST_WIDE_INT addr_offset)
1497 struct iv_use *use = XCNEW (struct iv_use);
1499 use->id = group->vuses.length ();
1500 use->group_id = group->id;
1501 use->type = type;
1502 use->iv = iv;
1503 use->stmt = stmt;
1504 use->op_p = use_p;
1505 use->addr_base = addr_base;
1506 use->addr_offset = addr_offset;
1508 group->vuses.safe_push (use);
1509 return use;
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513 NONLINEAR_USE is true if the invariant is used in a way we do not
1514 handle specially. */
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1519 basic_block bb;
1520 struct version_info *info;
1522 if (TREE_CODE (op) != SSA_NAME
1523 || virtual_operand_p (op))
1524 return;
1526 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527 if (bb
1528 && flow_bb_inside_loop_p (data->current_loop, bb))
1529 return;
1531 info = name_info (data, op);
1532 info->name = op;
1533 info->has_nonlin_use |= nonlinear_use;
1534 if (!info->inv_id)
1535 info->inv_id = ++data->max_inv_id;
1536 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1542 /* Record a group of TYPE. */
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1547 struct iv_group *group = XCNEW (struct iv_group);
1549 group->id = data->vgroups.length ();
1550 group->type = type;
1551 group->related_cands = BITMAP_ALLOC (NULL);
1552 group->vuses.create (1);
1554 data->vgroups.safe_push (group);
1555 return group;
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559 New group will be created if there is no existing group for the use. */
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563 struct iv *iv, gimple *stmt, enum use_type type)
1565 tree addr_base = NULL;
1566 struct iv_group *group = NULL;
1567 unsigned HOST_WIDE_INT addr_offset = 0;
1569 /* Record non address type use in a new group. */
1570 if (type == USE_ADDRESS && iv->base_object)
1572 unsigned int i;
1574 addr_base = strip_offset (iv->base, &addr_offset);
1575 for (i = 0; i < data->vgroups.length (); i++)
1577 struct iv_use *use;
1579 group = data->vgroups[i];
1580 use = group->vuses[0];
1581 if (use->type != USE_ADDRESS || !use->iv->base_object)
1582 continue;
1584 /* Check if it has the same stripped base and step. */
1585 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586 && operand_equal_p (iv->step, use->iv->step, 0)
1587 && operand_equal_p (addr_base, use->addr_base, 0))
1588 break;
1590 if (i == data->vgroups.length ())
1591 group = NULL;
1594 if (!group)
1595 group = record_group (data, type);
1597 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1600 /* Checks whether the use OP is interesting and if so, records it. */
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1605 struct iv *iv;
1606 gimple *stmt;
1607 struct iv_use *use;
1609 if (TREE_CODE (op) != SSA_NAME)
1610 return NULL;
1612 iv = get_iv (data, op);
1613 if (!iv)
1614 return NULL;
1616 if (iv->nonlin_use)
1618 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619 return iv->nonlin_use;
1622 if (integer_zerop (iv->step))
1624 record_invariant (data, op, true);
1625 return NULL;
1628 stmt = SSA_NAME_DEF_STMT (op);
1629 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1631 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632 iv->nonlin_use = use;
1633 return use;
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637 of an induction variable and an invariant. If this is the case,
1638 CONTROL_VAR is set to location of the iv, BOUND to the location of
1639 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640 induction variable descriptions, and true is returned. If this is not
1641 the case, CONTROL_VAR and BOUND are set to the arguments of the
1642 condition and false is returned. */
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646 tree **control_var, tree **bound,
1647 struct iv **iv_var, struct iv **iv_bound)
1649 /* The objects returned when COND has constant operands. */
1650 static struct iv const_iv;
1651 static tree zero;
1652 tree *op0 = &zero, *op1 = &zero;
1653 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654 bool ret = false;
1656 if (gimple_code (stmt) == GIMPLE_COND)
1658 gcond *cond_stmt = as_a <gcond *> (stmt);
1659 op0 = gimple_cond_lhs_ptr (cond_stmt);
1660 op1 = gimple_cond_rhs_ptr (cond_stmt);
1662 else
1664 op0 = gimple_assign_rhs1_ptr (stmt);
1665 op1 = gimple_assign_rhs2_ptr (stmt);
1668 zero = integer_zero_node;
1669 const_iv.step = integer_zero_node;
1671 if (TREE_CODE (*op0) == SSA_NAME)
1672 iv0 = get_iv (data, *op0);
1673 if (TREE_CODE (*op1) == SSA_NAME)
1674 iv1 = get_iv (data, *op1);
1676 /* Exactly one of the compared values must be an iv, and the other one must
1677 be an invariant. */
1678 if (!iv0 || !iv1)
1679 goto end;
1681 if (integer_zerop (iv0->step))
1683 /* Control variable may be on the other side. */
1684 std::swap (op0, op1);
1685 std::swap (iv0, iv1);
1687 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1689 end:
1690 if (control_var)
1691 *control_var = op0;
1692 if (iv_var)
1693 *iv_var = iv0;
1694 if (bound)
1695 *bound = op1;
1696 if (iv_bound)
1697 *iv_bound = iv1;
1699 return ret;
1702 /* Checks whether the condition in STMT is interesting and if so,
1703 records it. */
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1708 tree *var_p, *bound_p;
1709 struct iv *var_iv;
1711 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1713 find_interesting_uses_op (data, *var_p);
1714 find_interesting_uses_op (data, *bound_p);
1715 return;
1718 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722 relative to the loop LOOP, i.e. if all its operands are defined
1723 outside of the returned loop. Returns NULL if EXPR is not
1724 even obviously invariant in LOOP. */
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1729 basic_block def_bb;
1730 unsigned i, len;
1732 if (is_gimple_min_invariant (expr))
1733 return current_loops->tree_root;
1735 if (TREE_CODE (expr) == SSA_NAME)
1737 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738 if (def_bb)
1740 if (flow_bb_inside_loop_p (loop, def_bb))
1741 return NULL;
1742 return superloop_at_depth (loop,
1743 loop_depth (def_bb->loop_father) + 1);
1746 return current_loops->tree_root;
1749 if (!EXPR_P (expr))
1750 return NULL;
1752 unsigned maxdepth = 0;
1753 len = TREE_OPERAND_LENGTH (expr);
1754 for (i = 0; i < len; i++)
1756 struct loop *ivloop;
1757 if (!TREE_OPERAND (expr, i))
1758 continue;
1760 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761 if (!ivloop)
1762 return NULL;
1763 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1766 return superloop_at_depth (loop, maxdepth);
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770 i.e. if all its operands are defined outside of the LOOP. LOOP
1771 should not be the function body. */
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1776 basic_block def_bb;
1777 unsigned i, len;
1779 gcc_assert (loop_depth (loop) > 0);
1781 if (is_gimple_min_invariant (expr))
1782 return true;
1784 if (TREE_CODE (expr) == SSA_NAME)
1786 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787 if (def_bb
1788 && flow_bb_inside_loop_p (loop, def_bb))
1789 return false;
1791 return true;
1794 if (!EXPR_P (expr))
1795 return false;
1797 len = TREE_OPERAND_LENGTH (expr);
1798 for (i = 0; i < len; i++)
1799 if (TREE_OPERAND (expr, i)
1800 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801 return false;
1803 return true;
1806 /* Given expression EXPR which computes inductive values with respect
1807 to loop recorded in DATA, this function returns biv from which EXPR
1808 is derived by tracing definition chains of ssa variables in EXPR. */
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1813 struct iv *iv;
1814 unsigned i, n;
1815 tree e2, e1;
1816 enum tree_code code;
1817 gimple *stmt;
1819 if (expr == NULL_TREE)
1820 return NULL;
1822 if (is_gimple_min_invariant (expr))
1823 return NULL;
1825 code = TREE_CODE (expr);
1826 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1828 n = TREE_OPERAND_LENGTH (expr);
1829 for (i = 0; i < n; i++)
1831 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832 if (iv)
1833 return iv;
1837 /* Stop if it's not ssa name. */
1838 if (code != SSA_NAME)
1839 return NULL;
1841 iv = get_iv (data, expr);
1842 if (!iv || integer_zerop (iv->step))
1843 return NULL;
1844 else if (iv->biv_p)
1845 return iv;
1847 stmt = SSA_NAME_DEF_STMT (expr);
1848 if (gphi *phi = dyn_cast <gphi *> (stmt))
1850 ssa_op_iter iter;
1851 use_operand_p use_p;
1853 if (virtual_operand_p (gimple_phi_result (phi)))
1854 return NULL;
1856 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1858 tree use = USE_FROM_PTR (use_p);
1859 iv = find_deriving_biv_for_expr (data, use);
1860 if (iv)
1861 return iv;
1863 return NULL;
1865 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866 return NULL;
1868 e1 = gimple_assign_rhs1 (stmt);
1869 code = gimple_assign_rhs_code (stmt);
1870 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871 return find_deriving_biv_for_expr (data, e1);
1873 switch (code)
1875 case MULT_EXPR:
1876 case PLUS_EXPR:
1877 case MINUS_EXPR:
1878 case POINTER_PLUS_EXPR:
1879 /* Increments, decrements and multiplications by a constant
1880 are simple. */
1881 e2 = gimple_assign_rhs2 (stmt);
1882 iv = find_deriving_biv_for_expr (data, e2);
1883 if (iv)
1884 return iv;
1886 /* Fallthru. */
1887 CASE_CONVERT:
1888 /* Casts are simple. */
1889 return find_deriving_biv_for_expr (data, e1);
1891 default:
1892 break;
1895 return NULL;
1898 /* Record BIV, its predecessor and successor that they are used in
1899 address type uses. */
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1904 unsigned i;
1905 tree type, base_1, base_2;
1906 bitmap_iterator bi;
1908 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909 || biv->have_address_use || !biv->no_overflow)
1910 return;
1912 type = TREE_TYPE (biv->base);
1913 if (!INTEGRAL_TYPE_P (type))
1914 return;
1916 biv->have_address_use = true;
1917 data->bivs_not_used_in_addr--;
1918 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1921 struct iv *iv = ver_info (data, i)->iv;
1923 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924 || iv->have_address_use || !iv->no_overflow)
1925 continue;
1927 if (type != TREE_TYPE (iv->base)
1928 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929 continue;
1931 if (!operand_equal_p (biv->step, iv->step, 0))
1932 continue;
1934 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935 if (operand_equal_p (base_1, iv->base, 0)
1936 || operand_equal_p (base_2, biv->base, 0))
1938 iv->have_address_use = true;
1939 data->bivs_not_used_in_addr--;
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945 initial ones. Returns false when the value of the index cannot be determined.
1946 Callback for for_each_index. */
1948 struct ifs_ivopts_data
1950 struct ivopts_data *ivopts_data;
1951 gimple *stmt;
1952 tree step;
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1958 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959 struct iv *iv;
1960 bool use_overflow_semantics = false;
1961 tree step, iv_base, iv_step, lbound, off;
1962 struct loop *loop = dta->ivopts_data->current_loop;
1964 /* If base is a component ref, require that the offset of the reference
1965 be invariant. */
1966 if (TREE_CODE (base) == COMPONENT_REF)
1968 off = component_ref_field_offset (base);
1969 return expr_invariant_in_loop_p (loop, off);
1972 /* If base is array, first check whether we will be able to move the
1973 reference out of the loop (in order to take its address in strength
1974 reduction). In order for this to work we need both lower bound
1975 and step to be loop invariants. */
1976 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1978 /* Moreover, for a range, the size needs to be invariant as well. */
1979 if (TREE_CODE (base) == ARRAY_RANGE_REF
1980 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981 return false;
1983 step = array_ref_element_size (base);
1984 lbound = array_ref_low_bound (base);
1986 if (!expr_invariant_in_loop_p (loop, step)
1987 || !expr_invariant_in_loop_p (loop, lbound))
1988 return false;
1991 if (TREE_CODE (*idx) != SSA_NAME)
1992 return true;
1994 iv = get_iv (dta->ivopts_data, *idx);
1995 if (!iv)
1996 return false;
1998 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1999 *&x[0], which is not folded and does not trigger the
2000 ARRAY_REF path below. */
2001 *idx = iv->base;
2003 if (integer_zerop (iv->step))
2004 return true;
2006 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2008 step = array_ref_element_size (base);
2010 /* We only handle addresses whose step is an integer constant. */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 return false;
2014 else
2015 /* The step for pointer arithmetics already is 1 byte. */
2016 step = size_one_node;
2018 iv_base = iv->base;
2019 iv_step = iv->step;
2020 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021 use_overflow_semantics = true;
2023 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024 sizetype, &iv_base, &iv_step, dta->stmt,
2025 use_overflow_semantics))
2027 /* The index might wrap. */
2028 return false;
2031 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2034 if (dta->ivopts_data->bivs_not_used_in_addr)
2036 if (!iv->biv_p)
2037 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2039 record_biv_for_address_use (dta->ivopts_data, iv);
2041 return true;
2044 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2045 object is passed to it in DATA. */
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049 void *vdata)
2051 struct ivopts_data *data = (struct ivopts_data *) vdata;
2052 find_interesting_uses_op (data, *idx);
2053 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2055 find_interesting_uses_op (data, array_ref_element_size (base));
2056 find_interesting_uses_op (data, array_ref_low_bound (base));
2058 return true;
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062 store cst to MUL and return true. Otherwise return false.
2063 The returned value is always sign-extended, regardless of the
2064 signedness of TOP and BOT. */
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2069 tree mby;
2070 enum tree_code code;
2071 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072 widest_int res, p0, p1;
2074 STRIP_NOPS (top);
2075 STRIP_NOPS (bot);
2077 if (operand_equal_p (top, bot, 0))
2079 *mul = 1;
2080 return true;
2083 code = TREE_CODE (top);
2084 switch (code)
2086 case MULT_EXPR:
2087 mby = TREE_OPERAND (top, 1);
2088 if (TREE_CODE (mby) != INTEGER_CST)
2089 return false;
2091 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092 return false;
2094 *mul = wi::sext (res * wi::to_widest (mby), precision);
2095 return true;
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101 return false;
2103 if (code == MINUS_EXPR)
2104 p1 = -p1;
2105 *mul = wi::sext (p0 + p1, precision);
2106 return true;
2108 case INTEGER_CST:
2109 if (TREE_CODE (bot) != INTEGER_CST)
2110 return false;
2112 p0 = widest_int::from (top, SIGNED);
2113 p1 = widest_int::from (bot, SIGNED);
2114 if (p1 == 0)
2115 return false;
2116 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117 return res == 0;
2119 default:
2120 return false;
2124 /* Return true if memory reference REF with step STEP may be unaligned. */
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2129 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130 thus they are not misaligned. */
2131 if (TREE_CODE (ref) == TARGET_MEM_REF)
2132 return false;
2134 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2138 unsigned HOST_WIDE_INT bitpos;
2139 unsigned int ref_align;
2140 get_object_alignment_1 (ref, &ref_align, &bitpos);
2141 if (ref_align < align
2142 || (bitpos % align) != 0
2143 || (bitpos % BITS_PER_UNIT) != 0)
2144 return true;
2146 unsigned int trailing_zeros = tree_ctz (step);
2147 if (trailing_zeros < HOST_BITS_PER_INT
2148 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149 return true;
2151 return false;
2154 /* Return true if EXPR may be non-addressable. */
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2159 switch (TREE_CODE (expr))
2161 case TARGET_MEM_REF:
2162 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163 target, thus they are always addressable. */
2164 return false;
2166 case MEM_REF:
2167 /* Likewise for MEM_REFs, modulo the storage order. */
2168 return REF_REVERSE_STORAGE_ORDER (expr);
2170 case BIT_FIELD_REF:
2171 if (REF_REVERSE_STORAGE_ORDER (expr))
2172 return true;
2173 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2175 case COMPONENT_REF:
2176 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177 return true;
2178 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2181 case ARRAY_REF:
2182 case ARRAY_RANGE_REF:
2183 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184 return true;
2185 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2187 case VIEW_CONVERT_EXPR:
2188 /* This kind of view-conversions may wrap non-addressable objects
2189 and make them look addressable. After some processing the
2190 non-addressability may be uncovered again, causing ADDR_EXPRs
2191 of inappropriate objects to be built. */
2192 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194 return true;
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2197 CASE_CONVERT:
2198 return true;
2200 default:
2201 break;
2204 return false;
2207 /* Finds addresses in *OP_P inside STMT. */
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211 tree *op_p)
2213 tree base = *op_p, step = size_zero_node;
2214 struct iv *civ;
2215 struct ifs_ivopts_data ifs_ivopts_data;
2217 /* Do not play with volatile memory references. A bit too conservative,
2218 perhaps, but safe. */
2219 if (gimple_has_volatile_ops (stmt))
2220 goto fail;
2222 /* Ignore bitfields for now. Not really something terribly complicated
2223 to handle. TODO. */
2224 if (TREE_CODE (base) == BIT_FIELD_REF)
2225 goto fail;
2227 base = unshare_expr (base);
2229 if (TREE_CODE (base) == TARGET_MEM_REF)
2231 tree type = build_pointer_type (TREE_TYPE (base));
2232 tree astep;
2234 if (TMR_BASE (base)
2235 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2237 civ = get_iv (data, TMR_BASE (base));
2238 if (!civ)
2239 goto fail;
2241 TMR_BASE (base) = civ->base;
2242 step = civ->step;
2244 if (TMR_INDEX2 (base)
2245 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2247 civ = get_iv (data, TMR_INDEX2 (base));
2248 if (!civ)
2249 goto fail;
2251 TMR_INDEX2 (base) = civ->base;
2252 step = civ->step;
2254 if (TMR_INDEX (base)
2255 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2257 civ = get_iv (data, TMR_INDEX (base));
2258 if (!civ)
2259 goto fail;
2261 TMR_INDEX (base) = civ->base;
2262 astep = civ->step;
2264 if (astep)
2266 if (TMR_STEP (base))
2267 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2269 step = fold_build2 (PLUS_EXPR, type, step, astep);
2273 if (integer_zerop (step))
2274 goto fail;
2275 base = tree_mem_ref_addr (type, base);
2277 else
2279 ifs_ivopts_data.ivopts_data = data;
2280 ifs_ivopts_data.stmt = stmt;
2281 ifs_ivopts_data.step = size_zero_node;
2282 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283 || integer_zerop (ifs_ivopts_data.step))
2284 goto fail;
2285 step = ifs_ivopts_data.step;
2287 /* Check that the base expression is addressable. This needs
2288 to be done after substituting bases of IVs into it. */
2289 if (may_be_nonaddressable_p (base))
2290 goto fail;
2292 /* Moreover, on strict alignment platforms, check that it is
2293 sufficiently aligned. */
2294 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295 goto fail;
2297 base = build_fold_addr_expr (base);
2299 /* Substituting bases of IVs into the base expression might
2300 have caused folding opportunities. */
2301 if (TREE_CODE (base) == ADDR_EXPR)
2303 tree *ref = &TREE_OPERAND (base, 0);
2304 while (handled_component_p (*ref))
2305 ref = &TREE_OPERAND (*ref, 0);
2306 if (TREE_CODE (*ref) == MEM_REF)
2308 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309 TREE_OPERAND (*ref, 0),
2310 TREE_OPERAND (*ref, 1));
2311 if (tem)
2312 *ref = tem;
2317 civ = alloc_iv (data, base, step);
2318 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319 return;
2321 fail:
2322 for_each_index (op_p, idx_record_use, data);
2325 /* Finds and records invariants used in STMT. */
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2330 ssa_op_iter iter;
2331 use_operand_p use_p;
2332 tree op;
2334 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2336 op = USE_FROM_PTR (use_p);
2337 record_invariant (data, op, false);
2341 /* Finds interesting uses of induction variables in the statement STMT.
   Dispatches on the statement kind: conditions, assignments (loads,
   stores, comparisons) and PHI nodes are handled specially; anything
   else falls back to recording each iv operand as a nonlinear use.  */
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2346 struct iv *iv;
2347 tree op, *lhs, *rhs;
2348 ssa_op_iter iter;
2349 use_operand_p use_p;
2350 enum tree_code code;
2352 find_invariants_stmt (data, stmt);
/* Loop-exit style conditions get their own use kind.  */
2354 if (gimple_code (stmt) == GIMPLE_COND)
2356 find_interesting_uses_cond (data, stmt);
2357 return;
2360 if (is_gimple_assign (stmt))
2362 lhs = gimple_assign_lhs_ptr (stmt);
2363 rhs = gimple_assign_rhs1_ptr (stmt);
2365 if (TREE_CODE (*lhs) == SSA_NAME)
2367 /* If the statement defines an induction variable, the uses are not
2368 interesting by themselves. */
2370 iv = get_iv (data, *lhs);
2372 if (iv && !integer_zerop (iv->step))
2373 return;
2376 code = gimple_assign_rhs_code (stmt);
2377 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378 && (REFERENCE_CLASS_P (*rhs)
2379 || is_gimple_val (*rhs)))
/* A memory load makes the rhs an address use; a plain value is a
   nonlinear use.  A store likewise makes the lhs an address use.  */
2381 if (REFERENCE_CLASS_P (*rhs))
2382 find_interesting_uses_address (data, stmt, rhs);
2383 else
2384 find_interesting_uses_op (data, *rhs);
2386 if (REFERENCE_CLASS_P (*lhs))
2387 find_interesting_uses_address (data, stmt, lhs);
2388 return;
2390 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2392 find_interesting_uses_cond (data, stmt);
2393 return;
2396 /* TODO -- we should also handle address uses of type
2398 memory = call (whatever);
2402 call (memory). */
/* A PHI in the loop header defining a nonzero-step iv is itself a biv;
   its operands are not interesting uses.  */
2405 if (gimple_code (stmt) == GIMPLE_PHI
2406 && gimple_bb (stmt) == data->current_loop->header)
2408 iv = get_iv (data, PHI_RESULT (stmt));
2410 if (iv && !integer_zerop (iv->step))
2411 return;
/* Fallback: record every iv operand of STMT as a nonlinear use.  */
2414 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2416 op = USE_FROM_PTR (use_p);
2418 if (TREE_CODE (op) != SSA_NAME)
2419 continue;
2421 iv = get_iv (data, op);
2422 if (!iv)
2423 continue;
2425 find_interesting_uses_op (data, op);
2429 /* Finds interesting uses of induction variables outside of loops
2430 on loop exit edge EXIT.  Values live across EXIT show up as PHI
   arguments in the destination block; each non-virtual one is recorded
   as a plain (nonlinear) use.  */
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2435 gphi *phi;
2436 gphi_iterator psi;
2437 tree def;
2439 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2441 phi = psi.phi ();
2442 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443 if (!virtual_operand_p (def))
2444 find_interesting_uses_op (data, def);
2448 /* Compute maximum offset of [base + offset] addressing mode
2449 for memory reference represented by USE.  The result is cached per
   (address space, memory mode) pair since it depends only on the
   target, not on the particular use.  */
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2454 int width;
2455 rtx reg, addr;
2456 HOST_WIDE_INT i, off;
2457 unsigned list_index, num;
2458 addr_space_t as;
2459 machine_mode mem_mode, addr_mode;
/* Cache indexed by as * MAX_MACHINE_MODE + mem_mode; -1 marks an entry
   that has not been computed yet.  */
2460 static vec<HOST_WIDE_INT> max_offset_list;
2462 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2465 num = max_offset_list.length ();
2466 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467 if (list_index >= num)
2469 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470 for (; num < max_offset_list.length (); num++)
2471 max_offset_list[num] = -1;
2474 off = max_offset_list[list_index];
2475 if (off != -1)
2476 return off;
/* Probe offsets of the form (1 << i) - 1 for decreasing i until the
   target accepts [reg + offset] as a legitimate address.  */
2478 addr_mode = targetm.addr_space.address_mode (as);
2479 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2482 width = GET_MODE_BITSIZE (addr_mode) - 1;
2483 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484 width = HOST_BITS_PER_WIDE_INT - 1;
2486 for (i = width; i > 0; i--)
2488 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490 if (memory_address_addr_space_p (mem_mode, addr, as))
2491 break;
2493 /* For some strict-alignment targets, the offset must be naturally
2494 aligned. Try an aligned offset if mem_mode is not QImode. */
2495 off = ((unsigned HOST_WIDE_INT) 1 << i);
2496 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2498 off -= GET_MODE_SIZE (mem_mode);
2499 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500 if (memory_address_addr_space_p (mem_mode, addr, as))
2501 break;
2504 if (i == 0)
2505 off = 0;
2507 max_offset_list[list_index] = off;
2508 return off;
2511 /* Comparison function to sort group in ascending order of addr_offset. */
2513 static int
2514 group_compare_offset (const void *a, const void *b)
2516 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2519 if ((*u1)->addr_offset != (*u2)->addr_offset)
2520 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521 else
2522 return 0;
2525 /* Check if small groups should be split. Return true if no group
2526 contains more than two uses with distinct addr_offsets. Return
2527 false otherwise. We want to split such groups because:
2529 1) Small groups don't have much benefit and may interfere with
2530 general candidate selection.
2531 2) Size for problem with only small groups is usually small and
2532 general algorithm can handle it well.
2534 TODO -- Above claim may not hold when we want to merge memory
2535 accesses with consecutive addresses.
   As a side effect, every address group is sorted in ascending order
   of addr_offset, which split_address_groups relies on.  */
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2540 unsigned int i, j, distinct = 1;
2541 struct iv_use *pre;
2542 struct iv_group *group;
2544 for (i = 0; i < data->vgroups.length (); i++)
2546 group = data->vgroups[i];
2547 if (group->vuses.length () == 1)
2548 continue;
2550 gcc_assert (group->type == USE_ADDRESS);
/* A two-element group needs at most one swap instead of a qsort.  */
2551 if (group->vuses.length () == 2)
2553 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554 std::swap (group->vuses[0], group->vuses[1]);
2556 else
2557 group->vuses.qsort (group_compare_offset);
/* Once a group with more than two distinct offsets has been seen the
   answer is settled, but keep iterating so every group gets sorted.  */
2559 if (distinct > 2)
2560 continue;
2562 distinct = 1;
2563 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2565 if (group->vuses[j]->addr_offset != pre->addr_offset)
2567 pre = group->vuses[j];
2568 distinct++;
2571 if (distinct > 2)
2572 break;
2576 return (distinct <= 2);
2579 /* For each group of address type uses, this function further groups
2580 these uses according to the maximum offset supported by target's
2581 [base + offset] addressing mode.  Also assigns final use/group ids.  */
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2586 unsigned int i, j;
2587 HOST_WIDE_INT max_offset = -1;
2589 /* Reset max offset to split all small groups. */
2590 if (split_small_address_groups_p (data))
2591 max_offset = 0;
2593 for (i = 0; i < data->vgroups.length (); i++)
2595 struct iv_group *group = data->vgroups[i];
2596 struct iv_use *use = group->vuses[0];
2598 use->id = 0;
2599 use->group_id = group->id;
2600 if (group->vuses.length () == 1)
2601 continue;
/* max_offset == 0 means "split everything"; otherwise recompute the
   target limit for this use's address space and mode.  */
2603 if (max_offset != 0)
2604 max_offset = compute_max_addr_offset (use);
2606 for (j = 1; j < group->vuses.length (); j++)
2608 struct iv_use *next = group->vuses[j];
2610 /* Only uses with offset that can fit in offset part against
2611 the first use can be grouped together. */
2612 if (next->addr_offset - use->addr_offset
2613 > (unsigned HOST_WIDE_INT) max_offset)
2614 break;
2616 next->id = j;
2617 next->group_id = group->id;
2619 /* Split group. */
2620 if (j < group->vuses.length ())
/* Move the trailing uses [j, end) into a fresh group; record_group
   appends it to data->vgroups, so this same loop processes it too.  */
2622 struct iv_group *new_group = record_group (data, group->type);
2623 new_group->vuses.safe_splice (group->vuses);
2624 new_group->vuses.block_remove (0, j);
2625 group->vuses.truncate (j);
2630 /* Finds uses of the induction variables that are interesting.  Driver:
   walks every block of the current loop, recording uses on exit edges,
   in PHI nodes and in ordinary statements, then splits address groups
   and optionally dumps the result.  */
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2635 basic_block bb;
2636 gimple_stmt_iterator bsi;
2637 basic_block *body = get_loop_body (data->current_loop);
2638 unsigned i;
2639 edge e;
2641 for (i = 0; i < data->current_loop->num_nodes; i++)
2643 edge_iterator ei;
2644 bb = body[i];
/* Edges leaving the loop (but not the function) carry outside uses.  */
2646 FOR_EACH_EDGE (e, ei, bb->succs)
2647 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2649 find_interesting_uses_outside (data, e);
2651 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652 find_interesting_uses_stmt (data, gsi_stmt (bsi));
/* Debug statements must not influence optimization decisions.  */
2653 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654 if (!is_gimple_debug (gsi_stmt (bsi)))
2655 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2658 split_address_groups (data);
2660 if (dump_file && (dump_flags & TDF_DETAILS))
2662 bitmap_iterator bi;
2664 fprintf (dump_file, "\n<Invariant Vars>:\n");
2665 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2667 struct version_info *info = ver_info (data, i);
2668 if (info->inv_id)
2670 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671 print_generic_expr (dump_file, info->name, TDF_SLIM);
2672 fprintf (dump_file, "%s\n",
2673 info->has_nonlin_use ? "" : "\t(eliminable)");
2677 fprintf (dump_file, "\n<IV Groups>:\n");
2678 dump_groups (dump_file, data);
2679 fprintf (dump_file, "\n");
2682 free (body);
2685 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2686 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2687 we are at the top-level of the processed address.  Returns EXPR with the
   constant parts removed (or ORIG_EXPR unchanged if nothing strippable
   was found); the stripped constant byte offset accumulates in *OFFSET.  */
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691 HOST_WIDE_INT *offset)
2693 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694 enum tree_code code;
2695 tree type, orig_type = TREE_TYPE (expr);
2696 HOST_WIDE_INT off0, off1, st;
2697 tree orig_expr = expr;
2699 STRIP_NOPS (expr);
2701 type = TREE_TYPE (expr);
2702 code = TREE_CODE (expr);
2703 *offset = 0;
2705 switch (code)
/* A constant is absorbed entirely into *OFFSET, leaving zero behind.  */
2707 case INTEGER_CST:
2708 if (!cst_and_fits_in_hwi (expr)
2709 || integer_zerop (expr))
2710 return orig_expr;
2712 *offset = int_cst_value (expr);
2713 return build_int_cst (orig_type, 0);
/* For additive operations, recurse into both operands and combine
   their offsets according to the operation's sign.  */
2715 case POINTER_PLUS_EXPR:
2716 case PLUS_EXPR:
2717 case MINUS_EXPR:
2718 op0 = TREE_OPERAND (expr, 0);
2719 op1 = TREE_OPERAND (expr, 1);
2721 op0 = strip_offset_1 (op0, false, false, &off0);
2722 op1 = strip_offset_1 (op1, false, false, &off1);
2724 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725 if (op0 == TREE_OPERAND (expr, 0)
2726 && op1 == TREE_OPERAND (expr, 1))
2727 return orig_expr;
2729 if (integer_zerop (op1))
2730 expr = op0;
2731 else if (integer_zerop (op0))
2733 if (code == MINUS_EXPR)
2734 expr = fold_build1 (NEGATE_EXPR, type, op1);
2735 else
2736 expr = op1;
2738 else
2739 expr = fold_build2 (code, type, op0, op1);
2741 return fold_convert (orig_type, expr);
/* A constant multiplier scales whatever offset the first operand
   contributes.  */
2743 case MULT_EXPR:
2744 op1 = TREE_OPERAND (expr, 1);
2745 if (!cst_and_fits_in_hwi (op1))
2746 return orig_expr;
2748 op0 = TREE_OPERAND (expr, 0);
2749 op0 = strip_offset_1 (op0, false, false, &off0);
2750 if (op0 == TREE_OPERAND (expr, 0))
2751 return orig_expr;
2753 *offset = off0 * int_cst_value (op1);
2754 if (integer_zerop (op0))
2755 expr = op0;
2756 else
2757 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2759 return fold_convert (orig_type, expr);
/* Inside an address, index * element-size contributes to the offset.  */
2761 case ARRAY_REF:
2762 case ARRAY_RANGE_REF:
2763 if (!inside_addr)
2764 return orig_expr;
2766 step = array_ref_element_size (expr);
2767 if (!cst_and_fits_in_hwi (step))
2768 break;
2770 st = int_cst_value (step);
2771 op1 = TREE_OPERAND (expr, 1);
2772 op1 = strip_offset_1 (op1, false, false, &off1);
2773 *offset = off1 * st;
2775 if (top_compref
2776 && integer_zerop (op1))
2778 /* Strip the component reference completely. */
2779 op0 = TREE_OPERAND (expr, 0);
2780 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781 *offset += off0;
2782 return op0;
2784 break;
/* A field access with a known constant offset can likewise be folded
   into *OFFSET when at the top of the address.  */
2786 case COMPONENT_REF:
2788 tree field;
2790 if (!inside_addr)
2791 return orig_expr;
2793 tmp = component_ref_field_offset (expr);
2794 field = TREE_OPERAND (expr, 1);
2795 if (top_compref
2796 && cst_and_fits_in_hwi (tmp)
2797 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2799 HOST_WIDE_INT boffset, abs_off;
2801 /* Strip the component reference completely. */
2802 op0 = TREE_OPERAND (expr, 0);
2803 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806 if (boffset < 0)
2807 abs_off = -abs_off;
2809 *offset = off0 + int_cst_value (tmp) + abs_off;
2810 return op0;
2813 break;
/* Entering an ADDR_EXPR switches into "inside address" mode.  */
2815 case ADDR_EXPR:
2816 op0 = TREE_OPERAND (expr, 0);
2817 op0 = strip_offset_1 (op0, true, true, &off0);
2818 *offset += off0;
2820 if (op0 == TREE_OPERAND (expr, 0))
2821 return orig_expr;
2823 expr = build_fold_addr_expr (op0);
2824 return fold_convert (orig_type, expr);
2826 case MEM_REF:
2827 /* ??? Offset operand? */
2828 inside_addr = false;
2829 break;
2831 default:
2832 return orig_expr;
2835 /* Default handling of expressions for that we want to recurse into
2836 the first operand. */
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839 *offset += off0;
2841 if (op0 == TREE_OPERAND (expr, 0)
2842 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843 return orig_expr;
2845 expr = copy_node (expr);
2846 TREE_OPERAND (expr, 0) = op0;
2847 if (op1)
2848 TREE_OPERAND (expr, 1) = op1;
2850 /* Inside address, we might strip the top level component references,
2851 thus changing type of the expression. Handling of ADDR_EXPR
2852 will fix that. */
2853 expr = fold_convert (orig_type, expr);
2855 return expr;
2858 /* Strips constant offsets from EXPR and stores them to OFFSET.
   Convenience wrapper around strip_offset_1 for expressions that are
   not (inside) addresses.  Note the signed HOST_WIDE_INT offset is
   handed back through an unsigned pointer.  */
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2863 HOST_WIDE_INT off;
2864 tree core = strip_offset_1 (expr, false, false, &off);
2865 *offset = off;
2866 return core;
2869 /* Returns variant of TYPE that can be used as base for different uses.
2870 We return unsigned type with the same precision, which avoids problems
2871 with overflows. */
2873 static tree
2874 generic_type_for (tree type)
2876 if (POINTER_TYPE_P (type))
2877 return unsigned_type_for (type);
2879 if (TYPE_UNSIGNED (type))
2880 return type;
2882 return unsigned_type_for (type);
2885 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2886 the bitmap in which we should store them.  fd_ivopts_data supplies the
   pass data, since walk_tree's single void * slot is taken by the
   bitmap pointer.  */
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2892 bitmap *depends_on = (bitmap *) data;
2893 struct version_info *info;
2895 if (TREE_CODE (*expr_p) != SSA_NAME)
2896 return NULL_TREE;
2897 info = name_info (fd_ivopts_data, *expr_p);
/* Only names recorded as invariants matter; the bitmap is allocated
   lazily on the first hit.  */
2899 if (!info->inv_id || info->has_nonlin_use)
2900 return NULL_TREE;
2902 if (!*depends_on)
2903 *depends_on = BITMAP_ALLOC (NULL);
2904 bitmap_set_bit (*depends_on, info->inv_id);
2906 return NULL_TREE;
2909 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2910 position to POS. If USE is not NULL, the candidate is set as related to
2911 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2912 replacement of the final value of the iv by a direct computation.
   Returns the (possibly pre-existing, deduplicated) candidate, or NULL
   when no candidate may be created (see -fkeep-gc-roots-live below).  */
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916 tree base, tree step, bool important, enum iv_position pos,
2917 struct iv_use *use, gimple *incremented_at,
2918 struct iv *orig_iv = NULL)
2920 unsigned i;
2921 struct iv_cand *cand = NULL;
2922 tree type, orig_type;
2924 gcc_assert (base && step);
2926 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927 live, but the ivopts code may replace a real pointer with one
2928 pointing before or after the memory block that is then adjusted
2929 into the memory block during the loop. FIXME: It would likely be
2930 better to actually force the pointer live and still use ivopts;
2931 for example, it would be enough to write the pointer into memory
2932 and keep it there until after the loop. */
2933 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934 return NULL;
2936 /* For non-original variables, make sure their values are computed in a type
2937 that does not invoke undefined behavior on overflows (since in general,
2938 we cannot prove that these induction variables are non-wrapping). */
2939 if (pos != IP_ORIGINAL)
2941 orig_type = TREE_TYPE (base);
2942 type = generic_type_for (orig_type);
2943 if (type != orig_type)
2945 base = fold_convert (type, base);
2946 step = fold_convert (type, step);
/* Deduplicate: reuse an existing candidate with the same position,
   increment statement, base, step and precision.  */
2950 for (i = 0; i < data->vcands.length (); i++)
2952 cand = data->vcands[i];
2954 if (cand->pos != pos)
2955 continue;
2957 if (cand->incremented_at != incremented_at
2958 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959 && cand->ainc_use != use))
2960 continue;
2962 if (operand_equal_p (base, cand->iv->base, 0)
2963 && operand_equal_p (step, cand->iv->step, 0)
2964 && (TYPE_PRECISION (TREE_TYPE (base))
2965 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966 break;
/* No match found -- create and register a new candidate.  */
2969 if (i == data->vcands.length ())
2971 cand = XCNEW (struct iv_cand);
2972 cand->id = i;
2973 cand->iv = alloc_iv (data, base, step);
2974 cand->pos = pos;
2975 if (pos != IP_ORIGINAL)
2977 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978 cand->var_after = cand->var_before;
2980 cand->important = important;
2981 cand->incremented_at = incremented_at;
2982 data->vcands.safe_push (cand);
/* A non-constant step may reference invariants the candidate then
   depends on.  */
2984 if (TREE_CODE (step) != INTEGER_CST)
2986 fd_ivopts_data = data;
2987 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2990 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991 cand->ainc_use = use;
2992 else
2993 cand->ainc_use = NULL;
2995 cand->orig_iv = orig_iv;
2996 if (dump_file && (dump_flags & TDF_DETAILS))
2997 dump_cand (dump_file, cand);
/* An existing candidate becomes important if any insertion said so.  */
3000 cand->important |= important;
3002 /* Relate candidate to the group for which it is added. */
3003 if (use)
3004 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3006 return cand;
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010 is allowed.
3012 The purpose is to avoid splitting latch edge with a biv increment, thus
3013 creating a jump, possibly confusing other optimization passes and leaving
3014 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
3015 is not available (so we do not have a better alternative), or if the latch
3016 edge is already nonempty. */
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3021 if (!ip_normal_pos (loop))
3022 return true;
3024 if (!empty_block_p (ip_end_pos (loop)))
3025 return true;
3027 return false;
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031 Important field is set to IMPORTANT.  A pre-inc/dec candidate is placed
   IP_BEFORE_USE with the base pre-adjusted by one step; a post-inc/dec
   candidate keeps BASE and is placed IP_AFTER_USE.  */
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035 bool important, struct iv_use *use)
3037 basic_block use_bb = gimple_bb (use->stmt);
3038 machine_mode mem_mode;
3039 unsigned HOST_WIDE_INT cstepi;
3041 /* If we insert the increment in any position other than the standard
3042 ones, we must ensure that it is incremented once per iteration.
3043 It must not be in an inner nested loop, or one side of an if
3044 statement. */
3045 if (use_bb->loop_father != data->current_loop
3046 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047 || stmt_could_throw_p (use->stmt)
3048 || !cst_and_fits_in_hwi (step))
3049 return;
3051 cstepi = int_cst_value (step);
3053 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
/* Pre-increment/decrement only helps when the step matches the access
   size exactly.  */
3054 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055 || USE_STORE_PRE_INCREMENT (mem_mode))
3056 && GET_MODE_SIZE (mem_mode) == cstepi)
3057 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058 || USE_STORE_PRE_DECREMENT (mem_mode))
3059 && GET_MODE_SIZE (mem_mode) == -cstepi))
3061 enum tree_code code = MINUS_EXPR;
3062 tree new_base;
3063 tree new_step = step;
3065 if (POINTER_TYPE_P (TREE_TYPE (base)))
3067 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068 code = POINTER_PLUS_EXPR;
3070 else
3071 new_step = fold_convert (TREE_TYPE (base), new_step);
3072 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074 use->stmt);
3076 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077 || USE_STORE_POST_INCREMENT (mem_mode))
3078 && GET_MODE_SIZE (mem_mode) == cstepi)
3079 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080 || USE_STORE_POST_DECREMENT (mem_mode))
3081 && GET_MODE_SIZE (mem_mode) == -cstepi))
3083 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084 use->stmt);
3088 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3089 position to POS. If USE is not NULL, the candidate is set as related to
3090 it. The candidate computation is scheduled before exit condition and at
3091 the end of loop.  Each available position (IP_NORMAL, IP_END) yields its
   own candidate via add_candidate_1.  */
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095 tree base, tree step, bool important, struct iv_use *use,
3096 struct iv *orig_iv = NULL)
3098 if (ip_normal_pos (data->current_loop))
3099 add_candidate_1 (data, base, step, important,
3100 IP_NORMAL, use, NULL, orig_iv);
3101 if (ip_end_pos (data->current_loop)
3102 && allow_ip_end_pos_p (data->current_loop))
3103 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3106 /* Adds standard iv candidates: {0, +, 1} in int, and also in long and
   long long when those are strictly wider yet still fit in a word.  */
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3111 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3113 /* The same for a long type if it is still fast enough. */
3114 if (TYPE_PRECISION
3115 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118 build_int_cst (long_integer_type_node, 1), true, NULL);
3120 /* The same for a long long type if it is still fast enough. */
3121 if (TYPE_PRECISION
3122 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3129 /* Adds candidates based on the old induction variable IV.  */
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3134 gimple *phi;
3135 tree def;
3136 struct iv_cand *cand;
3138 /* Check if this biv is used in address type use. */
3139 if (iv->no_overflow && iv->have_address_use
3140 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3143 tree base = fold_convert (sizetype, iv->base);
3144 tree step = fold_convert (sizetype, iv->step);
3146 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3147 add_candidate (data, base, step, true, NULL, iv);
3148 /* Add iv cand of the original type only if it has nonlinear use. */
3149 if (iv->nonlin_use)
3150 add_candidate (data, iv->base, iv->step, true, NULL);
3152 else
3153 add_candidate (data, iv->base, iv->step, true, NULL);
3155 /* The same, but with initial value zero. */
3156 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157 add_candidate (data, size_int (0), iv->step, true, NULL);
3158 else
3159 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160 iv->step, true, NULL);
3162 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163 if (gimple_code (phi) == GIMPLE_PHI)
3165 /* Additionally record the possibility of leaving the original iv
3166 untouched. */
3167 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168 /* Don't add candidate if it's from another PHI node because
3169 it's an affine iv appearing in the form of PEELED_CHREC. */
3170 phi = SSA_NAME_DEF_STMT (def);
3171 if (gimple_code (phi) != GIMPLE_PHI)
3173 cand = add_candidate_1 (data,
3174 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175 SSA_NAME_DEF_STMT (def));
/* add_candidate_1 may refuse (e.g. -fkeep-gc-roots-live).  */
3176 if (cand)
3178 cand->var_before = iv->ssa_name;
3179 cand->var_after = def;
3182 else
3183 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3187 /* Adds candidates based on the old induction variables.  Iterates over
   all relevant SSA versions and handles each biv with a nonzero step.  */
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3192 unsigned i;
3193 struct iv *iv;
3194 bitmap_iterator bi;
3196 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3198 iv = ver_info (data, i)->iv;
3199 if (iv && iv->biv_p && !integer_zerop (iv->step))
3200 add_iv_candidate_for_biv (data, iv);
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable.
   Uses sharing the same {BASE, STEP} pair accumulate in one entry, so
   add_iv_candidate_derived_from_uses can later create candidates only
   for pairs shared by multiple uses.  */
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208 tree step, struct iv_use *use)
3210 struct iv_common_cand ent;
3211 struct iv_common_cand **slot;
3213 ent.base = base;
3214 ent.step = step;
3215 ent.hash = iterative_hash_expr (base, 0);
3216 ent.hash = iterative_hash_expr (step, ent.hash);
3218 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
/* First time this {BASE, STEP} pair is seen: create the entry and also
   remember it in the flat vector used for sorting later.  */
3219 if (*slot == NULL)
3221 *slot = new iv_common_cand ();
3222 (*slot)->base = base;
3223 (*slot)->step = step;
3224 (*slot)->uses.create (8);
3225 (*slot)->hash = ent.hash;
3226 data->iv_common_cands.safe_push ((*slot));
3229 gcc_assert (use != NULL);
3230 (*slot)->uses.safe_push (use);
3231 return;
3234 /* Comparison function used to sort common candidates. */
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3239 unsigned n1, n2;
3240 const struct iv_common_cand *const *const ccand1
3241 = (const struct iv_common_cand *const *)p1;
3242 const struct iv_common_cand *const *const ccand2
3243 = (const struct iv_common_cand *const *)p2;
3245 n1 = (*ccand1)->uses.length ();
3246 n2 = (*ccand2)->uses.length ();
3247 return n2 - n1;
3250 /* Adds IV candidates based on common candidates recorded.  Entries are
   processed in order of decreasing use count, so the loop can stop at
   the first single-use entry.  */
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3255 unsigned i, j;
3256 struct iv_cand *cand_1, *cand_2;
3258 data->iv_common_cands.qsort (common_cand_cmp)
3259 for (i = 0; i < data->iv_common_cands.length (); i++)
3261 struct iv_common_cand *ptr = data->iv_common_cands[i];
3263 /* Only add IV candidate if it's derived from multiple uses. */
3264 if (ptr->uses.length () <= 1)
3265 break;
3267 cand_1 = NULL;
3268 cand_2 = NULL;
3269 if (ip_normal_pos (data->current_loop))
3270 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271 false, IP_NORMAL, NULL, NULL);
3273 if (ip_end_pos (data->current_loop)
3274 && allow_ip_end_pos_p (data->current_loop))
3275 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276 false, IP_END, NULL, NULL);
3278 /* Bind deriving uses and the new candidates. */
3279 for (j = 0; j < ptr->uses.length (); j++)
3281 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282 if (cand_1)
3283 bitmap_set_bit (group->related_cands, cand_1->id);
3284 if (cand_2)
3285 bitmap_set_bit (group->related_cands, cand_2->id);
3289 /* Release data since it is useless from this point. */
3290 data->iv_common_cand_tab->empty ();
3291 data->iv_common_cands.truncate (0);
3294 /* Adds candidates based on the value of USE's iv: the iv itself, the iv
   with zero initial value, the iv with its constant offset stripped, and
   the iv with its base object removed; most variants are also recorded
   as common candidates so they can be shared across uses.  */
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3299 unsigned HOST_WIDE_INT offset;
3300 tree base;
3301 tree basetype;
3302 struct iv *iv = use->iv;
3304 add_candidate (data, iv->base, iv->step, false, use);
3306 /* Record common candidate for use in case it can be shared by others. */
3307 record_common_cand (data, iv->base, iv->step, use);
3309 /* Record common candidate with initial value zero. */
3310 basetype = TREE_TYPE (iv->base);
3311 if (POINTER_TYPE_P (basetype))
3312 basetype = sizetype;
3313 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3315 /* Record common candidate with constant offset stripped in base.
3316 Like the use itself, we also add candidate directly for it. */
3317 base = strip_offset (iv->base, &offset);
3318 if (offset || base != iv->base)
3320 record_common_cand (data, base, iv->step, use);
3321 add_candidate (data, base, iv->step, false, use);
3324 /* Record common candidate with base_object removed in base. */
3325 if (iv->base_object != NULL)
3327 unsigned i;
3328 aff_tree aff_base;
3329 tree step, base_object = iv->base_object;
3331 base = iv->base;
3332 step = iv->step;
3333 STRIP_NOPS (base);
3334 STRIP_NOPS (step);
3335 STRIP_NOPS (base_object);
3336 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
/* Look for the base object as a coefficient-1 element of the affine
   combination; if present, drop it.  */
3337 for (i = 0; i < aff_base.n; i++)
3339 if (aff_base.elts[i].coef != 1)
3340 continue;
3342 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343 break;
3345 if (i < aff_base.n)
3347 aff_combination_remove_elt (&aff_base, i);
3348 base = aff_combination_to_tree (&aff_base);
3349 basetype = TREE_TYPE (base);
3350 if (POINTER_TYPE_P (basetype))
3351 basetype = sizetype;
3353 step = fold_convert (basetype, step);
3354 record_common_cand (data, base, step, use);
3355 /* Also record common candidate with offset stripped. */
3356 base = strip_offset (base, &offset);
3357 if (offset)
3358 record_common_cand (data, base, step, use);
3362 /* At last, add auto-incremental candidates. Make such variables
3363 important since other iv uses with same base object may be based
3364 on it. */
3365 if (use != NULL && use->type == USE_ADDRESS)
3366 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3369 /* Adds candidates based on the uses, then materializes the candidates
   shared by several uses.  */
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3374 unsigned i;
3376 /* Only add candidate for the first use in group. */
3377 for (i = 0; i < data->vgroups.length (); i++)
3379 struct iv_group *group = data->vgroups[i];
3381 gcc_assert (group->vuses[0] != NULL);
3382 add_iv_candidate_for_use (data, group->vuses[0]);
3384 add_iv_candidate_derived_from_uses (data);
3387 /* Record important candidates and add them to related_cands bitmaps.
   Also decides whether the number of candidates is small enough to
   consider every (group, candidate) pair.  */
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3392 unsigned i;
3393 struct iv_group *group;
3395 for (i = 0; i < data->vcands.length (); i++)
3397 struct iv_cand *cand = data->vcands[i];
3399 if (cand->important)
3400 bitmap_set_bit (data->important_candidates, i);
3403 data->consider_all_candidates = (data->vcands.length ()
3404 <= CONSIDER_ALL_CANDIDATES_BOUND);
3406 /* Add important candidates to groups' related_cands bitmaps. */
3407 for (i = 0; i < data->vgroups.length (); i++)
3409 group = data->vgroups[i];
3410 bitmap_ior_into (group->related_cands, data->important_candidates);
3414 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3415 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3416 we allocate a simple list to every use. */
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3421 unsigned i, size, s;
3423 for (i = 0; i < data->vgroups.length (); i++)
3425 struct iv_group *group = data->vgroups[i];
3427 if (data->consider_all_candidates)
3428 size = data->vcands.length ();
3429 else
3431 s = bitmap_count_bits (group->related_cands);
3433 /* Round up to the power of two, so that taking the modulo by it
   is fast; get_group_iv_cost uses n_map_members as an
   open-addressing hash table size.  */
3434 size = s ? (1 << ceil_log2 (s)) : 1;
3437 group->n_map_members = size;
3438 group->cost_map = XCNEWVEC (struct cost_pair, size);
3442 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3443 on invariants DEPENDS_ON and that the value used in expressing it
3444 is VALUE, and in case of iv elimination the comparison operator is COMP.
   Infinite costs are not stored at all; their absence in the map means
   "this pair is impossible".  */
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448 struct iv_group *group, struct iv_cand *cand,
3449 comp_cost cost, bitmap depends_on, tree value,
3450 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3452 unsigned i, s;
3454 if (cost.infinite_cost_p ())
3456 BITMAP_FREE (depends_on);
3457 return;
/* With all candidates considered, cost_map is a direct-indexed array.  */
3460 if (data->consider_all_candidates)
3462 group->cost_map[cand->id].cand = cand;
3463 group->cost_map[cand->id].cost = cost;
3464 group->cost_map[cand->id].depends_on = depends_on;
3465 group->cost_map[cand->id].value = value;
3466 group->cost_map[cand->id].comp = comp;
3467 group->cost_map[cand->id].inv_expr = inv_expr;
3468 return;
/* Otherwise cost_map is a small open-addressing hash table keyed by
   candidate id; probe linearly, wrapping around once.  */
3471 /* n_map_members is a power of two, so this computes modulo. */
3472 s = cand->id & (group->n_map_members - 1);
3473 for (i = s; i < group->n_map_members; i++)
3474 if (!group->cost_map[i].cand)
3475 goto found;
3476 for (i = 0; i < s; i++)
3477 if (!group->cost_map[i].cand)
3478 goto found;
3480 gcc_unreachable ();
3482 found:
3483 group->cost_map[i].cand = cand;
3484 group->cost_map[i].cost = cost;
3485 group->cost_map[i].depends_on = depends_on;
3486 group->cost_map[i].value = value;
3487 group->cost_map[i].comp = comp;
3488 group->cost_map[i].inv_expr = inv_expr;
3491 /* Gets cost of (GROUP, CAND) pair.  Returns NULL when the pair was
   never stored (i.e. it has infinite cost).  The lookup mirrors the
   probing scheme of set_group_iv_cost.  */
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495 struct iv_cand *cand)
3497 unsigned i, s;
3498 struct cost_pair *ret;
3500 if (!cand)
3501 return NULL;
3503 if (data->consider_all_candidates)
3505 ret = group->cost_map + cand->id;
3506 if (!ret->cand)
3507 return NULL;
3509 return ret;
3512 /* n_map_members is a power of two, so this computes modulo. */
3513 s = cand->id & (group->n_map_members - 1);
/* An empty slot terminates the probe sequence: CAND is not present.  */
3514 for (i = s; i < group->n_map_members; i++)
3515 if (group->cost_map[i].cand == cand)
3516 return group->cost_map + i;
3517 else if (group->cost_map[i].cand == NULL)
3518 return NULL;
3519 for (i = 0; i < s; i++)
3520 if (group->cost_map[i].cand == cand)
3521 return group->cost_map + i;
3522 else if (group->cost_map[i].cand == NULL)
3523 return NULL;
3525 return NULL;
3528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.
   Static/external objects get a SYMBOL_REF-based MEM; everything else is
   given a MEM addressed through a fresh fake register numbered *REGNO
   (incremented).  Used only for cost estimation, not code generation.  */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3532 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533 machine_mode address_mode = targetm.addr_space.address_mode (as);
3534 rtx x;
3536 gcc_assert (obj);
3537 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3539 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540 x = gen_rtx_SYMBOL_REF (address_mode, name);
3541 SET_SYMBOL_REF_DECL (x, obj);
3542 x = gen_rtx_MEM (DECL_MODE (obj), x);
3543 set_mem_addr_space (x, as);
3544 targetm.encode_section_info (obj, x, true);
3546 else
3548 x = gen_raw_REG (address_mode, (*regno)++);
3549 x = gen_rtx_MEM (DECL_MODE (obj), x);
3550 set_mem_addr_space (x, as);
3553 return x;
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number (incremented
   for each register created).  Any decl whose rtl is set here is also
   pushed on decl_rtl_to_reset so the fake rtl can be cleared later.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip component references to reach the base object whose address
	 is taken.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	/* The address is taken, so the object must live in memory.  */
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;	/* Do not walk into subtrees of an SSA name.  */
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;	/* Decls are leaves for this walk.  */
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR by actually expanding it to
   RTL and summing the insn costs.  SPEED selects optimization for speed
   rather than size.  The current function's frequency and the RTL profile
   are temporarily overridden so the expansion is costed consistently.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  /* Pretend the function has normal frequency so the expansion is not
     biased; restored below.  */
  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give every referenced decl a fake DECL_RTL so expansion can proceed.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* Account for the cost of the final result location as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}
3651 /* Returns variable containing the value of candidate CAND at statement AT. */
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3656 if (stmt_after_increment (loop, cand, stmt))
3657 return cand->var_after;
3658 else
3659 return cand->var_before;
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      /* The inner type must be at least as wide as TYPE.  */
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      /* Both inner operands must have the same precision.  */
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  /* Both conversions can be stripped; hand back the stripped operands.  */
  *a = suba;
  *b = subb;
  return wider_type;
}
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.
   The result is use = ubase - ratio * cbase + ratio * var, computed in
   the unsigned variant of the use type to avoid undefined overflow.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple *at,
		     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the convertion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals to
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
/* Return the type of USE.  For address uses a pointer type derived from
   the referenced memory is built, since the base type may be a void
   pointer; otherwise the type of the iv base is used as-is.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      /* The rebuilt pointer must target the same address space.  */
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
3830 /* Determines the expression by that USE is expressed from induction variable
3831 CAND at statement AT in LOOP. The computation is unshared. */
3833 static tree
3834 get_computation_at (struct loop *loop,
3835 struct iv_use *use, struct iv_cand *cand, gimple *at)
3837 aff_tree aff;
3838 tree type = get_use_type (use);
3840 if (!get_computation_aff (loop, use, cand, at, &aff))
3841 return NULL_TREE;
3842 unshare_aff_combination (&aff);
3843 return fold_convert (type, aff_combination_to_tree (&aff));
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper around
   get_computation_at, evaluated at the use's own statement.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856 If we're optimizing for space, the loop setup overhead is constant;
3857 if we're optimizing for speed, amortize it over the per-iteration cost. */
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3861 if (cost == INFTY)
3862 return cost;
3863 else if (optimize_loop_for_speed_p (data->current_loop))
3864 return cost / avg_loop_niter (data->current_loop);
3865 else
3866 return cost;
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  Results are cached in a per-(AS, MODE) bitmap that is
   computed lazily by probing the target's legitimate-address predicate.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  /* One cache slot per (address space, machine mode) pair.  */
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      /* Probe every ratio in [-MAX_RATIO, MAX_RATIO], accepting it if
	 either reg1 * i + reg2 or reg1 * i alone is a valid address.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

/* Kinds of auto-increment addressing recognized by get_address_cost.  */
enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

/* Cached per-(address space, mode) address cost information.  */
struct address_cost_data
{
  /* Valid constant offset range for a plain reg + offset address.  */
  HOST_WIDE_INT min_offset, max_offset;
  /* Cost indexed by [symbol][var][offset][ratio] presence flags.  */
  unsigned costs[2][2][2][2];
  /* Cost of each auto-increment addressing form, when available.  */
  unsigned ainc_costs[AINC_NONE];
};
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960 HOST_WIDE_INT cstep, machine_mode mem_mode,
3961 addr_space_t as, bool speed,
3962 bool stmt_after_inc, bool *may_autoinc)
3964 machine_mode address_mode = targetm.addr_space.address_mode (as);
3965 static vec<address_cost_data *> address_cost_data_list;
3966 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967 address_cost_data *data;
3968 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970 unsigned cost, acost, complexity;
3971 enum ainc_type autoinc_type;
3972 bool offset_p, ratio_p, autoinc;
3973 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974 unsigned HOST_WIDE_INT mask;
3975 unsigned bits;
3977 if (data_index >= address_cost_data_list.length ())
3978 address_cost_data_list.safe_grow_cleared (data_index + 1);
3980 data = address_cost_data_list[data_index];
3981 if (!data)
3983 HOST_WIDE_INT i;
3984 HOST_WIDE_INT rat, off = 0;
3985 int old_cse_not_expected, width;
3986 unsigned sym_p, var_p, off_p, rat_p, add_c;
3987 rtx_insn *seq;
3988 rtx addr, base;
3989 rtx reg0, reg1;
3991 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3993 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3995 width = GET_MODE_BITSIZE (address_mode) - 1;
3996 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997 width = HOST_BITS_PER_WIDE_INT - 1;
3998 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
4000 for (i = width; i >= 0; i--)
4002 off = -((unsigned HOST_WIDE_INT) 1 << i);
4003 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004 if (memory_address_addr_space_p (mem_mode, addr, as))
4005 break;
4007 data->min_offset = (i == -1? 0 : off);
4009 for (i = width; i >= 0; i--)
4011 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013 if (memory_address_addr_space_p (mem_mode, addr, as))
4014 break;
4015 /* For some strict-alignment targets, the offset must be naturally
4016 aligned. Try an aligned offset if mem_mode is not QImode. */
4017 off = mem_mode != QImode
4018 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019 - GET_MODE_SIZE (mem_mode)
4020 : 0;
4021 if (off > 0)
4023 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024 if (memory_address_addr_space_p (mem_mode, addr, as))
4025 break;
4028 if (i == -1)
4029 off = 0;
4030 data->max_offset = off;
4032 if (dump_file && (dump_flags & TDF_DETAILS))
4034 fprintf (dump_file, "get_address_cost:\n");
4035 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036 GET_MODE_NAME (mem_mode),
4037 data->min_offset);
4038 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039 GET_MODE_NAME (mem_mode),
4040 data->max_offset);
4043 rat = 1;
4044 for (i = 2; i <= MAX_RATIO; i++)
4045 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4047 rat = i;
4048 break;
4051 /* Compute the cost of various addressing modes. */
4052 acost = 0;
4053 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4056 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057 || USE_STORE_PRE_DECREMENT (mem_mode))
4059 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060 has_predec[mem_mode]
4061 = memory_address_addr_space_p (mem_mode, addr, as);
4063 if (has_predec[mem_mode])
4064 data->ainc_costs[AINC_PRE_DEC]
4065 = address_cost (addr, mem_mode, as, speed);
4067 if (USE_LOAD_POST_DECREMENT (mem_mode)
4068 || USE_STORE_POST_DECREMENT (mem_mode))
4070 addr = gen_rtx_POST_DEC (address_mode, reg0);
4071 has_postdec[mem_mode]
4072 = memory_address_addr_space_p (mem_mode, addr, as);
4074 if (has_postdec[mem_mode])
4075 data->ainc_costs[AINC_POST_DEC]
4076 = address_cost (addr, mem_mode, as, speed);
4078 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079 || USE_STORE_PRE_DECREMENT (mem_mode))
4081 addr = gen_rtx_PRE_INC (address_mode, reg0);
4082 has_preinc[mem_mode]
4083 = memory_address_addr_space_p (mem_mode, addr, as);
4085 if (has_preinc[mem_mode])
4086 data->ainc_costs[AINC_PRE_INC]
4087 = address_cost (addr, mem_mode, as, speed);
4089 if (USE_LOAD_POST_INCREMENT (mem_mode)
4090 || USE_STORE_POST_INCREMENT (mem_mode))
4092 addr = gen_rtx_POST_INC (address_mode, reg0);
4093 has_postinc[mem_mode]
4094 = memory_address_addr_space_p (mem_mode, addr, as);
4096 if (has_postinc[mem_mode])
4097 data->ainc_costs[AINC_POST_INC]
4098 = address_cost (addr, mem_mode, as, speed);
4100 for (i = 0; i < 16; i++)
4102 sym_p = i & 1;
4103 var_p = (i >> 1) & 1;
4104 off_p = (i >> 2) & 1;
4105 rat_p = (i >> 3) & 1;
4107 addr = reg0;
4108 if (rat_p)
4109 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110 gen_int_mode (rat, address_mode));
4112 if (var_p)
4113 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4115 if (sym_p)
4117 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118 /* ??? We can run into trouble with some backends by presenting
4119 it with symbols which haven't been properly passed through
4120 targetm.encode_section_info. By setting the local bit, we
4121 enhance the probability of things working. */
4122 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4124 if (off_p)
4125 base = gen_rtx_fmt_e (CONST, address_mode,
4126 gen_rtx_fmt_ee
4127 (PLUS, address_mode, base,
4128 gen_int_mode (off, address_mode)));
4130 else if (off_p)
4131 base = gen_int_mode (off, address_mode);
4132 else
4133 base = NULL_RTX;
4135 if (base)
4136 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4138 start_sequence ();
4139 /* To avoid splitting addressing modes, pretend that no cse will
4140 follow. */
4141 old_cse_not_expected = cse_not_expected;
4142 cse_not_expected = true;
4143 addr = memory_address_addr_space (mem_mode, addr, as);
4144 cse_not_expected = old_cse_not_expected;
4145 seq = get_insns ();
4146 end_sequence ();
4148 acost = seq_cost (seq, speed);
4149 acost += address_cost (addr, mem_mode, as, speed);
4151 if (!acost)
4152 acost = 1;
4153 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4156 /* On some targets, it is quite expensive to load symbol to a register,
4157 which makes addresses that contain symbols look much more expensive.
4158 However, the symbol will have to be loaded in any case before the
4159 loop (and quite likely we have it in register already), so it does not
4160 make much sense to penalize them too heavily. So make some final
4161 tweaks for the SYMBOL_PRESENT modes:
4163 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164 var is cheaper, use this mode with small penalty.
4165 If VAR_PRESENT is true, try whether the mode with
4166 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4167 if this is the case, use it. */
4168 add_c = add_cost (speed, address_mode);
4169 for (i = 0; i < 8; i++)
4171 var_p = i & 1;
4172 off_p = (i >> 1) & 1;
4173 rat_p = (i >> 2) & 1;
4175 acost = data->costs[0][1][off_p][rat_p] + 1;
4176 if (var_p)
4177 acost += add_c;
4179 if (acost < data->costs[1][var_p][off_p][rat_p])
4180 data->costs[1][var_p][off_p][rat_p] = acost;
4183 if (dump_file && (dump_flags & TDF_DETAILS))
4185 fprintf (dump_file, "<Address Costs>:\n");
4187 for (i = 0; i < 16; i++)
4189 sym_p = i & 1;
4190 var_p = (i >> 1) & 1;
4191 off_p = (i >> 2) & 1;
4192 rat_p = (i >> 3) & 1;
4194 fprintf (dump_file, " ");
4195 if (sym_p)
4196 fprintf (dump_file, "sym + ");
4197 if (var_p)
4198 fprintf (dump_file, "var + ");
4199 if (off_p)
4200 fprintf (dump_file, "cst + ");
4201 if (rat_p)
4202 fprintf (dump_file, "rat * ");
4204 acost = data->costs[sym_p][var_p][off_p][rat_p];
4205 fprintf (dump_file, "index costs %d\n", acost);
4207 if (has_predec[mem_mode] || has_postdec[mem_mode]
4208 || has_preinc[mem_mode] || has_postinc[mem_mode])
4209 fprintf (dump_file, " May include autoinc/dec\n");
4210 fprintf (dump_file, "\n");
4213 address_cost_data_list[data_index] = data;
4216 bits = GET_MODE_BITSIZE (address_mode);
4217 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218 offset &= mask;
4219 if ((offset >> (bits - 1) & 1))
4220 offset |= ~mask;
4221 s_offset = offset;
4223 autoinc = false;
4224 autoinc_type = AINC_NONE;
4225 msize = GET_MODE_SIZE (mem_mode);
4226 autoinc_offset = offset;
4227 if (stmt_after_inc)
4228 autoinc_offset += ratio * cstep;
4229 if (symbol_present || var_present || ratio != 1)
4230 autoinc = false;
4231 else
4233 if (has_postinc[mem_mode] && autoinc_offset == 0
4234 && msize == cstep)
4235 autoinc_type = AINC_POST_INC;
4236 else if (has_postdec[mem_mode] && autoinc_offset == 0
4237 && msize == -cstep)
4238 autoinc_type = AINC_POST_DEC;
4239 else if (has_preinc[mem_mode] && autoinc_offset == msize
4240 && msize == cstep)
4241 autoinc_type = AINC_PRE_INC;
4242 else if (has_predec[mem_mode] && autoinc_offset == -msize
4243 && msize == -cstep)
4244 autoinc_type = AINC_PRE_DEC;
4246 if (autoinc_type != AINC_NONE)
4247 autoinc = true;
4250 cost = 0;
4251 offset_p = (s_offset != 0
4252 && data->min_offset <= s_offset
4253 && s_offset <= data->max_offset);
4254 ratio_p = (ratio != 1
4255 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4257 if (ratio != 1 && !ratio_p)
4258 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4260 if (s_offset && !offset_p && !symbol_present)
4261 cost += add_cost (speed, address_mode);
4263 if (may_autoinc)
4264 *may_autoinc = autoinc;
4265 if (autoinc)
4266 acost = data->ainc_costs[autoinc_type];
4267 else
4268 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270 return comp_cost (cost + acost, complexity);
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  Fails (returns false) when the multiplier is not a
   power of two representable as a shift in MODE.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  /* The constant must be a power of two whose shift count fits in MODE.  */
  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  /* Cost of doing it as an explicit shift followed by an add.  */
  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = comp_cost (MIN (as_cost, sa_cost), 0);
  /* Only the non-multiplied operand's cost remains to be added.  */
  res += (mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res += force_expr_to_var_cost (multop, speed);

  *cost = res;
  return true;
}
/* Estimates cost of forcing expression EXPR into a variable.  Baseline
   costs for integer constants, symbols and addresses are measured once
   (for both size and speed) and cached in function-local statics; compound
   expressions recurse over their operands.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];	/* [0] = size, [1] = speed.  */
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      /* Build a representative static variable and measure the cost of a
	 constant, its address, and an offsetted address.  */
      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* A plain variable costs nothing.  */
  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  /* Decompose a compound expression into its operands.  */
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  /* Prefer a shift-add form when one operand is a multiplication by
	     a power of two.  */
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	/* A general variable-by-variable multiplication.  */
	return comp_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on; when non-NULL it is filled in by
   walking EXPR (via the fd_ivopts_data global used by find_depends).  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, reversep, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &reversep, &volatilep, false);

  /* Give up on variable offsets, bitfield positions, reverse storage order
     and non-VAR_DECL bases; treat the whole address as a variable.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || reversep
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      if (depends_on)
	walk_tree (&addr, find_depends, depends_on, NULL);

      return comp_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      /* A static or external decl contributes a symbol part only.  */
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  /* E1 - 0: only E1's address needs to be decomposed.  */
  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  /* General case: compute E1 - E2 as an affine combination and cost
     forcing it into a variable.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  /* Peel constant offsets off both operands first.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Address differences get the dedicated pointer handling.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      /* The variable parts cancel; only the offsets differ.  */
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      /* 0 - E2 costs forcing E2 plus a negation.  */
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  /* General case: compute E1 - E2 as an affine combination.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
4651 /* Returns true if AFF1 and AFF2 are identical. */
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4656 unsigned i;
4658 if (aff1->n != aff2->n)
4659 return false;
4661 for (i = 0; i < aff1->n; i++)
4663 if (aff1->elts[i].coef != aff2->elts[i].coef)
4664 return false;
4666 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667 return false;
4669 return true;
4672 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
4674 static iv_inv_expr_ent *
4675 record_inv_expr (struct ivopts_data *data, tree expr)
4677 struct iv_inv_expr_ent ent;
4678 struct iv_inv_expr_ent **slot;
4680 ent.expr = expr;
4681 ent.hash = iterative_hash_expr (expr, 0);
4682 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4684 if (!*slot)
4686 *slot = XNEW (struct iv_inv_expr_ent);
4687 (*slot)->expr = expr;
4688 (*slot)->hash = ent.hash;
4689 (*slot)->id = data->max_inv_expr_id++;
4692 return *slot;
4695 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4696 requires a new compiler generated temporary. Returns NULL otherwise.
4697 ADDRESS_P is a flag indicating if the expression is for address
4698 computation. */
4700 static iv_inv_expr_ent *
4701 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4702 tree cbase, HOST_WIDE_INT ratio,
4703 bool address_p)
4705 aff_tree ubase_aff, cbase_aff;
4706 tree expr, ub, cb;
/* UB/CB keep the nops-stripped bases; UBASE/CBASE are further stripped of
   constant parts below for the cheap structural comparisons, while UB/CB
   are used to build the final invariant expression.  */
4708 STRIP_NOPS (ubase);
4709 STRIP_NOPS (cbase);
4710 ub = ubase;
4711 cb = cbase;
/* Two integer constants never require a runtime temporary.  */
4713 if ((TREE_CODE (ubase) == INTEGER_CST)
4714 && (TREE_CODE (cbase) == INTEGER_CST))
4715 return NULL;
4717 /* Strips the constant part. */
4718 if (TREE_CODE (ubase) == PLUS_EXPR
4719 || TREE_CODE (ubase) == MINUS_EXPR
4720 || TREE_CODE (ubase) == POINTER_PLUS_EXPR
4722 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4723 ubase = TREE_OPERAND (ubase, 0);
4726 /* Strips the constant part. */
4727 if (TREE_CODE (cbase) == PLUS_EXPR
4728 || TREE_CODE (cbase) == MINUS_EXPR
4729 || TREE_CODE (cbase) == POINTER_PLUS_EXPR
4731 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4732 cbase = TREE_OPERAND (cbase, 0);
/* NOTE(review): for address computations, one base being an SSA name or an
   invariant address and the other an integer constant apparently needs no
   temporary — presumably the constant folds into the addressing mode;
   confirm against the address-cost code.  */
4735 if (address_p)
4737 if (((TREE_CODE (ubase) == SSA_NAME)
4738 || (TREE_CODE (ubase) == ADDR_EXPR
4739 && is_gimple_min_invariant (ubase)))
4740 && (TREE_CODE (cbase) == INTEGER_CST))
4741 return NULL;
4743 if (((TREE_CODE (cbase) == SSA_NAME)
4744 || (TREE_CODE (cbase) == ADDR_EXPR
4745 && is_gimple_min_invariant (cbase)))
4746 && (TREE_CODE (ubase) == INTEGER_CST))
4747 return NULL;
/* With RATIO == 1 the bases subtract directly, so equal bases (after
   stripping) cancel without any temporary.  */
4750 if (ratio == 1)
4752 if (operand_equal_p (ubase, cbase, 0))
4753 return NULL;
4755 if (TREE_CODE (ubase) == ADDR_EXPR
4756 && TREE_CODE (cbase) == ADDR_EXPR)
4758 tree usym, csym;
4760 usym = TREE_OPERAND (ubase, 0);
4761 csym = TREE_OPERAND (cbase, 0);
/* Treat &a[0] the same as &a when comparing the underlying symbols.  */
4762 if (TREE_CODE (usym) == ARRAY_REF)
4764 tree ind = TREE_OPERAND (usym, 1);
4765 if (TREE_CODE (ind) == INTEGER_CST
4766 && tree_fits_shwi_p (ind)
4767 && tree_to_shwi (ind) == 0)
4768 usym = TREE_OPERAND (usym, 0);
4770 if (TREE_CODE (csym) == ARRAY_REF)
4772 tree ind = TREE_OPERAND (csym, 1);
4773 if (TREE_CODE (ind) == INTEGER_CST
4774 && tree_fits_shwi_p (ind)
4775 && tree_to_shwi (ind) == 0)
4776 csym = TREE_OPERAND (csym, 0);
4778 if (operand_equal_p (usym, csym, 0))
4779 return NULL;
4781 /* Now do more complex comparison */
4782 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4783 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4784 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4785 return NULL;
/* A temporary is needed: build UB - RATIO * CB from the original bases and
   record it as a shared loop-invariant expression.  */
4788 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4789 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4791 aff_combination_scale (&cbase_aff, -1 * ratio);
4792 aff_combination_add (&ubase_aff, &cbase_aff);
4793 expr = aff_combination_to_tree (&ubase_aff);
4794 return record_inv_expr (data, expr);
4797 /* Scale (multiply) the computed COST (except the scratch part, which should
4798 be hoisted out of the loop) by AT->frequency / header->frequency,
4799 which makes the expected cost more accurate. */
4801 static comp_cost
4802 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
4803 comp_cost cost)
4805 int loop_freq = data->current_loop->header->frequency;
4806 int bb_freq = at->bb->frequency;
4807 if (loop_freq != 0)
4809 gcc_assert (cost.scratch <= cost.cost);
4810 int scaled_cost
4811 = cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;
4813 if (dump_file && (dump_flags & TDF_DETAILS))
4814 fprintf (dump_file, "Scaling iv_use based on cand %d "
4815 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
4816 cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
4817 cost.scratch, scaled_cost, bb_freq, loop_freq);
4819 cost.cost = scaled_cost;
4822 return cost;
4825 /* Determines the cost of the computation by that USE is expressed
4826 from induction variable CAND. If ADDRESS_P is true, we just need
4827 to create an address from it, otherwise we want to get it into
4828 register. A set of invariants we depend on is stored in
4829 DEPENDS_ON. AT is the statement at that the value is computed.
4830 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4831 addressing is likely. */
4833 static comp_cost
4834 get_computation_cost_at (struct ivopts_data *data,
4835 struct iv_use *use, struct iv_cand *cand,
4836 bool address_p, bitmap *depends_on, gimple *at,
4837 bool *can_autoinc,
4838 iv_inv_expr_ent **inv_expr)
4840 tree ubase = use->iv->base, ustep = use->iv->step;
4841 tree cbase, cstep;
4842 tree utype = TREE_TYPE (ubase), ctype;
4843 unsigned HOST_WIDE_INT cstepi, offset = 0;
4844 HOST_WIDE_INT ratio, aratio;
4845 bool var_present, symbol_present, stmt_is_after_inc;
4846 comp_cost cost;
4847 widest_int rat;
4848 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4849 machine_mode mem_mode = (address_p
4850 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4851 : VOIDmode);
4853 if (depends_on)
4854 *depends_on = NULL;
4856 /* Only consider real candidates. */
4857 if (!cand->iv)
4858 return infinite_cost;
4860 cbase = cand->iv->base;
4861 cstep = cand->iv->step;
4862 ctype = TREE_TYPE (cbase);
4864 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4866 /* We do not have a precision to express the values of use. */
4867 return infinite_cost;
4870 if (address_p
4871 || (use->iv->base_object
4872 && cand->iv->base_object
4873 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4874 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4876 /* Do not try to express address of an object with computation based
4877 on address of a different object. This may cause problems in rtl
4878 level alias analysis (that does not expect this to be happening,
4879 as this is illegal in C), and would be unlikely to be useful
4880 anyway. */
4881 if (use->iv->base_object
4882 && cand->iv->base_object
4883 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4884 return infinite_cost;
4887 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4889 /* TODO -- add direct handling of this case. */
4890 goto fallback;
4893 /* CSTEPI is removed from the offset in case statement is after the
4894 increment. If the step is not constant, we use zero instead.
4895 This is a bit imprecise (there is the extra addition), but
4896 redundancy elimination is likely to transform the code so that
4897 it uses value of the variable before increment anyway,
4898 so it is not that much unrealistic. */
4899 if (cst_and_fits_in_hwi (cstep))
4900 cstepi = int_cst_value (cstep);
4901 else
4902 cstepi = 0;
/* RATIO is ustep / cstep; the use can only be expressed from the candidate
   when this ratio is a compile-time constant that fits a signed HWI.  */
4904 if (!constant_multiple_of (ustep, cstep, &rat))
4905 return infinite_cost;
4907 if (wi::fits_shwi_p (rat))
4908 ratio = rat.to_shwi ();
4909 else
4910 return infinite_cost;
4912 STRIP_NOPS (cbase);
4913 ctype = TREE_TYPE (cbase);
4915 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4917 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4918 or ratio == 1, it is better to handle this like
4920 ubase - ratio * cbase + ratio * var
4922 (also holds in the case ratio == -1, TODO. */
/* The four branches below compute the loop-invariant part of the use
   expression; the /= avg_loop_niter in each amortizes that setup over the
   expected iteration count.  */
4924 if (cst_and_fits_in_hwi (cbase))
4926 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4927 cost = difference_cost (data,
4928 ubase, build_int_cst (utype, 0),
4929 &symbol_present, &var_present, &offset,
4930 depends_on);
4931 cost /= avg_loop_niter (data->current_loop);
4933 else if (ratio == 1)
4935 tree real_cbase = cbase;
4937 /* Check to see if any adjustment is needed. */
4938 if (cstepi == 0 && stmt_is_after_inc)
4940 aff_tree real_cbase_aff;
4941 aff_tree cstep_aff;
4943 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4944 &real_cbase_aff);
4945 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4947 aff_combination_add (&real_cbase_aff, &cstep_aff);
4948 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4951 cost = difference_cost (data,
4952 ubase, real_cbase,
4953 &symbol_present, &var_present, &offset,
4954 depends_on);
4955 cost /= avg_loop_niter (data->current_loop);
4957 else if (address_p
4958 && !POINTER_TYPE_P (ctype)
4959 && multiplier_allowed_in_address_p
4960 (ratio, mem_mode,
4961 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4963 tree real_cbase = cbase;
4965 if (cstepi == 0 && stmt_is_after_inc)
4967 if (POINTER_TYPE_P (ctype))
4968 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4969 else
4970 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4972 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4973 build_int_cst (ctype, ratio));
4974 cost = difference_cost (data,
4975 ubase, real_cbase,
4976 &symbol_present, &var_present, &offset,
4977 depends_on);
4978 cost /= avg_loop_niter (data->current_loop);
4980 else
4982 cost = force_var_cost (data, cbase, depends_on);
4983 cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4984 &symbol_present, &var_present, &offset,
4985 depends_on);
4986 cost /= avg_loop_niter (data->current_loop);
4987 cost += add_cost (data->speed, TYPE_MODE (ctype));
4990 /* Record setup cost in scratch field. */
4991 cost.scratch = cost.cost;
/* If the invariant part can be captured as a shared expression, report it
   via *INV_EXPR and drop the individual dependences.  */
4993 if (inv_expr && depends_on && *depends_on)
4995 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4996 address_p);
4997 /* Clear depends on. */
4998 if (*inv_expr != NULL)
4999 bitmap_clear (*depends_on)
5002 /* If we are after the increment, the value of the candidate is higher by
5003 one iteration. */
5004 if (stmt_is_after_inc)
5005 offset -= ratio * cstepi;
5007 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
5008 (symbol/var1/const parts may be omitted). If we are looking for an
5009 address, find the cost of addressing this. */
5010 if (address_p)
5012 cost += get_address_cost (symbol_present, var_present,
5013 offset, ratio, cstepi,
5014 mem_mode,
5015 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
5016 speed, stmt_is_after_inc, can_autoinc);
5017 return get_scaled_computation_cost_at (data, at, cand, cost);
5020 /* Otherwise estimate the costs for computing the expression. */
5021 if (!symbol_present && !var_present && !offset)
5023 if (ratio != 1)
5024 cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
5025 return get_scaled_computation_cost_at (data, at, cand, cost);
5028 /* Symbol + offset should be compile-time computable so consider that they
5029 are added once to the variable, if present. */
5030 if (var_present && (symbol_present || offset))
5031 cost += adjust_setup_cost (data,
5032 add_cost (speed, TYPE_MODE (ctype)));
5034 /* Having offset does not affect runtime cost in case it is added to
5035 symbol, but it increases complexity. */
5036 if (offset)
5037 cost.complexity++;
5039 cost += add_cost (speed, TYPE_MODE (ctype));
5041 aratio = ratio > 0 ? ratio : -ratio;
5042 if (aratio != 1)
5043 cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5045 return get_scaled_computation_cost_at (data, at, cand, cost);
/* Fallback: expand the full use expression and measure its RTL cost.  */
5047 fallback:
5048 if (can_autoinc)
5049 *can_autoinc = false;
5051 /* Just get the expression, expand it and measure the cost. */
5052 tree comp = get_computation_at (data->current_loop, use, cand, at);
5054 if (!comp)
5055 return infinite_cost;
5057 if (address_p)
5058 comp = build_simple_mem_ref (comp);
5060 cost = comp_cost (computation_cost (comp, speed), 0);
5062 return get_scaled_computation_cost_at (data, at, cand, cost);
5065 /* Determines the cost of the computation by that USE is expressed
5066 from induction variable CAND. If ADDRESS_P is true, we just need
5067 to create an address from it, otherwise we want to get it into
5068 register. A set of invariants we depend on is stored in
5069 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5070 autoinc addressing is likely. */
5072 static comp_cost
5073 get_computation_cost (struct ivopts_data *data,
5074 struct iv_use *use, struct iv_cand *cand,
5075 bool address_p, bitmap *depends_on,
5076 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5078 return get_computation_cost_at (data,
5079 use, cand, address_p, depends_on, use->stmt,
5080 can_autoinc, inv_expr);
5083 /* Determines cost of computing the use in GROUP with CAND in a generic
5084 expression. */
5086 static bool
5087 determine_group_iv_cost_generic (struct ivopts_data *data,
5088 struct iv_group *group, struct iv_cand *cand)
5090 comp_cost cost;
5091 iv_inv_expr_ent *inv_expr = NULL;
5092 bitmap depends_on = NULL;
5093 struct iv_use *use = group->vuses[0];
5095 /* The simple case first -- if we need to express value of the preserved
5096 original biv, the cost is 0. This also prevents us from counting the
5097 cost of increment twice -- once at this use and once in the cost of
5098 the candidate. */
5099 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5100 cost = no_cost;
5101 else
5102 cost = get_computation_cost (data, use, cand, false,
5103 &depends_on, NULL, &inv_expr);
5105 set_group_iv_cost (data, group, cand, cost, depends_on,
5106 NULL_TREE, ERROR_MARK, inv_expr);
5107 return !cost.infinite_cost_p ();
5110 /* Determines cost of computing uses in GROUP with CAND in addresses. */
5112 static bool
5113 determine_group_iv_cost_address (struct ivopts_data *data,
5114 struct iv_group *group, struct iv_cand *cand)
5116 unsigned i;
5117 bitmap depends_on;
5118 bool can_autoinc, first = true;
5119 iv_inv_expr_ent *inv_expr = NULL;
5120 struct iv_use *use = group->vuses[0];
5121 comp_cost sum_cost = no_cost, cost;
/* Cost the group's representative (first) use.  */
5123 cost = get_computation_cost (data, use, cand, true,
5124 &depends_on, &can_autoinc, &inv_expr);
5126 sum_cost = cost;
/* Credit the step cost when this use is the one the candidate's
   auto-increment was created for and autoinc addressing applies.  */
5127 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5129 if (can_autoinc)
5130 sum_cost -= cand->cost_step;
5131 /* If we generated the candidate solely for exploiting autoincrement
5132 opportunities, and it turns out it can't be used, set the cost to
5133 infinity to make sure we ignore it. */
5134 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5135 sum_cost = infinite_cost;
5138 /* Uses in a group can share setup code, so only add setup cost once. */
5139 cost -= cost.scratch;
5140 /* Compute and add costs for rest uses of this group. */
5141 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5143 struct iv_use *next = group->vuses[i];
5145 /* Compute cost for the first use with different offset to the main
5146 use and add it afterwards. Costs for these uses could be quite
5147 different. Given below uses in a group:
5148 use 0 : {base + A + offset_0, step}
5149 use 0.1: {base + A + offset_0, step}
5150 use 0.2: {base + A + offset_1, step}
5151 use 0.3: {base + A + offset_2, step}
5152 when we need to compute costs with candidate:
5153 cand 1 : {base + B + offset_0, step}
5155 The first use with different offset is use 0.2, its cost is larger
5156 than cost of use 0/0.1 because we need to compute:
5157 A - B + offset_1 - offset_0
5158 rather than:
5159 A - B. */
/* COST is recomputed only once, at the first differing offset; every
   subsequent use reuses the most recent COST value.  */
5160 if (first && next->addr_offset != use->addr_offset)
5162 first = false;
5163 cost = get_computation_cost (data, next, cand, true,
5164 NULL, &can_autoinc, NULL);
5165 /* Remove setup cost. */
5166 if (!cost.infinite_cost_p ())
5167 cost -= cost.scratch;
5169 sum_cost += cost;
5171 set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5172 NULL_TREE, ERROR_MARK, inv_expr);
5174 return !sum_cost.infinite_cost_p ();
5177 /* Computes value of candidate CAND at position AT in iteration NITER, and
5178 stores it to VAL. */
5180 static void
5181 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5182 aff_tree *val)
5184 aff_tree step, delta, nit;
5185 struct iv *iv = cand->iv;
5186 tree type = TREE_TYPE (iv->base);
5187 tree steptype = type;
5188 if (POINTER_TYPE_P (type))
5189 steptype = sizetype;
5190 steptype = unsigned_type_for (type);
5192 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5193 aff_combination_convert (&step, steptype);
5194 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5195 aff_combination_convert (&nit, steptype);
5196 aff_combination_mult (&nit, &step, &delta);
5197 if (stmt_after_increment (loop, cand, at))
5198 aff_combination_add (&delta, &step);
5200 tree_to_aff_combination (iv->base, type, val);
5201 if (!POINTER_TYPE_P (type))
5202 aff_combination_convert (val, steptype);
5203 aff_combination_add (val, &delta);
5206 /* Returns period of induction variable iv. */
5208 static tree
5209 iv_period (struct iv *iv)
5211 tree step = iv->step, period, type;
5212 tree pow2div;
5214 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5216 type = unsigned_type_for (TREE_TYPE (step));
5217 /* Period of the iv is lcm (step, type_range)/step -1,
5218 i.e., N*type_range/step - 1. Since type range is power
5219 of two, N == (step >> num_of_ending_zeros_binary (step),
5220 so the final result is
5222 (type_range >> num_of_ending_zeros_binary (step)) - 1
5225 pow2div = num_ending_zeros (step);
5227 period = build_low_bits_mask (type,
5228 (TYPE_PRECISION (type)
5229 - tree_to_uhwi (pow2div)));
5231 return period;
5234 /* Returns the comparison operator used when eliminating the iv USE. */
5236 static enum tree_code
5237 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5239 struct loop *loop = data->current_loop;
5240 basic_block ex_bb;
5241 edge exit;
5243 ex_bb = gimple_bb (use->stmt);
5244 exit = EDGE_SUCC (ex_bb, 0);
5245 if (flow_bb_inside_loop_p (loop, exit->dest))
5246 exit = EDGE_SUCC (ex_bb, 1);
5248 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5251 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5252 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5253 calculation is performed in non-wrapping type.
5255 TODO: More generally, we could test for the situation that
5256 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5257 This would require knowing the sign of OFFSET. */
5259 static bool
5260 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5262 enum tree_code code;
5263 tree e1, e2;
5264 aff_tree aff_e1, aff_e2, aff_offset;
/* Only a non-wrapping type lets us reason about overflow at all.  */
5266 if (!nowrap_type_p (TREE_TYPE (base)))
5267 return false;
5269 base = expand_simple_operations (base);
/* Decompose BASE into a binary operation CODE (e1, e2), either from the
   defining statement of an SSA name or from the tree itself.  */
5271 if (TREE_CODE (base) == SSA_NAME)
5273 gimple *stmt = SSA_NAME_DEF_STMT (base);
5275 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5276 return false;
5278 code = gimple_assign_rhs_code (stmt);
5279 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5280 return false;
5282 e1 = gimple_assign_rhs1 (stmt);
5283 e2 = gimple_assign_rhs2 (stmt);
5285 else
5287 code = TREE_CODE (base);
5288 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5289 return false;
5290 e1 = TREE_OPERAND (base, 0);
5291 e2 = TREE_OPERAND (base, 1);
5294 /* Use affine expansion as deeper inspection to prove the equality. */
5295 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5296 &aff_e2, &data->name_expansion_cache);
5297 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5298 &aff_offset, &data->name_expansion_cache);
5299 aff_combination_scale (&aff_offset, -1);
/* BASE - OFFSET cannot overflow when one of the addends equals OFFSET:
   then the subtraction just undoes a non-wrapping addition.  */
5300 switch (code)
5302 case PLUS_EXPR:
/* Either operand of a commutative PLUS may match OFFSET.  */
5303 aff_combination_add (&aff_e2, &aff_offset);
5304 if (aff_combination_zero_p (&aff_e2))
5305 return true;
5307 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5308 &aff_e1, &data->name_expansion_cache);
5309 aff_combination_add (&aff_e1, &aff_offset);
5310 return aff_combination_zero_p (&aff_e1);
5312 case POINTER_PLUS_EXPR:
/* Only the second operand of POINTER_PLUS is the added offset.  */
5313 aff_combination_add (&aff_e2, &aff_offset);
5314 return aff_combination_zero_p (&aff_e2);
5316 default:
5317 return false;
5321 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5322 comparison with CAND. NITER describes the number of iterations of
5323 the loops. If successful, the comparison in COMP_P is altered accordingly.
5325 We aim to handle the following situation:
5327 sometype *base, *p;
5328 int a, b, i;
5330 i = a;
5331 p = p_0 = base + a;
5335 bla (*p);
5336 p++;
5337 i++;
5339 while (i < b);
5341 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5342 We aim to optimize this to
5344 p = p_0 = base + a;
5347 bla (*p);
5348 p++;
5350 while (p < p_0 - a + b);
5352 This preserves the correctness, since the pointer arithmetics does not
5353 overflow. More precisely:
5355 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5356 overflow in computing it or the values of p.
5357 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5358 overflow. To prove this, we use the fact that p_0 = base + a. */
5360 static bool
5361 iv_elimination_compare_lt (struct ivopts_data *data,
5362 struct iv_cand *cand, enum tree_code *comp_p,
5363 struct tree_niter_desc *niter)
5365 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5366 struct aff_tree nit, tmpa, tmpb;
5367 enum tree_code comp;
5368 HOST_WIDE_INT step;
5370 /* We need to know that the candidate induction variable does not overflow.
5371 While more complex analysis may be used to prove this, for now just
5372 check that the variable appears in the original program and that it
5373 is computed in a type that guarantees no overflows. */
5374 cand_type = TREE_TYPE (cand->iv->base);
5375 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5376 return false;
5378 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5379 the calculation of the BOUND could overflow, making the comparison
5380 invalid. */
5381 if (!data->loop_single_exit_p)
5382 return false;
5384 /* We need to be able to decide whether candidate is increasing or decreasing
5385 in order to choose the right comparison operator. */
5386 if (!cst_and_fits_in_hwi (cand->iv->step))
5387 return false;
5388 step = int_cst_value (cand->iv->step);
5390 /* Check that the number of iterations matches the expected pattern:
5391 a + 1 > b ? 0 : b - a - 1. */
5392 mbz = niter->may_be_zero;
5393 if (TREE_CODE (mbz) == GT_EXPR)
5395 /* Handle a + 1 > b. */
5396 tree op0 = TREE_OPERAND (mbz, 0);
5397 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5399 a = TREE_OPERAND (op0, 0);
5400 b = TREE_OPERAND (mbz, 1);
5402 else
5403 return false;
5405 else if (TREE_CODE (mbz) == LT_EXPR)
5407 tree op1 = TREE_OPERAND (mbz, 1);
5409 /* Handle b < a + 1. */
5410 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5412 a = TREE_OPERAND (op1, 0);
5413 b = TREE_OPERAND (mbz, 0);
5415 else
5416 return false;
5418 else
5419 return false;
5421 /* Expected number of iterations is B - A - 1. Check that it matches
5422 the actual number, i.e., that B - A - NITER = 1. */
/* The check is done on affine combinations: after folding B - A - NITER
   the result must be the pure constant 1 (no symbolic terms left).  */
5423 tree_to_aff_combination (niter->niter, nit_type, &nit);
5424 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5425 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5426 aff_combination_scale (&nit, -1);
5427 aff_combination_scale (&tmpa, -1);
5428 aff_combination_add (&tmpb, &tmpa);
5429 aff_combination_add (&tmpb, &nit);
5430 if (tmpb.n != 0 || tmpb.offset != 1)
5431 return false;
5433 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5434 overflow. */
5435 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5436 cand->iv->step,
5437 fold_convert (TREE_TYPE (cand->iv->step), a));
5438 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5439 return false;
5441 /* Determine the new comparison operator. */
/* A negative step means the candidate decreases, so the "continue" test
   flips from < to >.  EQ (exit when equal) takes the inverted operator.  */
5442 comp = step < 0 ? GT_EXPR : LT_EXPR;
5443 if (*comp_p == NE_EXPR)
5444 *comp_p = comp;
5445 else if (*comp_p == EQ_EXPR)
5446 *comp_p = invert_tree_comparison (comp, false);
5447 else
5448 gcc_unreachable ();
5450 return true;
5453 /* Check whether it is possible to express the condition in USE by comparison
5454 of candidate CAND. If so, store the value compared with to BOUND, and the
5455 comparison operator to COMP. */
5457 static bool
5458 may_eliminate_iv (struct ivopts_data *data,
5459 struct iv_use *use, struct iv_cand *cand, tree *bound,
5460 enum tree_code *comp)
5462 basic_block ex_bb;
5463 edge exit;
5464 tree period;
5465 struct loop *loop = data->current_loop;
5466 aff_tree bnd;
5467 struct tree_niter_desc *desc = NULL;
/* Elimination reasoning below requires a compile-time-constant step.  */
5469 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5470 return false;
5472 /* For now works only for exits that dominate the loop latch.
5473 TODO: extend to other conditions inside loop body. */
5474 ex_bb = gimple_bb (use->stmt);
5475 if (use->stmt != last_stmt (ex_bb)
5476 || gimple_code (use->stmt) != GIMPLE_COND
5477 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb)
5478 return false;
/* Identify the edge actually leaving the loop; if both successors stay
   inside, this condition is not a loop exit.  */
5480 exit = EDGE_SUCC (ex_bb, 0);
5481 if (flow_bb_inside_loop_p (loop, exit->dest))
5482 exit = EDGE_SUCC (ex_bb, 1);
5483 if (flow_bb_inside_loop_p (loop, exit->dest))
5484 return false;
5486 desc = niter_for_exit (data, exit);
5487 if (!desc)
5488 return false;
5490 /* Determine whether we can use the variable to test the exit condition.
5491 This is the case iff the period of the induction variable is greater
5492 than the number of iterations for which the exit condition is true. */
5493 period = iv_period (cand->iv);
5495 /* If the number of iterations is constant, compare against it directly. */
5496 if (TREE_CODE (desc->niter) == INTEGER_CST)
5498 /* See cand_value_at. */
5499 if (stmt_after_increment (loop, cand, use->stmt))
5501 if (!tree_int_cst_lt (desc->niter, period))
5502 return false;
5504 else
5506 if (tree_int_cst_lt (period, desc->niter))
5507 return false;
5511 /* If not, and if this is the only possible exit of the loop, see whether
5512 we can get a conservative estimate on the number of iterations of the
5513 entire loop and compare against that instead. */
5514 else
5516 widest_int period_value, max_niter;
5518 max_niter = desc->max;
5519 if (stmt_after_increment (loop, cand, use->stmt))
5520 max_niter += 1;
5521 period_value = wi::to_widest (period);
5522 if (wi::gtu_p (max_niter, period_value))
5524 /* See if we can take advantage of inferred loop bound
5525 information. */
5526 if (data->loop_single_exit_p)
5528 if (!max_loop_iterations (loop, &max_niter))
5529 return false;
5530 /* The loop bound is already adjusted by adding 1. */
5531 if (wi::gtu_p (max_niter, period_value))
5532 return false;
5534 else
5535 return false;
/* The candidate's value after NITER iterations becomes the new bound.  */
5539 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5541 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5542 aff_combination_to_tree (&bnd));
5543 *comp = iv_elimination_compare (data, use);
5545 /* It is unlikely that computing the number of iterations using division
5546 would be more profitable than keeping the original induction variable. */
5547 if (expression_expensive_p (*bound))
5548 return false;
5550 /* Sometimes, it is possible to handle the situation that the number of
5551 iterations may be zero (unless additional assumptions hold) by using <
5552 instead of != in the exit condition.
5554 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5555 base the exit condition on it. However, that is often too
5556 expensive. */
5557 if (!integer_zerop (desc->may_be_zero))
5558 return iv_elimination_compare_lt (data, cand, comp, desc);
5560 return true;
5563 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5564 be copied, if it is used in the loop body and DATA->body_includes_call. */
5566 static int
5567 parm_decl_cost (struct ivopts_data *data, tree bound)
5569 tree sbound = bound;
5570 STRIP_NOPS (sbound);
5572 if (TREE_CODE (sbound) == SSA_NAME
5573 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5574 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5575 && data->body_includes_call)
5576 return COSTS_N_INSNS (1);
5578 return 0;
5581 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5583 static bool
5584 determine_group_iv_cost_cond (struct ivopts_data *data,
5585 struct iv_group *group, struct iv_cand *cand)
5587 tree bound = NULL_TREE;
5588 struct iv *cmp_iv;
5589 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5590 comp_cost elim_cost, express_cost, cost, bound_cost;
5591 bool ok;
5592 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5593 tree *control_var, *bound_cst;
5594 enum tree_code comp = ERROR_MARK;
5595 struct iv_use *use = group->vuses[0];
5597 gcc_assert (cand->iv);
5599 /* Try iv elimination. */
/* Branch 1: replace the exit test by a comparison of CAND against BOUND.  */
5600 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5602 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5603 if (elim_cost.cost == 0)
5604 elim_cost.cost = parm_decl_cost (data, bound);
5605 else if (TREE_CODE (bound) == INTEGER_CST)
5606 elim_cost.cost = 0;
5607 /* If we replace a loop condition 'i < n' with 'p < base + n',
5608 depends_on_elim will have 'base' and 'n' set, which implies
5609 that both 'base' and 'n' will be live during the loop. More likely,
5610 'base + n' will be loop invariant, resulting in only one live value
5611 during the loop. So in that case we clear depends_on_elim and set
5612 elim_inv_expr_id instead. */
5613 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5615 elim_inv_expr = record_inv_expr (data, bound);
5616 bitmap_clear (depends_on_elim);
5618 /* The bound is a loop invariant, so it will be only computed
5619 once. */
5620 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5622 else
5623 elim_cost = infinite_cost;
5625 /* Try expressing the original giv. If it is compared with an invariant,
5626 note that we cannot get rid of it. */
5627 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5628 NULL, &cmp_iv);
5629 gcc_assert (ok);
5631 /* When the condition is a comparison of the candidate IV against
5632 zero, prefer this IV.
5634 TODO: The constant that we're subtracting from the cost should
5635 be target-dependent. This information should be added to the
5636 target costs for each backend. */
5637 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5638 && integer_zerop (*bound_cst)
5639 && (operand_equal_p (*control_var, cand->var_after, 0)
5640 || operand_equal_p (*control_var, cand->var_before, 0)))
5641 elim_cost -= 1;
/* Branch 2: keep the comparison and express its control variable from
   CAND; the original bound must then be kept live as well.  */
5643 express_cost = get_computation_cost (data, use, cand, false,
5644 &depends_on_express, NULL,
5645 &express_inv_expr);
5646 fd_ivopts_data = data;
5647 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5649 /* Count the cost of the original bound as well. */
5650 bound_cost = force_var_cost (data, *bound_cst, NULL);
5651 if (bound_cost.cost == 0)
5652 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5653 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5654 bound_cost.cost = 0;
5655 express_cost += bound_cost;
5657 /* Choose the better approach, preferring the eliminated IV. */
5658 if (elim_cost <= express_cost)
5660 cost = elim_cost;
5661 depends_on = depends_on_elim;
5662 depends_on_elim = NULL;
5663 inv_expr = elim_inv_expr;
5665 else
5667 cost = express_cost;
5668 depends_on = depends_on_express;
5669 depends_on_express = NULL;
/* BOUND/COMP only make sense for the elimination variant; reset them so
   the recorded cost entry does not claim an elimination.  */
5670 bound = NULL_TREE;
5671 comp = ERROR_MARK;
5672 inv_expr = express_inv_expr;
5675 set_group_iv_cost (data, group, cand, cost,
5676 depends_on, bound, comp, inv_expr);
/* The winning bitmap was handed off (set to NULL above); free the loser.  */
5678 if (depends_on_elim)
5679 BITMAP_FREE (depends_on_elim);
5680 if (depends_on_express)
5681 BITMAP_FREE (depends_on_express);
5683 return !cost.infinite_cost_p ();
5686 /* Determines cost of computing uses in GROUP with CAND. Returns false
5687 if USE cannot be represented with CAND. */
5689 static bool
5690 determine_group_iv_cost (struct ivopts_data *data,
5691 struct iv_group *group, struct iv_cand *cand)
5693 switch (group->type)
5695 case USE_NONLINEAR_EXPR:
5696 return determine_group_iv_cost_generic (data, group, cand);
5698 case USE_ADDRESS:
5699 return determine_group_iv_cost_address (data, group, cand);
5701 case USE_COMPARE:
5702 return determine_group_iv_cost_cond (data, group, cand);
5704 default:
5705 gcc_unreachable ();
5709 /* Return true if get_computation_cost indicates that autoincrement is
5710 a possibility for the pair of USE and CAND, false otherwise. */
5712 static bool
5713 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5714 struct iv_cand *cand)
5716 bitmap depends_on;
5717 bool can_autoinc;
5718 comp_cost cost;
5720 if (use->type != USE_ADDRESS)
5721 return false;
5723 cost = get_computation_cost (data, use, cand, true, &depends_on,
5724 &can_autoinc, NULL);
5726 BITMAP_FREE (depends_on);
5728 return !cost.infinite_cost_p () && can_autoinc;
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;

      /* Find the uses nearest to the increment statement, on either
	 side of it, by comparing gimple uids.  */
      for (j = 0; j < data->vgroups.length (); j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  struct iv_use *use = group->vuses[0];
	  unsigned uid = gimple_uid (use->stmt);

	  /* Only uses in the same basic block as the increment can be
	     combined with it.  */
	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
	    continue;

	  if (uid < gimple_uid (cand->incremented_at)
	      && (closest_before == NULL
		  || uid > gimple_uid (closest_before->stmt)))
	    closest_before = use;

	  if (uid > gimple_uid (cand->incremented_at)
	      && (closest_after == NULL
		  || uid < gimple_uid (closest_after->stmt)))
	    closest_after = use;
	}

      /* Prefer the use preceding the increment; fall back to the one
	 following it.  */
      if (closest_before != NULL
	  && autoinc_possible_for_pair (data, closest_before, cand))
	cand->ainc_use = closest_before;
      else if (closest_after != NULL
	       && autoinc_possible_for_pair (data, closest_after, cand))
	cand->ainc_use = closest_after;
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_groups (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "\n<Important Candidates>:\t");
      for (i = 0; i < data->vcands.length (); i++)
	if (data->vcands[i]->important)
	  fprintf (dump_file, " %d,", data->vcands[i]->id);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "\n<Group, Cand> Related:\n");
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_group *group = data->vgroups[i];

	  if (group->related_cands)
	    {
	      fprintf (dump_file, " Group %d:\t", group->id);
	      dump_bitmap (dump_file, group->related_cands);
	    }
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines costs of computing use of iv with an iv candidate.  */

static void
determine_group_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand;
  struct iv_group *group;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < data->vcands.length (); j++)
	    {
	      cand = data->vcands[j];
	      determine_group_iv_cost (data, group, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
	    {
	      cand = data->vcands[j];
	      if (!determine_group_iv_cost (data, group, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (group->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      /* Dump the recorded invariant expressions, sorted for stable
	 output across runs.  */
      fprintf (dump_file, "\n<Invariant Expressions>:\n");
      auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());

      for (hash_table<iv_inv_expr_hasher>::iterator it
	   = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
	   ++it)
	list.safe_push (*it);

      list.qsort (sort_iv_inv_expr_ent);

      for (i = 0; i < list.length (); ++i)
	{
	  fprintf (dump_file, "inv_expr %d: \t", i);
	  print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Group-candidate Costs>:\n");

      for (i = 0; i < data->vgroups.length (); i++)
	{
	  group = data->vgroups[i];

	  fprintf (dump_file, "Group %d:\n", i);
	  fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
	  for (j = 0; j < group->n_map_members; j++)
	    {
	      /* Skip empty slots and candidates the group cannot use.  */
	      if (!group->cost_map[j].cand
		  || group->cost_map[j].cost.infinite_cost_p ())
		continue;

	      fprintf (dump_file, " %d\t%d\t%d\t",
		       group->cost_map[j].cand->id,
		       group->cost_map[j].cost.cost,
		       group->cost_map[j].cost.complexity);
	      if (group->cost_map[j].inv_expr != NULL)
		fprintf (dump_file, "%d\t",
			 group->cost_map[j].inv_expr->id);
	      else
		fprintf (dump_file, "\t");
	      if (group->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      group->cost_map[j].depends_on, "","");
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  if (!cand->iv)
    {
      /* Pseudocandidates carry no iv and hence cost nothing.  */
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Candidate Costs>:\n");
      fprintf (dump_file, " cand\tcost\n");
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, " %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}
5993 /* Calculates cost for having SIZE induction variables. */
5995 static unsigned
5996 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5998 /* We add size to the cost, so that we prefer eliminating ivs
5999 if possible. */
6000 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
6001 data->body_includes_call);
/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Global Costs>:\n");
      fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  /* Estimate the register pressure baseline: count non-virtual PHI
     results in the loop header that are not induction variables...  */
  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* ...plus loop invariants that have nonlinear uses.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, " regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, " cost for size:\n");
      fprintf (dump_file, " ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, " %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
6063 /* Returns true if A is a cheaper cost pair than B. */
6065 static bool
6066 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6068 if (!a)
6069 return false;
6071 if (!b)
6072 return true;
6074 if (a->cost < b->cost)
6075 return true;
6077 if (b->cost < a->cost)
6078 return false;
6080 /* In case the costs are the same, prefer the cheaper candidate. */
6081 if (a->cand->cost < b->cand->cost)
6082 return true;
6084 return false;
6088 /* Returns candidate by that USE is expressed in IVS. */
6090 static struct cost_pair *
6091 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6093 return ivs->cand_for_group[group->id];
6096 /* Computes the cost field of IVS structure. */
6098 static void
6099 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6101 comp_cost cost = ivs->cand_use_cost;
6103 cost += ivs->cand_cost;
6105 cost += ivopts_global_cost_for_size (data,
6106 ivs->n_regs
6107 + ivs->used_inv_exprs->elements ());
6109 ivs->cost = cost;
6112 /* Remove invariants in set INVS to set IVS. */
6114 static void
6115 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6117 bitmap_iterator bi;
6118 unsigned iid;
6120 if (!invs)
6121 return;
6123 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6125 ivs->n_invariant_uses[iid]--;
6126 if (ivs->n_invariant_uses[iid] == 0)
6127 ivs->n_regs--;
/* Set USE not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_group *group)
{
  unsigned gid = group->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_group[gid];
  if (!cp)
    return;
  cid = cp->cand->id;

  /* The group now has no candidate, which makes the assignment bad.  */
  ivs->bad_groups++;
  ivs->cand_for_group[gid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      /* The candidate's last use is gone; drop it from the set and
	 give back everything it contributed.  */
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost -= cp->cost;

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  if (cp->inv_expr != NULL)
    {
      /* Drop one reference to the invariant expression; free its map
	 entry when no use references it anymore.  */
      unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
      --(*slot);
      if (*slot == 0)
	ivs->used_inv_exprs->remove (cp->inv_expr);
    }
  iv_ca_recount_cost (data, ivs);
}
6175 /* Add invariants in set INVS to set IVS. */
6177 static void
6178 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6180 bitmap_iterator bi;
6181 unsigned iid;
6183 if (!invs)
6184 return;
6186 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6188 ivs->n_invariant_uses[iid]++;
6189 if (ivs->n_invariant_uses[iid] == 1)
6190 ivs->n_regs++;
/* Set cost pair for GROUP in set IVS to CP.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_group *group, struct cost_pair *cp)
{
  unsigned gid = group->id, cid;

  /* Nothing to do if the assignment already holds.  */
  if (ivs->cand_for_group[gid] == cp)
    return;

  /* Undo the bookkeeping of the previous assignment first.  */
  if (ivs->cand_for_group[gid])
    iv_ca_set_no_cp (data, ivs, group);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_groups--;
      ivs->cand_for_group[gid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  /* First use of this candidate; account for its own cost.  */
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      if (cp->inv_expr != NULL)
	{
	  unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
	  ++(*slot);
	}
      iv_ca_recount_cost (data, ivs);
    }
}
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_group *group)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  /* Groups must be added in order of their ids.  */
  gcc_assert (ivs->upto >= group->id);
  ivs->upto++;
  ivs->bad_groups++;

  /* First try the candidates already in the set.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      cp = get_group_iv_cost (data, group, cand);
      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  /* Fall back to the important candidates.  */
  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
	{
	  cand = data->vcands[i];
	  cp = get_group_iv_cost (data, group, cand);
	  if (cheaper_cost_pair (cp, best_cp))
	    best_cp = cp;
	}
    }

  /* May be NULL, in which case the group stays "bad".  */
  iv_ca_set_cp (data, ivs, group, best_cp);
}
6278 /* Get cost for assignment IVS. */
6280 static comp_cost
6281 iv_ca_cost (struct iv_ca *ivs)
6283 /* This was a conditional expression but it triggered a bug in
6284 Sun C 5.5. */
6285 if (ivs->bad_groups)
6286 return infinite_cost;
6287 else
6288 return ivs->cost;
6291 /* Returns true if all dependences of CP are among invariants in IVS. */
6293 static bool
6294 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6296 unsigned i;
6297 bitmap_iterator bi;
6299 if (!cp->depends_on)
6300 return true;
6302 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6304 if (ivs->n_invariant_uses[i] == 0)
6305 return false;
6308 return true;
6311 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6312 it before NEXT. */
6314 static struct iv_ca_delta *
6315 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6316 struct cost_pair *new_cp, struct iv_ca_delta *next)
6318 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6320 change->group = group;
6321 change->old_cp = old_cp;
6322 change->new_cp = new_cp;
6323 change->next = next;
6325 return change;
6328 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6329 are rewritten. */
6331 static struct iv_ca_delta *
6332 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6334 struct iv_ca_delta *last;
6336 if (!l2)
6337 return l1;
6339 if (!l1)
6340 return l2;
6342 for (last = l1; last->next; last = last->next)
6343 continue;
6344 last->next = l2;
6346 return l1;
6349 /* Reverse the list of changes DELTA, forming the inverse to it. */
6351 static struct iv_ca_delta *
6352 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6354 struct iv_ca_delta *act, *next, *prev = NULL;
6356 for (act = delta; act; act = next)
6358 next = act->next;
6359 act->next = prev;
6360 prev = act;
6362 std::swap (act->old_cp, act->new_cp);
6365 return prev;
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  /* Reverting walks the inverted list; iv_ca_delta_reverse also swaps
     old_cp/new_cp in each record.  */
  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
      iv_ca_set_cp (data, ivs, act->group, to);
    }

  /* Restore the caller's list to its original order and orientation.  */
  if (!forward)
    iv_ca_delta_reverse (delta);
}
6393 /* Returns true if CAND is used in IVS. */
6395 static bool
6396 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6398 return ivs->n_cand_uses[cand->id] > 0;
6401 /* Returns number of induction variable candidates in the set IVS. */
6403 static unsigned
6404 iv_ca_n_cands (struct iv_ca *ivs)
6406 return ivs->n_cands;
6409 /* Free the list of changes DELTA. */
6411 static void
6412 iv_ca_delta_free (struct iv_ca_delta **delta)
6414 struct iv_ca_delta *act, *next;
6416 for (act = *delta; act; act = next)
6418 next = act->next;
6419 free (act);
6422 *delta = NULL;
6425 /* Allocates new iv candidates assignment. */
6427 static struct iv_ca *
6428 iv_ca_new (struct ivopts_data *data)
6430 struct iv_ca *nw = XNEW (struct iv_ca);
6432 nw->upto = 0;
6433 nw->bad_groups = 0;
6434 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6435 data->vgroups.length ());
6436 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6437 nw->cands = BITMAP_ALLOC (NULL);
6438 nw->n_cands = 0;
6439 nw->n_regs = 0;
6440 nw->cand_use_cost = no_cost;
6441 nw->cand_cost = 0;
6442 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6443 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6444 nw->cost = no_cost;
6446 return nw;
6449 /* Free memory occupied by the set IVS. */
6451 static void
6452 iv_ca_free (struct iv_ca **ivs)
6454 free ((*ivs)->cand_for_group);
6455 free ((*ivs)->n_cand_uses);
6456 BITMAP_FREE ((*ivs)->cands);
6457 free ((*ivs)->n_invariant_uses);
6458 delete ((*ivs)->used_inv_exprs);
6459 free (*ivs);
6460 *ivs = NULL;
/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
	   cost.complexity);
  fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost,
	   ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, " candidates: ","\n");

  /* Print, per group, the candidate chosen for it (if any).  */
  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
	fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
		 group->id, cp->cand->id, cp->cost.cost,
		 cp->cost.complexity);
      else
	fprintf (file, " group:%d --> ??\n", group->id);
    }

  /* List the invariant variables currently referenced by the set.  */
  const char *pref = "";
  fprintf (file, " invariant variables: ");
  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }

  /* And the invariant expressions in use.  */
  pref = "";
  fprintf (file, "\n invariant expressions: ");
  for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
       = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
    {
      fprintf (file, "%s%d", pref, (*it).first->id);
      pref = ", ";
    }

  fprintf (file, "\n\n");
}
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with mimimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      /* The group is already expressed by CAND; nothing to change.  */
      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
	continue;

      /* Unless minimizing the number of candidates, only accept the
	 switch when its dependences are satisfied and it is cheaper.  */
      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  /* Apply the changes tentatively, measure the resulting cost, and
     revert; the caller decides whether to commit DELTA for real.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_cand *start,
	      struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      /* Only groups currently expressed by CAND need a replacement.  */
      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
	continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      /* Tentatively switch the group to CP and see whether the
		 overall cost improves.  */
	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      else
	{
	  /* Same search, restricted to the group's related candidates.  */
	  EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id || (start && ci == start->id))
		continue;

	      cnd = data->vcands[ci];

	      cp = get_group_iv_cost (data, group, cnd);
	      if (!cp)
		continue;

	      iv_ca_set_cp (data, ivs, group, cp);
	      acost = iv_ca_cost (ivs);

	      if (acost < best_cost)
		{
		  best_cost = acost;
		  new_cp = cp;
		}
	    }
	}
      /* Restore to old cp for use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      /* No replacement found for this group; CAND cannot be removed.  */
      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return infinite_cost;
	}

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  /* Apply all replacements, measure, and revert.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
/* Try optimizing the set of candidates IVS by removing candidates different
   from to EXCEPT_CAND from it.  Return cost of the new set, and store
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  /* Try removing each candidate in turn and keep the cheapest result.  */
  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}
6702 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6703 cheaper local cost for GROUP than BEST_CP. Return pointer to
6704 the corresponding cost_pair, otherwise just return BEST_CP. */
6706 static struct cost_pair*
6707 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6708 unsigned int cand_idx, struct iv_cand *old_cand,
6709 struct cost_pair *best_cp)
6711 struct iv_cand *cand;
6712 struct cost_pair *cp;
6714 gcc_assert (old_cand != NULL && best_cp != NULL);
6715 if (cand_idx == old_cand->id)
6716 return best_cp;
6718 cand = data->vcands[cand_idx];
6719 cp = get_group_iv_cost (data, group, cand);
6720 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6721 return cp;
6723 return best_cp;
/* Try breaking local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv uses.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, It returns the new cost after recording
   candidate replacement in list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      /* Only consider candidates used by several (but not too many)
	 uses; single-use candidates give the replacement no leverage.  */
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
	 lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  struct iv_group *group = data->vgroups[j];
	  old_cp = iv_ca_cand_for_group (ivs, group);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < data->vcands.length (); k++)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, group, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
	}
      /* No need for further prune.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      /* Return the first strictly-improving replacement found.  */
      if (acost < orig_cost)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}
/* Tries to extend the sets IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* Cover the group with whatever is already in the set, then detach
     it again so the alternatives below start from a clean slate.  */
  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
	continue;

      /* Tentatively express the group by CAND, extend the whole set
	 with CAND, and record the delta if it beats the best so far.  */
      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
			       true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost.infinite_cost_p ())
    {
      /* Preferred candidates all failed; fall back to every candidate
	 recorded in the group's cost map.  */
      for (i = 0; i < group->n_map_members; i++)
	{
	  cp = group->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, group, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, group);
	  act_delta = iv_ca_delta_add (group,
				       iv_ca_cand_for_group (ivs, group),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  /* Commit the best alternative found (possibly none).  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}
6919 /* Finds an initial assignment of candidates to uses. */
6921 static struct iv_ca *
6922 get_initial_solution (struct ivopts_data *data, bool originalp)
6924 unsigned i;
6925 struct iv_ca *ivs = iv_ca_new (data);
6927 for (i = 0; i < data->vgroups.length (); i++)
6928 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6930 iv_ca_free (&ivs);
6931 return NULL;
6934 return ivs;
/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable, this function tries to break local
   optimal fixed-point by replacing candidates in IVS if it's true.
   Returns true if IVS was changed (the cheapest found delta was
   committed), false when no improving change exists.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  /* Temporarily apply the extension, compute the pruning delta on
	     the extended set, then roll the extension back and join both
	     deltas so they can be committed (or discarded) atomically.  */
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  /* Replacement is attempted at most once: the flag is cleared
	     before trying so subsequent iterations don't loop on it.  */
	  *try_replace_p = false;
	  /* So far candidate selecting algorithm tends to choose fewer IVs
	     so that it can handle cases in which loops have many variables
	     but the best choice is often to use only one general biv.  One
	     weakness is it can't handle opposite cases, in which different
	     candidates should be chosen with respect to each use.  To solve
	     the problem, we replace candidates in a manner described by the
	     comments of iv_ca_replace, thus give general algorithm a chance
	     to break local optimal fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}
    }

  if (!best_delta)
    return false;

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}
7011 /* Attempts to find the optimal set of induction variables. We do simple
7012 greedy heuristic -- we try to replace at most one candidate in the selected
7013 solution and remove the unused ivs while this improves the cost. */
7015 static struct iv_ca *
7016 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
7018 struct iv_ca *set;
7019 bool try_replace_p = true;
7021 /* Get the initial solution. */
7022 set = get_initial_solution (data, originalp);
7023 if (!set)
7025 if (dump_file && (dump_flags & TDF_DETAILS))
7026 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7027 return NULL;
7030 if (dump_file && (dump_flags & TDF_DETAILS))
7032 fprintf (dump_file, "Initial set of candidates:\n");
7033 iv_ca_dump (data, dump_file, set);
7036 while (try_improve_iv_set (data, set, &try_replace_p))
7038 if (dump_file && (dump_flags & TDF_DETAILS))
7040 fprintf (dump_file, "Improved to:\n");
7041 iv_ca_dump (data, dump_file, set);
7045 return set;
/* Finds the optimal set of induction variables by running the greedy
   search twice -- once preferring the original IVs, once preferring
   generic candidates -- and keeping the cheaper result.  Also records
   the selected candidate into each use group.  Returns NULL when both
   strategies fail.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  comp_cost cost, origcost;
  struct iv_ca *set, *origset;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  /* A failed strategy counts as infinitely expensive so the comparison
     below always picks the surviving one.  */
  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  The loser is freed; ties favor
     the original-IV solution.  */
  if (origcost <= cost)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  /* Remember for each group which candidate will rewrite it.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}
/* Creates a new induction variable corresponding to CAND, inserting its
   increment at the position encoded in CAND->pos.  For IP_ORIGINAL no
   new variable is built; the existing biv is marked as preserved
   instead.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  /* Candidates without an iv (e.g. pseudocandidates) need no code.  */
  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      /* Increment at the end of the latch, before the exit test.  */
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      /* Increment after the last statement of the latch.  */
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
/* Creates new induction variables described in SET and, with detailed
   dumping enabled, prints the selected IV set for the current loop.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  /* Materialize each candidate chosen in SET.  */
  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
	       avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
	       (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = data->vcands[i];
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  /* OP is the operand other than the biv itself, if any.  */
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  /* Find the replacement target and an insertion point for the new
     computation depending on the kind of defining statement.  */
  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* Replace the PHI by a plain assignment of the computed value.  */
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      /* Rewrite the RHS in place; the statement may be re-created, so
	 refresh USE->stmt.  */
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure.  This peephole transform it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  /* The pattern is matched by walking backwards from the end of the
     block: cond stmt, then the iv update, then the use itself.  Any
     mismatch aborts the transformation.  */
  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* Move the increment before the use, and record the new increment
     position so the use is rewritten against the updated iv value.  */
  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
7372 /* Rewrites USE (address that is an iv) using candidate CAND. */
7374 static void
7375 rewrite_use_address (struct ivopts_data *data,
7376 struct iv_use *use, struct iv_cand *cand)
7378 aff_tree aff;
7379 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7380 tree base_hint = NULL_TREE;
7381 tree ref, iv;
7382 bool ok;
7384 adjust_iv_update_pos (cand, use);
7385 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7386 gcc_assert (ok);
7387 unshare_aff_combination (&aff);
7389 /* To avoid undefined overflow problems, all IV candidates use unsigned
7390 integer types. The drawback is that this makes it impossible for
7391 create_mem_ref to distinguish an IV that is based on a memory object
7392 from one that represents simply an offset.
7394 To work around this problem, we pass a hint to create_mem_ref that
7395 indicates which variable (if any) in aff is an IV based on a memory
7396 object. Note that we only consider the candidate. If this is not
7397 based on an object, the base of the reference is in some subexpression
7398 of the use -- but these will use pointer types, so they are recognized
7399 by the create_mem_ref heuristics anyway. */
7400 if (cand->iv->base_object)
7401 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7403 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7404 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7405 reference_alias_ptr_type (*use->op_p),
7406 iv, base_hint, data->speed);
7407 copy_ref_info (ref, *use->op_p);
7408 *use->op_p = ref;
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  If the cost pair recorded a replacement bound, the exit
   test is rewritten to compare the candidate against that bound; otherwise
   the original iv value is just re-expressed in terms of CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      /* Statements needed to compute the bound are loop-invariant, so
	 they are emitted on the preheader edge.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
7464 /* Rewrite the groups using the selected induction variables. */
7466 static void
7467 rewrite_groups (struct ivopts_data *data)
7469 unsigned i, j;
7471 for (i = 0; i < data->vgroups.length (); i++)
7473 struct iv_group *group = data->vgroups[i];
7474 struct iv_cand *cand = group->selected;
7476 gcc_assert (cand);
7478 if (group->type == USE_NONLINEAR_EXPR)
7480 for (j = 0; j < group->vuses.length (); j++)
7482 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7483 update_stmt (group->vuses[j]->stmt);
7486 else if (group->type == USE_ADDRESS)
7488 for (j = 0; j < group->vuses.length (); j++)
7490 rewrite_use_address (data, group->vuses[j], cand);
7491 update_stmt (group->vuses[j]->stmt);
7494 else
7496 gcc_assert (group->type == USE_COMPARE);
7498 for (j = 0; j < group->vuses.length (); j++)
7500 rewrite_use_compare (data, group->vuses[j], cand);
7501 update_stmt (group->vuses[j]->stmt);
/* Removes the ivs that are not used after rewriting.  Before releasing
   an SSA def, any debug-bind statements referring to it are rewritten
   to an equivalent computation on a surviving candidate so that debug
   info is not lost.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      /* Only remove real ivs with no remaining consumers: not invariant,
	 not used in a nonlinear expression, and not a preserved biv.  */
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      /* Pick the surviving candidate whose iv most resembles the
		 one being removed; preferences (step equality, matching
		 mode, constant base) are combined as a small score.  */
	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      /* Only scan the first 64 groups to bound compile time.  */
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
		    ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		    == TYPE_MODE (TREE_TYPE (info->iv->base))
		    ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		    ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  /* Multiple (or non-trivial) debug uses: bind the
		     computation to a debug temporary once and reuse it.  */
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      /* Redirect every debug use of DEF to the replacement.  */
	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
7642 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7643 for hash_map::traverse. */
7645 bool
7646 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7648 free (value);
7649 return true;
/* Frees data allocated by the optimization of a single loop, resetting
   DATA so the next loop can reuse it.  Per-loop tables are emptied but
   their backing structures are kept alive for reuse.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  /* Release the cached niter descriptors and the map holding them.  */
  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  /* Reset per-SSA-name bookkeeping for every name marked relevant.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Free the use groups, their uses and cost maps.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	if (group->cost_map[j].depends_on)
	  BITMAP_FREE (group->cost_map[j].depends_on);

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  /* Free the candidates.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->vcands.truncate (0);

  /* Grow the version_info array if new SSA names appeared; the fresh
     allocation is zero-initialized by XCNEWVEC.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Drop the DECL_RTLs we temporarily materialized for cost estimation.  */
  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->max_inv_expr_id = 0;

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}
/* Finalizes data structures used by the iv optimization pass, releasing
   everything tree_ssa_iv_optimize_init allocated.  free_loop_data must
   run first since it walks the structures destroyed below.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}
7753 /* Returns true if the loop body BODY includes any function calls. */
7755 static bool
7756 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7758 gimple_stmt_iterator gsi;
7759 unsigned i;
7761 for (i = 0; i < num_nodes; i++)
7762 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7764 gimple *stmt = gsi_stmt (gsi);
7765 if (is_gimple_call (stmt)
7766 && !gimple_call_internal_p (stmt)
7767 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7768 return true;
7770 return false;
/* Optimizes the LOOP.  Returns true if anything changed.  Runs the full
   pipeline: find ivs and uses, build candidates, cost them, select the
   optimal set, rewrite the uses and clean up.  Per-loop data in DATA is
   freed on every exit path via the `finish' label.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  /* Bail out on pathologically large loops to bound compile time.  */
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
7859 /* Main entry point. Optimizes induction variables in loops. */
7861 void
7862 tree_ssa_iv_optimize (void)
7864 struct loop *loop;
7865 struct ivopts_data data;
7867 tree_ssa_iv_optimize_init (&data);
7869 /* Optimize the loops starting with the innermost ones. */
7870 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7872 if (dump_file && (dump_flags & TDF_DETAILS))
7873 flow_loop_dump (loop, dump_file, NULL, 1);
7875 tree_ssa_iv_optimize_loop (&data, loop);
7878 tree_ssa_iv_optimize_finalize (&data);