/* gcc/tree-ssa-loop-ivopts.c — induction variable optimizations pass.
   (Scraped-page header text removed; it was not part of the source.)  */
/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in group.
      Generally, address type uses are grouped together if their iv bases
      are different in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
112 cost of different addressing modes. This should be moved to a TBD
113 interface between the GIMPLE and RTL worlds. */
115 /* The infinite cost. */
116 #define INFTY 10000000
118 #define AVG_LOOP_NITER(LOOP) 5
120 /* Returns the expected number of loop iterations for LOOP.
121 The average trip count is computed from profile data if it
122 exists. */
124 static inline HOST_WIDE_INT
125 avg_loop_niter (struct loop *loop)
127 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
128 if (niter == -1)
130 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > AVG_LOOP_NITER (loop))
132 return AVG_LOOP_NITER (loop);
135 return niter;
138 struct iv_use;
140 /* Representation of the induction variable. */
141 struct iv
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to that the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The identifier in the use if it is the case. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
165 /* Types of uses. */
166 enum use_type
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
173 /* Cost of a computation. */
174 struct comp_cost
176 comp_cost (): cost (0), complexity (0), scratch (0)
179 comp_cost (int cost, unsigned complexity, int scratch = 0)
180 : cost (cost), complexity (complexity), scratch (scratch)
183 /* Returns true if COST is infinite. */
184 bool infinite_cost_p ();
186 /* Adds costs COST1 and COST2. */
187 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
189 /* Adds COST to the comp_cost. */
190 comp_cost operator+= (comp_cost cost);
192 /* Adds constant C to this comp_cost. */
193 comp_cost operator+= (HOST_WIDE_INT c);
195 /* Subtracts constant C to this comp_cost. */
196 comp_cost operator-= (HOST_WIDE_INT c);
198 /* Divide the comp_cost by constant C. */
199 comp_cost operator/= (HOST_WIDE_INT c);
201 /* Multiply the comp_cost by constant C. */
202 comp_cost operator*= (HOST_WIDE_INT c);
204 /* Subtracts costs COST1 and COST2. */
205 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
207 /* Subtracts COST from this comp_cost. */
208 comp_cost operator-= (comp_cost cost);
210 /* Returns true if COST1 is smaller than COST2. */
211 friend bool operator< (comp_cost cost1, comp_cost cost2);
213 /* Returns true if COST1 and COST2 are equal. */
214 friend bool operator== (comp_cost cost1, comp_cost cost2);
216 /* Returns true if COST1 is smaller or equal than COST2. */
217 friend bool operator<= (comp_cost cost1, comp_cost cost2);
219 int cost; /* The runtime cost. */
220 unsigned complexity; /* The estimate of the complexity of the code for
221 the computation (in no concrete units --
222 complexity field should be larger for more
223 complex expressions and addressing modes). */
224 int scratch; /* Scratch used during cost computation. */
227 static const comp_cost no_cost;
228 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
230 bool
231 comp_cost::infinite_cost_p ()
233 return cost == INFTY;
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
245 return cost1;
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
254 gcc_assert (!cost2.infinite_cost_p ());
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
259 return cost1;
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
265 *this = *this + cost;
266 return *this;
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
272 if (infinite_cost_p ())
273 return *this;
275 this->cost += c;
277 return *this;
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
283 if (infinite_cost_p ())
284 return *this;
286 this->cost -= c;
288 return *this;
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
294 if (infinite_cost_p ())
295 return *this;
297 this->cost /= c;
299 return *this;
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
305 if (infinite_cost_p ())
306 return *this;
308 this->cost *= c;
310 return *this;
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
316 *this = *this - cost;
317 return *this;
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
326 return cost1.cost < cost2.cost;
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
339 return cost1 < cost2 || cost1 == cost2;
342 struct iv_inv_expr_ent;
344 /* The candidate - cost pair. */
345 struct cost_pair
347 struct iv_cand *cand; /* The candidate. */
348 comp_cost cost; /* The cost. */
349 bitmap depends_on; /* The list of invariants that have to be
350 preserved. */
351 tree value; /* For final value elimination, the expression for
352 the final value of the iv. For iv elimination,
353 the new bound to compare with. */
354 enum tree_code comp; /* For iv elimination, the comparison. */
355 iv_inv_expr_ent *inv_expr; /* Loop invariant expression. */
358 /* Use. */
359 struct iv_use
361 unsigned id; /* The id of the use. */
362 unsigned group_id; /* The group id the use belongs to. */
363 enum use_type type; /* Type of the use. */
364 struct iv *iv; /* The induction variable it is based on. */
365 gimple *stmt; /* Statement in that it occurs. */
366 tree *op_p; /* The place where it occurs. */
368 tree addr_base; /* Base address with const offset stripped. */
369 unsigned HOST_WIDE_INT addr_offset;
370 /* Const offset stripped from base address. */
373 /* Group of uses. */
374 struct iv_group
376 /* The id of the group. */
377 unsigned id;
378 /* Uses of the group are of the same type. */
379 enum use_type type;
380 /* The set of "related" IV candidates, plus the important ones. */
381 bitmap related_cands;
382 /* Number of IV candidates in the cost_map. */
383 unsigned n_map_members;
384 /* The costs wrto the iv candidates. */
385 struct cost_pair *cost_map;
386 /* The selected candidate for the group. */
387 struct iv_cand *selected;
388 /* Uses in the group. */
389 vec<struct iv_use *> vuses;
392 /* The position where the iv is computed. */
393 enum iv_position
395 IP_NORMAL, /* At the end, just before the exit condition. */
396 IP_END, /* At the end of the latch block. */
397 IP_BEFORE_USE, /* Immediately before a specific use. */
398 IP_AFTER_USE, /* Immediately after a specific use. */
399 IP_ORIGINAL /* The original biv. */
402 /* The induction variable candidate. */
403 struct iv_cand
405 unsigned id; /* The number of the candidate. */
406 bool important; /* Whether this is an "important" candidate, i.e. such
407 that it should be considered by all uses. */
408 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
409 gimple *incremented_at;/* For original biv, the statement where it is
410 incremented. */
411 tree var_before; /* The variable used for it before increment. */
412 tree var_after; /* The variable used for it after increment. */
413 struct iv *iv; /* The value of the candidate. NULL for
414 "pseudocandidate" used to indicate the possibility
415 to replace the final value of an iv by direct
416 computation of the value. */
417 unsigned cost; /* Cost of the candidate. */
418 unsigned cost_step; /* Cost of the candidate's increment operation. */
419 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
420 where it is incremented. */
421 bitmap depends_on; /* The list of invariants that are used in step of the
422 biv. */
423 struct iv *orig_iv; /* The original iv if this cand is added from biv with
424 smaller type. */
427 /* Hashtable entry for common candidate derived from iv uses. */
428 struct iv_common_cand
430 tree base;
431 tree step;
432 /* IV uses from which this common candidate is derived. */
433 auto_vec<struct iv_use *> uses;
434 hashval_t hash;
437 /* Hashtable helpers. */
439 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
441 static inline hashval_t hash (const iv_common_cand *);
442 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
445 /* Hash function for possible common candidates. */
447 inline hashval_t
448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
450 return ccand->hash;
453 /* Hash table equality function for common candidates. */
455 inline bool
456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
457 const iv_common_cand *ccand2)
459 return (ccand1->hash == ccand2->hash
460 && operand_equal_p (ccand1->base, ccand2->base, 0)
461 && operand_equal_p (ccand1->step, ccand2->step, 0)
462 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
463 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
466 /* Loop invariant expression hashtable entry. */
468 struct iv_inv_expr_ent
470 /* Tree expression of the entry. */
471 tree expr;
472 /* Unique indentifier. */
473 int id;
474 /* Hash value. */
475 hashval_t hash;
478 /* Sort iv_inv_expr_ent pair A and B by id field. */
480 static int
481 sort_iv_inv_expr_ent (const void *a, const void *b)
483 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
484 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
486 unsigned id1 = (*e1)->id;
487 unsigned id2 = (*e2)->id;
489 if (id1 < id2)
490 return -1;
491 else if (id1 > id2)
492 return 1;
493 else
494 return 0;
497 /* Hashtable helpers. */
499 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
501 static inline hashval_t hash (const iv_inv_expr_ent *);
502 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
505 /* Hash function for loop invariant expressions. */
507 inline hashval_t
508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
510 return expr->hash;
513 /* Hash table equality function for expressions. */
515 inline bool
516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
517 const iv_inv_expr_ent *expr2)
519 return expr1->hash == expr2->hash
520 && operand_equal_p (expr1->expr, expr2->expr, 0);
523 struct ivopts_data
525 /* The currently optimized loop. */
526 struct loop *current_loop;
527 source_location loop_loc;
529 /* Numbers of iterations for all exits of the current loop. */
530 hash_map<edge, tree_niter_desc *> *niters;
532 /* Number of registers used in it. */
533 unsigned regs_used;
535 /* The size of version_info array allocated. */
536 unsigned version_info_size;
538 /* The array of information for the ssa names. */
539 struct version_info *version_info;
541 /* The hashtable of loop invariant expressions created
542 by ivopt. */
543 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
545 /* Loop invariant expression id. */
546 int max_inv_expr_id;
548 /* The bitmap of indices in version_info whose value was changed. */
549 bitmap relevant;
551 /* The uses of induction variables. */
552 vec<iv_group *> vgroups;
554 /* The candidates. */
555 vec<iv_cand *> vcands;
557 /* A bitmap of important candidates. */
558 bitmap important_candidates;
560 /* Cache used by tree_to_aff_combination_expand. */
561 hash_map<tree, name_expansion *> *name_expansion_cache;
563 /* The hashtable of common candidates derived from iv uses. */
564 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
566 /* The common candidates. */
567 vec<iv_common_cand *> iv_common_cands;
569 /* The maximum invariant id. */
570 unsigned max_inv_id;
572 /* Number of no_overflow BIVs which are not used in memory address. */
573 unsigned bivs_not_used_in_addr;
575 /* Obstack for iv structure. */
576 struct obstack iv_obstack;
578 /* Whether to consider just related and important candidates when replacing a
579 use. */
580 bool consider_all_candidates;
582 /* Are we optimizing for speed? */
583 bool speed;
585 /* Whether the loop body includes any function calls. */
586 bool body_includes_call;
588 /* Whether the loop body can only be exited via single exit. */
589 bool loop_single_exit_p;
592 /* An assignment of iv candidates to uses. */
594 struct iv_ca
596 /* The number of uses covered by the assignment. */
597 unsigned upto;
599 /* Number of uses that cannot be expressed by the candidates in the set. */
600 unsigned bad_groups;
602 /* Candidate assigned to a use, together with the related costs. */
603 struct cost_pair **cand_for_group;
605 /* Number of times each candidate is used. */
606 unsigned *n_cand_uses;
608 /* The candidates used. */
609 bitmap cands;
611 /* The number of candidates in the set. */
612 unsigned n_cands;
614 /* Total number of registers needed. */
615 unsigned n_regs;
617 /* Total cost of expressing uses. */
618 comp_cost cand_use_cost;
620 /* Total cost of candidates. */
621 unsigned cand_cost;
623 /* Number of times each invariant is used. */
624 unsigned *n_invariant_uses;
626 /* Hash set with used invariant expression. */
627 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
629 /* Total cost of the assignment. */
630 comp_cost cost;
633 /* Difference of two iv candidate assignments. */
635 struct iv_ca_delta
637 /* Changed group. */
638 struct iv_group *group;
640 /* An old assignment (for rollback purposes). */
641 struct cost_pair *old_cp;
643 /* A new assignment. */
644 struct cost_pair *new_cp;
646 /* Next change in the list. */
647 struct iv_ca_delta *next;
650 /* Bound on number of candidates below that all candidates are considered. */
652 #define CONSIDER_ALL_CANDIDATES_BOUND \
653 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
655 /* If there are more iv occurrences, we just give up (it is quite unlikely that
656 optimizing such a loop would help, and it would take ages). */
658 #define MAX_CONSIDERED_GROUPS \
659 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
661 /* If there are at most this number of ivs in the set, try removing unnecessary
662 ivs from the set always. */
664 #define ALWAYS_PRUNE_CAND_SET_BOUND \
665 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
667 /* The list of trees for that the decl_rtl field must be reset is stored
668 here. */
670 static vec<tree> decl_rtl_to_reset;
672 static comp_cost force_expr_to_var_cost (tree, bool);
674 /* The single loop exit if it dominates the latch, NULL otherwise. */
676 edge
677 single_dom_exit (struct loop *loop)
679 edge exit = single_exit (loop);
681 if (!exit)
682 return NULL;
684 if (!just_once_each_iteration_p (loop, exit->src))
685 return NULL;
687 return exit;
690 /* Dumps information about the induction variable IV to FILE. Don't dump
691 variable's name if DUMP_NAME is FALSE. The information is dumped with
692 preceding spaces indicated by INDENT_LEVEL. */
694 void
695 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
697 const char *p;
698 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
700 if (indent_level > 4)
701 indent_level = 4;
702 p = spaces + 8 - (indent_level << 1);
704 fprintf (file, "%sIV struct:\n", p);
705 if (iv->ssa_name && dump_name)
707 fprintf (file, "%s SSA_NAME:\t", p);
708 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
709 fprintf (file, "\n");
712 fprintf (file, "%s Type:\t", p);
713 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
714 fprintf (file, "\n");
716 fprintf (file, "%s Base:\t", p);
717 print_generic_expr (file, iv->base, TDF_SLIM);
718 fprintf (file, "\n");
720 fprintf (file, "%s Step:\t", p);
721 print_generic_expr (file, iv->step, TDF_SLIM);
722 fprintf (file, "\n");
724 if (iv->base_object)
726 fprintf (file, "%s Object:\t", p);
727 print_generic_expr (file, iv->base_object, TDF_SLIM);
728 fprintf (file, "\n");
731 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
733 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
734 p, iv->no_overflow ? "No-overflow" : "Overflow");
737 /* Dumps information about the USE to FILE. */
739 void
740 dump_use (FILE *file, struct iv_use *use)
742 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
743 fprintf (file, " At stmt:\t");
744 print_gimple_stmt (file, use->stmt, 0, 0);
745 fprintf (file, " At pos:\t");
746 if (use->op_p)
747 print_generic_expr (file, *use->op_p, TDF_SLIM);
748 fprintf (file, "\n");
749 dump_iv (file, use->iv, false, 2);
752 /* Dumps information about the uses to FILE. */
754 void
755 dump_groups (FILE *file, struct ivopts_data *data)
757 unsigned i, j;
758 struct iv_group *group;
760 for (i = 0; i < data->vgroups.length (); i++)
762 group = data->vgroups[i];
763 fprintf (file, "Group %d:\n", group->id);
764 if (group->type == USE_NONLINEAR_EXPR)
765 fprintf (file, " Type:\tGENERIC\n");
766 else if (group->type == USE_ADDRESS)
767 fprintf (file, " Type:\tADDRESS\n");
768 else
770 gcc_assert (group->type == USE_COMPARE);
771 fprintf (file, " Type:\tCOMPARE\n");
773 for (j = 0; j < group->vuses.length (); j++)
774 dump_use (file, group->vuses[j]);
778 /* Dumps information about induction variable candidate CAND to FILE. */
780 void
781 dump_cand (FILE *file, struct iv_cand *cand)
783 struct iv *iv = cand->iv;
785 fprintf (file, "Candidate %d:\n", cand->id);
786 if (cand->depends_on)
788 fprintf (file, " Depend on: ");
789 dump_bitmap (file, cand->depends_on);
792 if (cand->var_before)
794 fprintf (file, " Var befor: ");
795 print_generic_expr (file, cand->var_before, TDF_SLIM);
796 fprintf (file, "\n");
798 if (cand->var_after)
800 fprintf (file, " Var after: ");
801 print_generic_expr (file, cand->var_after, TDF_SLIM);
802 fprintf (file, "\n");
805 switch (cand->pos)
807 case IP_NORMAL:
808 fprintf (file, " Incr POS: before exit test\n");
809 break;
811 case IP_BEFORE_USE:
812 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
813 break;
815 case IP_AFTER_USE:
816 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
817 break;
819 case IP_END:
820 fprintf (file, " Incr POS: at end\n");
821 break;
823 case IP_ORIGINAL:
824 fprintf (file, " Incr POS: orig biv\n");
825 break;
828 dump_iv (file, iv, false, 1);
831 /* Returns the info for ssa version VER. */
833 static inline struct version_info *
834 ver_info (struct ivopts_data *data, unsigned ver)
836 return data->version_info + ver;
839 /* Returns the info for ssa name NAME. */
841 static inline struct version_info *
842 name_info (struct ivopts_data *data, tree name)
844 return ver_info (data, SSA_NAME_VERSION (name));
847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
848 emitted in LOOP. */
850 static bool
851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
853 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
855 gcc_assert (bb);
857 if (sbb == loop->latch)
858 return true;
860 if (sbb != bb)
861 return false;
863 return stmt == last_stmt (bb);
866 /* Returns true if STMT if after the place where the original induction
867 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
868 if the positions are identical. */
870 static bool
871 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
873 basic_block cand_bb = gimple_bb (cand->incremented_at);
874 basic_block stmt_bb = gimple_bb (stmt);
876 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
877 return false;
879 if (stmt_bb != cand_bb)
880 return true;
882 if (true_if_equal
883 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
884 return true;
885 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
888 /* Returns true if STMT if after the place where the induction variable
889 CAND is incremented in LOOP. */
891 static bool
892 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
894 switch (cand->pos)
896 case IP_END:
897 return false;
899 case IP_NORMAL:
900 return stmt_after_ip_normal_pos (loop, stmt);
902 case IP_ORIGINAL:
903 case IP_AFTER_USE:
904 return stmt_after_inc_pos (cand, stmt, false);
906 case IP_BEFORE_USE:
907 return stmt_after_inc_pos (cand, stmt, true);
909 default:
910 gcc_unreachable ();
914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
916 static bool
917 abnormal_ssa_name_p (tree exp)
919 if (!exp)
920 return false;
922 if (TREE_CODE (exp) != SSA_NAME)
923 return false;
925 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
929 abnormal phi node. Callback for for_each_index. */
931 static bool
932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
933 void *data ATTRIBUTE_UNUSED)
935 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
937 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
938 return false;
939 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
940 return false;
943 return !abnormal_ssa_name_p (*index);
946 /* Returns true if EXPR contains a ssa name that occurs in an
947 abnormal phi node. */
949 bool
950 contains_abnormal_ssa_name_p (tree expr)
952 enum tree_code code;
953 enum tree_code_class codeclass;
955 if (!expr)
956 return false;
958 code = TREE_CODE (expr);
959 codeclass = TREE_CODE_CLASS (code);
961 if (code == SSA_NAME)
962 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
964 if (code == INTEGER_CST
965 || is_gimple_min_invariant (expr))
966 return false;
968 if (code == ADDR_EXPR)
969 return !for_each_index (&TREE_OPERAND (expr, 0),
970 idx_contains_abnormal_ssa_name_p,
971 NULL);
973 if (code == COND_EXPR)
974 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
975 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
976 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
978 switch (codeclass)
980 case tcc_binary:
981 case tcc_comparison:
982 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
983 return true;
985 /* Fallthru. */
986 case tcc_unary:
987 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
988 return true;
990 break;
992 default:
993 gcc_unreachable ();
996 return false;
999 /* Returns the structure describing number of iterations determined from
1000 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1002 static struct tree_niter_desc *
1003 niter_for_exit (struct ivopts_data *data, edge exit)
1005 struct tree_niter_desc *desc;
1006 tree_niter_desc **slot;
1008 if (!data->niters)
1010 data->niters = new hash_map<edge, tree_niter_desc *>;
1011 slot = NULL;
1013 else
1014 slot = data->niters->get (exit);
1016 if (!slot)
1018 /* Try to determine number of iterations. We cannot safely work with ssa
1019 names that appear in phi nodes on abnormal edges, so that we do not
1020 create overlapping life ranges for them (PR 27283). */
1021 desc = XNEW (struct tree_niter_desc);
1022 if (!number_of_iterations_exit (data->current_loop,
1023 exit, desc, true)
1024 || contains_abnormal_ssa_name_p (desc->niter))
1026 XDELETE (desc);
1027 desc = NULL;
1029 data->niters->put (exit, desc);
1031 else
1032 desc = *slot;
1034 return desc;
1037 /* Returns the structure describing number of iterations determined from
1038 single dominating exit of DATA->current_loop, or NULL if something
1039 goes wrong. */
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1044 edge exit = single_dom_exit (data->current_loop);
1046 if (!exit)
1047 return NULL;
1049 return niter_for_exit (data, exit);
1052 /* Initializes data structures used by the iv optimization pass, stored
1053 in DATA. */
1055 static void
1056 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1058 data->version_info_size = 2 * num_ssa_names;
1059 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1060 data->relevant = BITMAP_ALLOC (NULL);
1061 data->important_candidates = BITMAP_ALLOC (NULL);
1062 data->max_inv_id = 0;
1063 data->niters = NULL;
1064 data->vgroups.create (20);
1065 data->vcands.create (20);
1066 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1067 data->max_inv_expr_id = 0;
1068 data->name_expansion_cache = NULL;
1069 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1070 data->iv_common_cands.create (20);
1071 decl_rtl_to_reset.create (20);
1072 gcc_obstack_init (&data->iv_obstack);
1075 /* Returns a memory object to that EXPR points. In case we are able to
1076 determine that it does not point to any such object, NULL is returned. */
1078 static tree
1079 determine_base_object (tree expr)
1081 enum tree_code code = TREE_CODE (expr);
1082 tree base, obj;
1084 /* If this is a pointer casted to any type, we need to determine
1085 the base object for the pointer; so handle conversions before
1086 throwing away non-pointer expressions. */
1087 if (CONVERT_EXPR_P (expr))
1088 return determine_base_object (TREE_OPERAND (expr, 0));
1090 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1091 return NULL_TREE;
1093 switch (code)
1095 case INTEGER_CST:
1096 return NULL_TREE;
1098 case ADDR_EXPR:
1099 obj = TREE_OPERAND (expr, 0);
1100 base = get_base_address (obj);
1102 if (!base)
1103 return expr;
1105 if (TREE_CODE (base) == MEM_REF)
1106 return determine_base_object (TREE_OPERAND (base, 0));
1108 return fold_convert (ptr_type_node,
1109 build_fold_addr_expr (base));
1111 case POINTER_PLUS_EXPR:
1112 return determine_base_object (TREE_OPERAND (expr, 0));
1114 case PLUS_EXPR:
1115 case MINUS_EXPR:
1116 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1117 gcc_unreachable ();
1119 default:
1120 return fold_convert (ptr_type_node, expr);
1124 /* Return true if address expression with non-DECL_P operand appears
1125 in EXPR. */
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1130 bool res = false;
1132 STRIP_NOPS (expr);
1133 switch (TREE_CODE (expr))
1135 case POINTER_PLUS_EXPR:
1136 case PLUS_EXPR:
1137 case MINUS_EXPR:
1138 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140 break;
1142 case ADDR_EXPR:
1143 return (!DECL_P (TREE_OPERAND (expr, 0)));
1145 default:
1146 return false;
1149 return res;
1152 /* Allocates an induction variable with given initial value BASE and step STEP
1153 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow.  The iv is
allocated on DATA's obstack and is released in bulk with it.  */
1155 static struct iv *
1156 alloc_iv (struct ivopts_data *data, tree base, tree step,
1157 bool no_overflow = false)
1159 tree expr = base;
1160 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1161 sizeof (struct iv))
1162 gcc_assert (step != NULL_TREE);
1164 /* Lower address expression in base except ones with DECL_P as operand.
1165 By doing this:
1166 1) More accurate cost can be computed for address expressions;
1167 2) Duplicate candidates won't be created for bases in different
1168 forms, like &a[0] and &a. */
1169 STRIP_NOPS (expr);
1170 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1171 || contain_complex_addr_expr (expr))
/* Canonicalize the base by round-tripping it through an affine
   combination, so equivalent forms compare equal later.  */
1173 aff_tree comb;
1174 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1175 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1178 iv->base = base;
1179 iv->base_object = determine_base_object (base);
1180 iv->step = step;
1181 iv->biv_p = false;
1182 iv->nonlin_use = NULL;
1183 iv->ssa_name = NULL_TREE;
1184 iv->no_overflow = no_overflow;
1185 iv->have_address_use = false;
1187 return iv;
1190 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1191 doesn't overflow. */
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195 bool no_overflow)
1197 struct version_info *info = name_info (data, iv);
1199 gcc_assert (!info->iv);
1201 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202 info->iv = alloc_iv (data, base, step, no_overflow);
1203 info->iv->ssa_name = iv;
1206 /* Finds induction variable declaration for VAR. */
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1211 basic_block bb;
1212 tree type = TREE_TYPE (var);
1214 if (!POINTER_TYPE_P (type)
1215 && !INTEGRAL_TYPE_P (type))
1216 return NULL;
1218 if (!name_info (data, var)->iv)
1220 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1222 if (!bb
1223 || !flow_bb_inside_loop_p (data->current_loop, bb))
1224 set_iv (data, var, var, build_int_cst (type, 0), true);
1227 return name_info (data, var)->iv;
1230 /* Return the first non-invariant ssa var found in EXPR. */
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1235 int i, n;
1236 tree tmp;
1237 enum tree_code code;
1239 if (!expr || is_gimple_min_invariant (expr))
1240 return NULL;
1242 code = TREE_CODE (expr);
1243 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1245 n = TREE_OPERAND_LENGTH (expr);
1246 for (i = 0; i < n; i++)
1248 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1250 if (tmp)
1251 return tmp;
1254 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1257 /* Finds basic ivs.  Returns true if at least one was recorded.  */
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1262 gphi *phi;
1263 affine_iv iv;
1264 tree step, type, base, stop;
1265 bool found = false;
1266 struct loop *loop = data->current_loop;
1267 gphi_iterator psi;
/* Every biv candidate is a PHI node in the loop header.  */
1269 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1271 phi = psi.phi ();
1273 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274 continue;
1276 if (virtual_operand_p (PHI_RESULT (phi)))
1277 continue;
/* Let SCEV decide whether the PHI result is a simple affine iv.  */
1279 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280 continue;
1282 if (integer_zerop (iv.step))
1283 continue;
1285 step = iv.step;
1286 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287 /* Stop expanding iv base at the first ssa var referred by iv step.
1288 Ideally we should stop at any ssa var, because that's expensive
1289 and unusual to happen, we just do it on the first one.
1291 See PR64705 for the rationale. */
1292 stop = extract_single_var_from_expr (step);
1293 base = expand_simple_operations (base, stop);
1294 if (contains_abnormal_ssa_name_p (base)
1295 || contains_abnormal_ssa_name_p (step))
1296 continue;
1298 type = TREE_TYPE (PHI_RESULT (phi));
1299 base = fold_convert (type, base);
1300 if (step)
/* Pointer ivs step in ptrofftype, everything else in TYPE itself.  */
1302 if (POINTER_TYPE_P (type))
1303 step = convert_to_ptrofftype (step);
1304 else
1305 step = fold_convert (type, step);
1308 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309 found = true;
1312 return found;
1315 /* Marks basic ivs.  Sets biv_p on each header PHI iv and on its
increment iv, and counts no-overflow bivs not (yet) used in address
type uses in data->bivs_not_used_in_addr.  */
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1320 gphi *phi;
1321 gimple *def;
1322 tree var;
1323 struct iv *iv, *incr_iv;
1324 struct loop *loop = data->current_loop;
1325 basic_block incr_bb;
1326 gphi_iterator psi;
/* Counter is decremented later by record_biv_for_address_use.  */
1328 data->bivs_not_used_in_addr = 0;
1329 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1331 phi = psi.phi ();
1333 iv = get_iv (data, PHI_RESULT (phi));
1334 if (!iv)
1335 continue;
/* The value coming around the latch is the incremented iv.  */
1337 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338 def = SSA_NAME_DEF_STMT (var);
1339 /* Don't mark iv peeled from other one as biv. */
1340 if (def
1341 && gimple_code (def) == GIMPLE_PHI
1342 && gimple_bb (def) == loop->header)
1343 continue;
1345 incr_iv = get_iv (data, var);
1346 if (!incr_iv)
1347 continue;
1349 /* If the increment is in the subloop, ignore it. */
1350 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351 if (incr_bb->loop_father != data->current_loop
1352 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353 continue;
1355 iv->biv_p = true;
1356 incr_iv->biv_p = true;
1357 if (iv->no_overflow)
1358 data->bivs_not_used_in_addr++;
1359 if (incr_iv->no_overflow)
1360 data->bivs_not_used_in_addr++;
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365 parameters to IV.  Returns false (with IV cleared) otherwise.  */
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1370 tree lhs, stop;
1371 struct loop *loop = data->current_loop;
1373 iv->base = NULL_TREE;
1374 iv->step = NULL_TREE;
/* Only assignments to an SSA name can define a giv.  */
1376 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377 return false;
1379 lhs = gimple_assign_lhs (stmt);
1380 if (TREE_CODE (lhs) != SSA_NAME)
1381 return false;
1383 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384 return false;
1386 /* Stop expanding iv base at the first ssa var referred by iv step.
1387 Ideally we should stop at any ssa var, because that's expensive
1388 and unusual to happen, we just do it on the first one.
1390 See PR64705 for the rationale. */
1391 stop = extract_single_var_from_expr (iv->step);
1392 iv->base = expand_simple_operations (iv->base, stop);
1393 if (contains_abnormal_ssa_name_p (iv->base)
1394 || contains_abnormal_ssa_name_p (iv->step))
1395 return false;
1397 /* If STMT could throw, then do not consider STMT as defining a GIV.
1398 While this will suppress optimizations, we can not safely delete this
1399 GIV and associated statements, even if it appears it is not used. */
1400 if (stmt_could_throw_p (stmt))
1401 return false;
1403 return true;
1406 /* Finds general ivs in statement STMT. */
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1411 affine_iv iv;
1413 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414 return;
1416 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1419 /* Finds general ivs in basic block BB. */
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1424 gimple_stmt_iterator bsi;
1426 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427 find_givs_in_stmt (data, gsi_stmt (bsi));
1430 /* Finds general ivs. */
1432 static void
1433 find_givs (struct ivopts_data *data)
1435 struct loop *loop = data->current_loop;
1436 basic_block *body = get_loop_body_in_dom_order (loop);
1437 unsigned i;
1439 for (i = 0; i < loop->num_nodes; i++)
1440 find_givs_in_bb (data, body[i]);
1441 free (body);
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445 variable and if so, its initial value and step.  Returns false when the
loop has no basic induction variable.  */
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1450 unsigned i;
1451 bitmap_iterator bi;
/* Without at least one biv there is nothing to optimize.  */
1453 if (!find_bivs (data))
1454 return false;
1456 find_givs (data);
1457 mark_bivs (data);
/* Everything below is dump-file diagnostics only.  */
1459 if (dump_file && (dump_flags & TDF_DETAILS))
1461 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1463 if (niter)
1465 fprintf (dump_file, " number of iterations ");
1466 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467 if (!integer_zerop (niter->may_be_zero))
1469 fprintf (dump_file, "; zero if ");
1470 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1472 fprintf (dump_file, "\n");
1475 fprintf (dump_file, "\n<Induction Vars>:\n");
1476 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1478 struct version_info *info = ver_info (data, i);
1479 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1484 return true;
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489 is the const offset stripped from IV base; for other types use, both
1490 are zero by default. */
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494 gimple *stmt, enum use_type type, tree addr_base,
1495 unsigned HOST_WIDE_INT addr_offset)
1497 struct iv_use *use = XCNEW (struct iv_use);
1499 use->id = group->vuses.length ();
1500 use->group_id = group->id;
1501 use->type = type;
1502 use->iv = iv;
1503 use->stmt = stmt;
1504 use->op_p = use_p;
1505 use->addr_base = addr_base;
1506 use->addr_offset = addr_offset;
1508 group->vuses.safe_push (use);
1509 return use;
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513 NONLINEAR_USE is true if the invariant is used in a way we do not
1514 handle specially. */
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1519 basic_block bb;
1520 struct version_info *info;
1522 if (TREE_CODE (op) != SSA_NAME
1523 || virtual_operand_p (op))
1524 return;
1526 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527 if (bb
1528 && flow_bb_inside_loop_p (data->current_loop, bb))
1529 return;
1531 info = name_info (data, op);
1532 info->name = op;
1533 info->has_nonlin_use |= nonlinear_use;
1534 if (!info->inv_id)
1535 info->inv_id = ++data->max_inv_id;
1536 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
/* Forward declaration; defined later in this file.  Strips the constant
   offset from address expression EXPR, storing it in *OFFSET.  */
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1542 /* Record a group of TYPE. */
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1547 struct iv_group *group = XCNEW (struct iv_group);
1549 group->id = data->vgroups.length ();
1550 group->type = type;
1551 group->related_cands = BITMAP_ALLOC (NULL);
1552 group->vuses.create (1);
1554 data->vgroups.safe_push (group);
1555 return group;
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559 New group will be created if there is no existing group for the use. */
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563 struct iv *iv, gimple *stmt, enum use_type type)
1565 tree addr_base = NULL;
1566 struct iv_group *group = NULL;
1567 unsigned HOST_WIDE_INT addr_offset = 0;
1569 /* For address type uses, search existing groups for one with the same
base object, stripped base and step; non-address uses always get a
new group of their own.  */
1570 if (type == USE_ADDRESS && iv->base_object)
1572 unsigned int i;
/* Separate the iv base into stripped base and constant offset.  */
1574 addr_base = strip_offset (iv->base, &addr_offset);
1575 for (i = 0; i < data->vgroups.length (); i++)
1577 struct iv_use *use;
1579 group = data->vgroups[i];
/* The first use is representative for the whole group.  */
1580 use = group->vuses[0];
1581 if (use->type != USE_ADDRESS || !use->iv->base_object)
1582 continue;
1584 /* Check if it has the same stripped base and step. */
1585 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586 && operand_equal_p (iv->step, use->iv->step, 0)
1587 && operand_equal_p (addr_base, use->addr_base, 0))
1588 break;
1590 if (i == data->vgroups.length ())
1591 group = NULL;
1594 if (!group)
1595 group = record_group (data, type);
1597 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1600 /* Checks whether the use OP is interesting and if so, records it.
Returns the (possibly pre-existing) nonlinear use, or NULL.  */
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1605 struct iv *iv;
1606 gimple *stmt;
1607 struct iv_use *use;
1609 if (TREE_CODE (op) != SSA_NAME)
1610 return NULL;
1612 iv = get_iv (data, op);
1613 if (!iv)
1614 return NULL;
/* Reuse an already-recorded nonlinear use for this iv.  */
1616 if (iv->nonlin_use)
1618 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619 return iv->nonlin_use;
/* A zero-step iv is really a loop invariant, not an iv use.  */
1622 if (integer_zerop (iv->step))
1624 record_invariant (data, op, true);
1625 return NULL;
1628 stmt = SSA_NAME_DEF_STMT (op);
1629 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1631 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632 iv->nonlin_use = use;
1633 return use;
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637 of an induction variable and an invariant. If this is the case,
1638 CONTROL_VAR is set to location of the iv, BOUND to the location of
1639 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640 induction variable descriptions, and true is returned. If this is not
1641 the case, CONTROL_VAR and BOUND are set to the arguments of the
1642 condition and false is returned. */
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646 tree **control_var, tree **bound,
1647 struct iv **iv_var, struct iv **iv_bound)
1649 /* The objects returned when COND has constant operands. */
1650 static struct iv const_iv;
1651 static tree zero;
1652 tree *op0 = &zero, *op1 = &zero;
1653 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654 bool ret = false;
/* STMT is either a GIMPLE_COND or a comparison assignment.  */
1656 if (gimple_code (stmt) == GIMPLE_COND)
1658 gcond *cond_stmt = as_a <gcond *> (stmt);
1659 op0 = gimple_cond_lhs_ptr (cond_stmt);
1660 op1 = gimple_cond_rhs_ptr (cond_stmt);
1662 else
1664 op0 = gimple_assign_rhs1_ptr (stmt);
1665 op1 = gimple_assign_rhs2_ptr (stmt);
/* (Re)initialize the static fallback objects.  */
1668 zero = integer_zero_node;
1669 const_iv.step = integer_zero_node;
1671 if (TREE_CODE (*op0) == SSA_NAME)
1672 iv0 = get_iv (data, *op0);
1673 if (TREE_CODE (*op1) == SSA_NAME)
1674 iv1 = get_iv (data, *op1);
1676 /* Exactly one of the compared values must be an iv, and the other one must
1677 be an invariant. */
1678 if (!iv0 || !iv1)
1679 goto end;
1681 if (integer_zerop (iv0->step))
1683 /* Control variable may be on the other side. */
1684 std::swap (op0, op1);
1685 std::swap (iv0, iv1);
1687 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1689 end:
/* The output pointers are filled in even on failure.  */
1690 if (control_var)
1691 *control_var = op0;
1692 if (iv_var)
1693 *iv_var = iv0;
1694 if (bound)
1695 *bound = op1;
1696 if (iv_bound)
1697 *iv_bound = iv1;
1699 return ret;
1702 /* Checks whether the condition in STMT is interesting and if so,
1703 records it. */
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1708 tree *var_p, *bound_p;
1709 struct iv *var_iv;
1711 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1713 find_interesting_uses_op (data, *var_p);
1714 find_interesting_uses_op (data, *bound_p);
1715 return;
1718 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722 relative to the loop LOOP, i.e. if all its operands are defined
1723 outside of the returned loop. Returns NULL if EXPR is not
1724 even obviously invariant in LOOP. */
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1729 basic_block def_bb;
1730 unsigned i, len;
/* Constants are invariant everywhere.  */
1732 if (is_gimple_min_invariant (expr))
1733 return current_loops->tree_root;
1735 if (TREE_CODE (expr) == SSA_NAME)
1737 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738 if (def_bb)
1740 if (flow_bb_inside_loop_p (loop, def_bb))
1741 return NULL;
/* Invariant in every loop strictly inside the defining one.  */
1742 return superloop_at_depth (loop,
1743 loop_depth (def_bb->loop_father) + 1);
1746 return current_loops->tree_root;
1749 if (!EXPR_P (expr))
1750 return NULL;
/* For compound expressions, the answer is the deepest of the
   operands' answers; any NULL operand poisons the whole result.  */
1752 unsigned maxdepth = 0;
1753 len = TREE_OPERAND_LENGTH (expr);
1754 for (i = 0; i < len; i++)
1756 struct loop *ivloop;
1757 if (!TREE_OPERAND (expr, i))
1758 continue;
1760 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761 if (!ivloop)
1762 return NULL;
1763 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1766 return superloop_at_depth (loop, maxdepth);
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770 i.e. if all its operands are defined outside of the LOOP. LOOP
1771 should not be the function body. */
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1776 basic_block def_bb;
1777 unsigned i, len;
1779 gcc_assert (loop_depth (loop) > 0);
1781 if (is_gimple_min_invariant (expr))
1782 return true;
1784 if (TREE_CODE (expr) == SSA_NAME)
1786 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787 if (def_bb
1788 && flow_bb_inside_loop_p (loop, def_bb))
1789 return false;
1791 return true;
1794 if (!EXPR_P (expr))
1795 return false;
1797 len = TREE_OPERAND_LENGTH (expr);
1798 for (i = 0; i < len; i++)
1799 if (TREE_OPERAND (expr, i)
1800 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801 return false;
1803 return true;
1806 /* Given expression EXPR which computes inductive values with respect
1807 to loop recorded in DATA, this function returns biv from which EXPR
1808 is derived by tracing definition chains of ssa variables in EXPR.
Returns NULL when no deriving biv can be found.  */
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1813 struct iv *iv;
1814 unsigned i, n;
1815 tree e2, e1;
1816 enum tree_code code;
1817 gimple *stmt;
1819 if (expr == NULL_TREE)
1820 return NULL;
1822 if (is_gimple_min_invariant (expr))
1823 return NULL;
/* For compound expressions, the first operand yielding a biv wins.  */
1825 code = TREE_CODE (expr);
1826 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1828 n = TREE_OPERAND_LENGTH (expr);
1829 for (i = 0; i < n; i++)
1831 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832 if (iv)
1833 return iv;
1837 /* Stop if it's not ssa name. */
1838 if (code != SSA_NAME)
1839 return NULL;
1841 iv = get_iv (data, expr)
1842 if (!iv || integer_zerop (iv->step))
1843 return NULL;
1844 else if (iv->biv_p)
1845 return iv;
/* Otherwise trace through the defining statement.  */
1847 stmt = SSA_NAME_DEF_STMT (expr);
1848 if (gphi *phi = dyn_cast <gphi *> (stmt))
1850 ssa_op_iter iter;
1851 use_operand_p use_p;
1853 if (virtual_operand_p (gimple_phi_result (phi)))
1854 return NULL;
1856 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1858 tree use = USE_FROM_PTR (use_p);
1859 iv = find_deriving_biv_for_expr (data, use);
1860 if (iv)
1861 return iv;
1863 return NULL;
1865 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866 return NULL;
1868 e1 = gimple_assign_rhs1 (stmt);
1869 code = gimple_assign_rhs_code (stmt);
1870 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871 return find_deriving_biv_for_expr (data, e1);
1873 switch (code)
1875 case MULT_EXPR:
1876 case PLUS_EXPR:
1877 case MINUS_EXPR:
1878 case POINTER_PLUS_EXPR:
1879 /* Increments, decrements and multiplications by a constant
1880 are simple. */
1881 e2 = gimple_assign_rhs2 (stmt);
1882 iv = find_deriving_biv_for_expr (data, e2);
1883 if (iv)
1884 return iv;
1886 /* Fallthru. */
1887 CASE_CONVERT:
1888 /* Casts are simple. */
1889 return find_deriving_biv_for_expr (data, e1);
1891 default:
1892 break;
1895 return NULL;
1898 /* Record BIV, its predecessor and successor that they are used in
1899 address type uses.  Decrements data->bivs_not_used_in_addr for each
biv so marked.  */
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1904 unsigned i;
1905 tree type, base_1, base_2;
1906 bitmap_iterator bi;
1908 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909 || biv->have_address_use || !biv->no_overflow)
1910 return;
1912 type = TREE_TYPE (biv->base);
1913 if (!INTEGRAL_TYPE_P (type))
1914 return;
1916 biv->have_address_use = true;
1917 data->bivs_not_used_in_addr--;
/* BIV's base one step ahead, to match a successor biv's base.  */
1918 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1921 struct iv *iv = ver_info (data, i)->iv;
1923 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924 || iv->have_address_use || !iv->no_overflow)
1925 continue;
1927 if (type != TREE_TYPE (iv->base)
1928 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929 continue;
/* Only bivs with identical step can be predecessor/successor.  */
1931 if (!operand_equal_p (biv->step, iv->step, 0))
1932 continue;
1934 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935 if (operand_equal_p (base_1, iv->base, 0)
1936 || operand_equal_p (base_2, biv->base, 0))
1938 iv->have_address_use = true;
1939 data->bivs_not_used_in_addr--;
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945 initial ones. Returns false when the value of the index cannot be determined.
1946 Callback for for_each_index. */
/* State threaded through idx_find_step via for_each_index.  */
1948 struct ifs_ivopts_data
1950 struct ivopts_data *ivopts_data;
/* Statement containing the memory reference being analyzed.  */
1951 gimple *stmt;
/* Accumulated step (in sizetype) of the indices processed so far.  */
1952 tree step;
/* See the comment above struct ifs_ivopts_data: replaces *IDX (an index
   of BASE) with its initial value and accumulates its step into
   DATA->step.  Returns false when the reference cannot be handled.  */
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1958 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959 struct iv *iv;
1960 bool use_overflow_semantics = false;
1961 tree step, iv_base, iv_step, lbound, off;
1962 struct loop *loop = dta->ivopts_data->current_loop;
1964 /* If base is a component ref, require that the offset of the reference
1965 be invariant. */
1966 if (TREE_CODE (base) == COMPONENT_REF)
1968 off = component_ref_field_offset (base);
1969 return expr_invariant_in_loop_p (loop, off);
1972 /* If base is array, first check whether we will be able to move the
1973 reference out of the loop (in order to take its address in strength
1974 reduction). In order for this to work we need both lower bound
1975 and step to be loop invariants. */
1976 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1978 /* Moreover, for a range, the size needs to be invariant as well. */
1979 if (TREE_CODE (base) == ARRAY_RANGE_REF
1980 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981 return false;
1983 step = array_ref_element_size (base);
1984 lbound = array_ref_low_bound (base);
1986 if (!expr_invariant_in_loop_p (loop, step)
1987 || !expr_invariant_in_loop_p (loop, lbound))
1988 return false;
/* Non-SSA indices are constants; nothing to accumulate.  */
1991 if (TREE_CODE (*idx) != SSA_NAME)
1992 return true;
1994 iv = get_iv (dta->ivopts_data, *idx);
1995 if (!iv)
1996 return false;
1998 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1999 *&x[0], which is not folded and does not trigger the
2000 ARRAY_REF path below. */
2001 *idx = iv->base;
2003 if (integer_zerop (iv->step))
2004 return true;
2006 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2008 step = array_ref_element_size (base);
2010 /* We only handle addresses whose step is an integer constant. */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 return false;
2014 else
2015 /* The step for pointer arithmetics already is 1 byte. */
2016 step = size_one_node;
2018 iv_base = iv->base;
2019 iv_step = iv->step;
2020 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021 use_overflow_semantics = true;
/* Rewrite the iv into sizetype; refuse indices that might wrap.  */
2023 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024 sizetype, &iv_base, &iv_step, dta->stmt,
2025 use_overflow_semantics))
2027 /* The index might wrap. */
2028 return false;
/* Accumulate element_size * iv_step into the total step.  */
2031 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2034 if (dta->ivopts_data->bivs_not_used_in_addr)
2036 if (!iv->biv_p)
2037 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2039 record_biv_for_address_use (dta->ivopts_data, iv);
2041 return true;
2044 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2045 object is passed to it in DATA. */
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049 void *vdata)
2051 struct ivopts_data *data = (struct ivopts_data *) vdata;
2052 find_interesting_uses_op (data, *idx);
2053 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2055 find_interesting_uses_op (data, array_ref_element_size (base));
2056 find_interesting_uses_op (data, array_ref_low_bound (base));
2058 return true;
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062 store cst to MUL and return true. Otherwise return false.
2063 The returned value is always sign-extended, regardless of the
2064 signedness of TOP and BOT. */
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2069 tree mby;
2070 enum tree_code code;
2071 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072 widest_int res, p0, p1;
2074 STRIP_NOPS (top);
2075 STRIP_NOPS (bot);
/* Identical operands: multiple is trivially 1.  */
2077 if (operand_equal_p (top, bot, 0))
2079 *mul = 1;
2080 return true;
2083 code = TREE_CODE (top);
2084 switch (code)
2086 case MULT_EXPR:
2087 mby = TREE_OPERAND (top, 1);
2088 if (TREE_CODE (mby) != INTEGER_CST)
2089 return false;
2091 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092 return false;
/* Results are truncated to TOP's precision via sign extension.  */
2094 *mul = wi::sext (res * wi::to_widest (mby), precision);
2095 return true;
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101 return false;
2103 if (code == MINUS_EXPR)
2104 p1 = -p1;
2105 *mul = wi::sext (p0 + p1, precision);
2106 return true;
2108 case INTEGER_CST:
2109 if (TREE_CODE (bot) != INTEGER_CST)
2110 return false;
2112 p0 = widest_int::from (top, SIGNED);
2113 p1 = widest_int::from (bot, SIGNED);
2114 if (p1 == 0)
2115 return false;
/* Succeed only when the division is exact (remainder RES is 0).  */
2116 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117 return res == 0;
2119 default:
2120 return false;
2124 /* Return true if memory reference REF with step STEP may be unaligned. */
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2129 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130 thus they are not misaligned. */
2131 if (TREE_CODE (ref) == TARGET_MEM_REF)
2132 return false;
/* Required alignment is the larger of the type's and its mode's.  */
2134 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2138 unsigned HOST_WIDE_INT bitpos;
2139 unsigned int ref_align;
2140 get_object_alignment_1 (ref, &ref_align, &bitpos);
2141 if (ref_align < align
2142 || (bitpos % align) != 0
2143 || (bitpos % BITS_PER_UNIT) != 0)
2144 return true;
/* Even an aligned start can go unaligned if the step is not a
   multiple of the required alignment.  */
2146 unsigned int trailing_zeros = tree_ctz (step);
2147 if (trailing_zeros < HOST_BITS_PER_INT
2148 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149 return true;
2151 return false;
2154 /* Return true if EXPR may be non-addressable. */
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2159 switch (TREE_CODE (expr))
2161 case TARGET_MEM_REF:
2162 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163 target, thus they are always addressable. */
2164 return false;
2166 case MEM_REF:
2167 /* Likewise for MEM_REFs, modulo the storage order. */
2168 return REF_REVERSE_STORAGE_ORDER (expr);
2170 case BIT_FIELD_REF:
2171 if (REF_REVERSE_STORAGE_ORDER (expr))
2172 return true;
2173 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2175 case COMPONENT_REF:
2176 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177 return true;
/* A field can be marked non-addressable; also recurse on the base.  */
2178 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2181 case ARRAY_REF:
2182 case ARRAY_RANGE_REF:
2183 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184 return true;
2185 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2187 case VIEW_CONVERT_EXPR:
2188 /* This kind of view-conversions may wrap non-addressable objects
2189 and make them look addressable. After some processing the
2190 non-addressability may be uncovered again, causing ADDR_EXPRs
2191 of inappropriate objects to be built. */
2192 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194 return true;
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2197 CASE_CONVERT:
/* Conversions are conservatively treated as non-addressable.  */
2198 return true;
2200 default:
2201 break;
2204 return false;
2207 /* Finds addresses in *OP_P inside STMT and records them as address type
uses; on failure the indices are recorded as plain uses instead.  */
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211 tree *op_p)
2213 tree base = *op_p, step = size_zero_node;
2214 struct iv *civ;
2215 struct ifs_ivopts_data ifs_ivopts_data;
2217 /* Do not play with volatile memory references. A bit too conservative,
2218 perhaps, but safe. */
2219 if (gimple_has_volatile_ops (stmt))
2220 goto fail;
2222 /* Ignore bitfields for now. Not really something terribly complicated
2223 to handle. TODO. */
2224 if (TREE_CODE (base) == BIT_FIELD_REF)
2225 goto fail;
2227 base = unshare_expr (base);
/* TARGET_MEM_REFs are handled field by field: substitute iv bases
   into TMR_BASE, TMR_INDEX2 and TMR_INDEX and accumulate the step.  */
2229 if (TREE_CODE (base) == TARGET_MEM_REF)
2231 tree type = build_pointer_type (TREE_TYPE (base));
2232 tree astep;
2234 if (TMR_BASE (base)
2235 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2237 civ = get_iv (data, TMR_BASE (base));
2238 if (!civ)
2239 goto fail;
2241 TMR_BASE (base) = civ->base;
2242 step = civ->step;
2244 if (TMR_INDEX2 (base)
2245 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2247 civ = get_iv (data, TMR_INDEX2 (base));
2248 if (!civ)
2249 goto fail;
2251 TMR_INDEX2 (base) = civ->base;
2252 step = civ->step;
2254 if (TMR_INDEX (base)
2255 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2257 civ = get_iv (data, TMR_INDEX (base));
2258 if (!civ)
2259 goto fail;
2261 TMR_INDEX (base) = civ->base;
2262 astep = civ->step;
2264 if (astep)
2266 if (TMR_STEP (base))
2267 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2269 step = fold_build2 (PLUS_EXPR, type, step, astep);
2273 if (integer_zerop (step))
2274 goto fail;
2275 base = tree_mem_ref_addr (type, base);
2277 else
/* Generic references: walk all indices, replacing each with its
   initial value and summing the total step.  */
2279 ifs_ivopts_data.ivopts_data = data;
2280 ifs_ivopts_data.stmt = stmt;
2281 ifs_ivopts_data.step = size_zero_node;
2282 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283 || integer_zerop (ifs_ivopts_data.step))
2284 goto fail;
2285 step = ifs_ivopts_data.step;
2287 /* Check that the base expression is addressable. This needs
2288 to be done after substituting bases of IVs into it. */
2289 if (may_be_nonaddressable_p (base))
2290 goto fail;
2292 /* Moreover, on strict alignment platforms, check that it is
2293 sufficiently aligned. */
2294 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295 goto fail;
2297 base = build_fold_addr_expr (base);
2299 /* Substituting bases of IVs into the base expression might
2300 have caused folding opportunities. */
2301 if (TREE_CODE (base) == ADDR_EXPR)
2303 tree *ref = &TREE_OPERAND (base, 0);
2304 while (handled_component_p (*ref))
2305 ref = &TREE_OPERAND (*ref, 0);
2306 if (TREE_CODE (*ref) == MEM_REF)
2308 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309 TREE_OPERAND (*ref, 0),
2310 TREE_OPERAND (*ref, 1));
2311 if (tem)
2312 *ref = tem;
2317 civ = alloc_iv (data, base, step);
2318 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319 return;
2321 fail:
/* Fallback: record the indices themselves as interesting uses.  */
2322 for_each_index (op_p, idx_record_use, data);
2325 /* Finds and records invariants used in STMT. */
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2330 ssa_op_iter iter;
2331 use_operand_p use_p;
2332 tree op;
2334 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2336 op = USE_FROM_PTR (use_p);
2337 record_invariant (data, op, false);
2341 /* Finds interesting uses of induction variables in the statement STMT. */
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2346 struct iv *iv;
2347 tree op, *lhs, *rhs;
2348 ssa_op_iter iter;
2349 use_operand_p use_p;
2350 enum tree_code code;
2352 find_invariants_stmt (data, stmt);
2354 if (gimple_code (stmt) == GIMPLE_COND)
2356 find_interesting_uses_cond (data, stmt);
2357 return;
2360 if (is_gimple_assign (stmt))
2362 lhs = gimple_assign_lhs_ptr (stmt);
2363 rhs = gimple_assign_rhs1_ptr (stmt);
2365 if (TREE_CODE (*lhs) == SSA_NAME)
2367 /* If the statement defines an induction variable, the uses are not
2368 interesting by themselves. */
2370 iv = get_iv (data, *lhs);
2372 if (iv && !integer_zerop (iv->step))
2373 return;
2376 code = gimple_assign_rhs_code (stmt);
/* Loads/stores are address uses; plain values go through
   find_interesting_uses_op; comparisons through ..._cond.  */
2377 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378 && (REFERENCE_CLASS_P (*rhs)
2379 || is_gimple_val (*rhs)))
2381 if (REFERENCE_CLASS_P (*rhs))
2382 find_interesting_uses_address (data, stmt, rhs);
2383 else
2384 find_interesting_uses_op (data, *rhs);
2386 if (REFERENCE_CLASS_P (*lhs))
2387 find_interesting_uses_address (data, stmt, lhs);
2388 return;
2390 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2392 find_interesting_uses_cond (data, stmt);
2393 return;
2396 /* TODO -- we should also handle address uses of type
2398 memory = call (whatever);
2402 call (memory). */
/* Header PHIs defining a real iv need no further processing.  */
2405 if (gimple_code (stmt) == GIMPLE_PHI
2406 && gimple_bb (stmt) == data->current_loop->header)
2408 iv = get_iv (data, PHI_RESULT (stmt));
2410 if (iv && !integer_zerop (iv->step))
2411 return;
/* Fallback: record every iv operand as a nonlinear use.  */
2414 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2416 op = USE_FROM_PTR (use_p);
2418 if (TREE_CODE (op) != SSA_NAME)
2419 continue;
2421 iv = get_iv (data, op);
2422 if (!iv)
2423 continue;
2425 find_interesting_uses_op (data, op);
2429 /* Finds interesting uses of induction variables outside of loops
2430 on loop exit edge EXIT.  Each non-virtual PHI argument flowing over
     EXIT into the destination block is recorded as a value use.  */
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2435 gphi *phi;
2436 gphi_iterator psi;
2437 tree def;
2439 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2441 phi = psi.phi ();
2442 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443 if (!virtual_operand_p (def))
2444 find_interesting_uses_op (data, def);
2448 /* Compute maximum offset of [base + offset] addressing mode
2449 for memory reference represented by USE.  Results are memoized in a
     function-local cache indexed by (address space, memory mode), since
     the answer depends only on those two, not on the particular USE.  */
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2454 int width;
2455 rtx reg, addr;
2456 HOST_WIDE_INT i, off;
2457 unsigned list_index, num;
2458 addr_space_t as;
2459 machine_mode mem_mode, addr_mode;
2460 static vec<HOST_WIDE_INT> max_offset_list;
2462 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
     /* Grow the cache on demand; -1 marks a slot as not yet computed.  */
2465 num = max_offset_list.length ();
2466 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467 if (list_index >= num)
2469 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470 for (; num < max_offset_list.length (); num++)
2471 max_offset_list[num] = -1;
2474 off = max_offset_list[list_index];
2475 if (off != -1)
2476 return off;
     /* Build a scratch (reg + const) address and probe it with the
	target's legitimate-address predicate, trying offsets of the
	form 2^i - 1 from the widest bit position downward.  */
2478 addr_mode = targetm.addr_space.address_mode (as);
2479 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2482 width = GET_MODE_BITSIZE (addr_mode) - 1;
2483 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484 width = HOST_BITS_PER_WIDE_INT - 1;
2486 for (i = width; i > 0; i--)
2488 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490 if (memory_address_addr_space_p (mem_mode, addr, as))
2491 break;
2493 /* For some strict-alignment targets, the offset must be naturally
2494 aligned.  Try an aligned offset if mem_mode is not QImode.  */
2495 off = ((unsigned HOST_WIDE_INT) 1 << i);
2496 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2498 off -= GET_MODE_SIZE (mem_mode);
2499 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500 if (memory_address_addr_space_p (mem_mode, addr, as))
2501 break;
     /* No constant offset is accepted at all.  */
2504 if (i == 0)
2505 off = 0;
2507 max_offset_list[list_index] = off;
2508 return off;
2511 /* Comparison function to sort group in ascending order of addr_offset.
     qsort-style callback; A and B point to iv_use pointers.  Returns
     only -1/0/1, so the sort is stable with respect to equal offsets
     as far as this comparator is concerned.  */
2513 static int
2514 group_compare_offset (const void *a, const void *b)
2516 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2519 if ((*u1)->addr_offset != (*u2)->addr_offset)
2520 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521 else
2522 return 0;
2525 /* Check if small groups should be split.  Return true if no group
2526 contains more than two uses with distinct addr_offsets.  Return
2527 false otherwise.  We want to split such groups because:
2529 1) Small groups don't have much benefit and may interfere with
2530 general candidate selection.
2531 2) Size for problem with only small groups is usually small and
2532 general algorithm can handle it well.
2534 TODO -- Above claim may not hold when we want to merge memory
2535 accesses with consecutive addresses.
     As a side effect, every address group's vuses vector is sorted by
     ascending addr_offset, which split_address_groups relies on.  */
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2540 unsigned int i, j, distinct = 1;
2541 struct iv_use *pre;
2542 struct iv_group *group;
2544 for (i = 0; i < data->vgroups.length (); i++)
2546 group = data->vgroups[i];
2547 if (group->vuses.length () == 1)
2548 continue;
2550 gcc_assert (group->type == USE_ADDRESS);
     /* Two-element groups are sorted with a single swap; larger ones
	go through qsort.  */
2551 if (group->vuses.length () == 2)
2553 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554 std::swap (group->vuses[0], group->vuses[1]);
2556 else
2557 group->vuses.qsort (group_compare_offset);
     /* Once some group has shown more than two distinct offsets, keep
	looping only to finish sorting the remaining groups.  */
2559 if (distinct > 2)
2560 continue;
2562 distinct = 1;
2563 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2565 if (group->vuses[j]->addr_offset != pre->addr_offset)
2567 pre = group->vuses[j];
2568 distinct++;
2571 if (distinct > 2)
2572 break;
2576 return (distinct <= 2);
2579 /* For each group of address type uses, this function further groups
2580 these uses according to the maximum offset supported by target's
2581 [base + offset] addressing mode.  Assumes each group's vuses are
     sorted by addr_offset (done in split_small_address_groups_p).  */
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2586 unsigned int i, j;
2587 HOST_WIDE_INT max_offset = -1;
2589 /* Reset max offset to split all small groups.  */
2590 if (split_small_address_groups_p (data))
2591 max_offset = 0;
2593 for (i = 0; i < data->vgroups.length (); i++)
2595 struct iv_group *group = data->vgroups[i];
2596 struct iv_use *use = group->vuses[0];
2598 use->id = 0;
2599 use->group_id = group->id;
2600 if (group->vuses.length () == 1)
2601 continue;
     /* max_offset == 0 means "split everything"; otherwise compute the
	target's limit for this use's addressing mode.  */
2603 if (max_offset != 0)
2604 max_offset = compute_max_addr_offset (use);
2606 for (j = 1; j < group->vuses.length (); j++)
2608 struct iv_use *next = group->vuses[j];
2610 /* Only uses with offset that can fit in offset part against
2611 the first use can be grouped together.  */
2612 if (next->addr_offset - use->addr_offset
2613 > (unsigned HOST_WIDE_INT) max_offset)
2614 break;
2616 next->id = j;
2617 next->group_id = group->id;
2619 /* Split group.  The tail [j, end) moves into a freshly recorded
2620 group, which will be revisited by this same loop later.  */
2620 if (j < group->vuses.length ())
2622 struct iv_group *new_group = record_group (data, group->type);
2623 new_group->vuses.safe_splice (group->vuses);
2624 new_group->vuses.block_remove (0, j);
2625 group->vuses.truncate (j);
2630 /* Finds uses of the induction variables that are interesting.
     Walks every block of the current loop: records uses escaping on
     exit edges, then uses in PHIs and in each non-debug statement;
     finally splits address groups and optionally dumps the result.  */
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2635 basic_block bb;
2636 gimple_stmt_iterator bsi;
2637 basic_block *body = get_loop_body (data->current_loop);
2638 unsigned i;
2639 edge e;
2641 for (i = 0; i < data->current_loop->num_nodes; i++)
2643 edge_iterator ei;
2644 bb = body[i];
     /* An edge leaving the loop (but not to the function exit block)
	is a loop exit: values live across it are outside uses.  */
2646 FOR_EACH_EDGE (e, ei, bb->succs)
2647 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2649 find_interesting_uses_outside (data, e);
2651 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2653 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654 if (!is_gimple_debug (gsi_stmt (bsi)))
2655 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2658 split_address_groups (data);
2660 if (dump_file && (dump_flags & TDF_DETAILS))
2662 bitmap_iterator bi;
2664 fprintf (dump_file, "\n<Invariant Vars>:\n");
2665 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2667 struct version_info *info = ver_info (data, i);
2668 if (info->inv_id)
2670 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671 print_generic_expr (dump_file, info->name, TDF_SLIM);
2672 fprintf (dump_file, "%s\n",
2673 info->has_nonlin_use ? "" : "\t(eliminable)");
2677 fprintf (dump_file, "\n<IV Groups>:\n");
2678 dump_groups (dump_file, data);
2679 fprintf (dump_file, "\n");
2682 free (body);
2685 /* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
2686 is true, assume we are inside an address.  If TOP_COMPREF is true, assume
2687 we are at the top-level of the processed address.  Returns EXPR with
     the stripped constant parts replaced by zero (or ORIG_EXPR unchanged
     when nothing could be stripped).  Recursion accumulates offsets from
     subexpressions into *OFFSET.  */
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691 HOST_WIDE_INT *offset)
2693 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694 enum tree_code code;
2695 tree type, orig_type = TREE_TYPE (expr);
2696 HOST_WIDE_INT off0, off1, st;
2697 tree orig_expr = expr;
2699 STRIP_NOPS (expr);
2701 type = TREE_TYPE (expr);
2702 code = TREE_CODE (expr);
2703 *offset = 0;
2705 switch (code)
2707 case INTEGER_CST:
     /* A whole constant becomes pure offset; zero (or a constant too
	wide for HOST_WIDE_INT) is left alone.  */
2708 if (!cst_and_fits_in_hwi (expr)
2709 || integer_zerop (expr))
2710 return orig_expr;
2712 *offset = int_cst_value (expr);
2713 return build_int_cst (orig_type, 0);
2715 case POINTER_PLUS_EXPR:
2716 case PLUS_EXPR:
2717 case MINUS_EXPR:
     /* Strip both operands and combine their offsets; rebuild the sum
	only when an operand actually changed.  */
2718 op0 = TREE_OPERAND (expr, 0);
2719 op1 = TREE_OPERAND (expr, 1);
2721 op0 = strip_offset_1 (op0, false, false, &off0);
2722 op1 = strip_offset_1 (op1, false, false, &off1);
2724 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725 if (op0 == TREE_OPERAND (expr, 0)
2726 && op1 == TREE_OPERAND (expr, 1))
2727 return orig_expr;
2729 if (integer_zerop (op1))
2730 expr = op0;
2731 else if (integer_zerop (op0))
2733 if (code == MINUS_EXPR)
2734 expr = fold_build1 (NEGATE_EXPR, type, op1);
2735 else
2736 expr = op1;
2738 else
2739 expr = fold_build2 (code, type, op0, op1);
2741 return fold_convert (orig_type, expr);
2743 case MULT_EXPR:
     /* Only a constant multiplier can scale a stripped offset.  */
2744 op1 = TREE_OPERAND (expr, 1);
2745 if (!cst_and_fits_in_hwi (op1))
2746 return orig_expr;
2748 op0 = TREE_OPERAND (expr, 0);
2749 op0 = strip_offset_1 (op0, false, false, &off0);
2750 if (op0 == TREE_OPERAND (expr, 0))
2751 return orig_expr;
2753 *offset = off0 * int_cst_value (op1);
2754 if (integer_zerop (op0))
2755 expr = op0;
2756 else
2757 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2759 return fold_convert (orig_type, expr);
2761 case ARRAY_REF:
2762 case ARRAY_RANGE_REF:
2763 if (!inside_addr)
2764 return orig_expr;
     /* Constant part of the index scales by the element size.  */
2766 step = array_ref_element_size (expr);
2767 if (!cst_and_fits_in_hwi (step))
2768 break;
2770 st = int_cst_value (step);
2771 op1 = TREE_OPERAND (expr, 1);
2772 op1 = strip_offset_1 (op1, false, false, &off1);
2773 *offset = off1 * st;
2775 if (top_compref
2776 && integer_zerop (op1))
2778 /* Strip the component reference completely.  */
2779 op0 = TREE_OPERAND (expr, 0);
2780 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781 *offset += off0;
2782 return op0;
2784 break;
2786 case COMPONENT_REF:
2788 tree field;
2790 if (!inside_addr)
2791 return orig_expr;
2793 tmp = component_ref_field_offset (expr);
2794 field = TREE_OPERAND (expr, 1);
2795 if (top_compref
2796 && cst_and_fits_in_hwi (tmp)
2797 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2799 HOST_WIDE_INT boffset, abs_off;
2801 /* Strip the component reference completely.  The byte offset is
2802 the DECL_FIELD_OFFSET part (TMP) plus the bit offset converted
     to bytes, with its sign preserved through abs_hwi to avoid
     implementation-defined division of negative values.  */
2802 op0 = TREE_OPERAND (expr, 0);
2803 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806 if (boffset < 0)
2807 abs_off = -abs_off;
2809 *offset = off0 + int_cst_value (tmp) + abs_off;
2810 return op0;
2813 break;
2815 case ADDR_EXPR:
     /* Entering an address: process the operand as the top-level
	component reference.  */
2816 op0 = TREE_OPERAND (expr, 0);
2817 op0 = strip_offset_1 (op0, true, true, &off0);
2818 *offset += off0;
2820 if (op0 == TREE_OPERAND (expr, 0))
2821 return orig_expr;
2823 expr = build_fold_addr_expr (op0);
2824 return fold_convert (orig_type, expr);
2826 case MEM_REF:
2827 /* ??? Offset operand?  */
2828 inside_addr = false;
2829 break;
2831 default:
2832 return orig_expr;
2835 /* Default handling of expressions for that we want to recurse into
2836 the first operand.  */
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839 *offset += off0;
2841 if (op0 == TREE_OPERAND (expr, 0)
2842 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843 return orig_expr;
2845 expr = copy_node (expr);
2846 TREE_OPERAND (expr, 0) = op0;
2847 if (op1)
2848 TREE_OPERAND (expr, 1) = op1;
2850 /* Inside address, we might strip the top level component references,
2851 thus changing type of the expression.  Handling of ADDR_EXPR
2852 will fix that.  */
2853 expr = fold_convert (orig_type, expr);
2855 return expr;
2858 /* Strips constant offsets from EXPR and stores them to OFFSET.
     Thin wrapper around strip_offset_1 with INSIDE_ADDR and TOP_COMPREF
     false; note the signed HOST_WIDE_INT offset is returned through an
     unsigned out-parameter.  */
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2863 HOST_WIDE_INT off;
2864 tree core = strip_offset_1 (expr, false, false, &off);
2865 *offset = off;
2866 return core;
2869 /* Returns variant of TYPE that can be used as base for different uses.
2870 We return unsigned type with the same precision, which avoids problems
2871 with overflows.  Pointers and signed integers are both mapped to the
     corresponding unsigned type; an already-unsigned type is returned
     unchanged.  */
2873 static tree
2874 generic_type_for (tree type)
2876 if (POINTER_TYPE_P (type))
2877 return unsigned_type_for (type);
2879 if (TYPE_UNSIGNED (type))
2880 return type;
2882 return unsigned_type_for (type);
2885 /* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
2886 the bitmap to that we should store it.  fd_ivopts_data must be set by
     the caller before walking (walk_tree's single DATA slot already
     carries the bitmap pointer).  The bitmap is allocated lazily.  */
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2892 bitmap *depends_on = (bitmap *) data;
2893 struct version_info *info;
2895 if (TREE_CODE (*expr_p) != SSA_NAME)
2896 return NULL_TREE;
2897 info = name_info (fd_ivopts_data, *expr_p);
     /* Only names recorded as invariants (and without a nonlinear use)
	contribute a dependence.  */
2899 if (!info->inv_id || info->has_nonlin_use)
2900 return NULL_TREE;
2902 if (!*depends_on)
2903 *depends_on = BITMAP_ALLOC (NULL);
2904 bitmap_set_bit (*depends_on, info->inv_id);
2906 return NULL_TREE;
2909 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
2910 position to POS.  If USE is not NULL, the candidate is set as related to
2911 it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
2912 replacement of the final value of the iv by a direct computation.
     Returns the (possibly pre-existing) candidate, or NULL when
     -fkeep-gc-roots-live forbids creating pointer candidates.
     De-duplicates against already-recorded candidates with the same
     position, increment point, base, step and precision.  */
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916 tree base, tree step, bool important, enum iv_position pos,
2917 struct iv_use *use, gimple *incremented_at,
2918 struct iv *orig_iv = NULL)
2920 unsigned i;
2921 struct iv_cand *cand = NULL;
2922 tree type, orig_type;
2924 gcc_assert (base && step);
2926 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927 live, but the ivopts code may replace a real pointer with one
2928 pointing before or after the memory block that is then adjusted
2929 into the memory block during the loop.  FIXME: It would likely be
2930 better to actually force the pointer live and still use ivopts;
2931 for example, it would be enough to write the pointer into memory
2932 and keep it there until after the loop.  */
2933 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934 return NULL;
2936 /* For non-original variables, make sure their values are computed in a type
2937 that does not invoke undefined behavior on overflows (since in general,
2938 we cannot prove that these induction variables are non-wrapping).  */
2939 if (pos != IP_ORIGINAL)
2941 orig_type = TREE_TYPE (base);
2942 type = generic_type_for (orig_type);
2943 if (type != orig_type)
2945 base = fold_convert (type, base);
2946 step = fold_convert (type, step);
     /* Look for an existing equivalent candidate first.  */
2950 for (i = 0; i < data->vcands.length (); i++)
2952 cand = data->vcands[i];
2954 if (cand->pos != pos)
2955 continue;
2957 if (cand->incremented_at != incremented_at
2958 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959 && cand->ainc_use != use))
2960 continue;
2962 if (operand_equal_p (base, cand->iv->base, 0)
2963 && operand_equal_p (step, cand->iv->step, 0)
2964 && (TYPE_PRECISION (TREE_TYPE (base))
2965 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966 break;
     /* Not found: record a new candidate.  */
2969 if (i == data->vcands.length ())
2971 cand = XCNEW (struct iv_cand);
2972 cand->id = i;
2973 cand->iv = alloc_iv (data, base, step);
2974 cand->pos = pos;
2975 if (pos != IP_ORIGINAL)
2977 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978 cand->var_after = cand->var_before;
2980 cand->important = important;
2981 cand->incremented_at = incremented_at;
2982 data->vcands.safe_push (cand);
     /* A symbolic step may reference invariants; record them so the cost
	model can account for their registers.  */
2984 if (TREE_CODE (step) != INTEGER_CST)
2986 fd_ivopts_data = data;
2987 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2990 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991 cand->ainc_use = use;
2992 else
2993 cand->ainc_use = NULL;
2995 cand->orig_iv = orig_iv;
2996 if (dump_file && (dump_flags & TDF_DETAILS))
2997 dump_cand (dump_file, cand);
     /* An existing candidate becomes important if any of its additions
	asked for it.  */
3000 cand->important |= important;
3002 /* Relate candidate to the group for which it is added.  */
3003 if (use)
3004 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3006 return cand;
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010 is allowed.
3012 The purpose is to avoid splitting latch edge with a biv increment, thus
3013 creating a jump, possibly confusing other optimization passes and leaving
3014 less freedom to scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
3015 is not available (so we do not have a better alternative), or if the latch
3016 edge is already nonempty.  */
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3021 if (!ip_normal_pos (loop))
3022 return true;
3024 if (!empty_block_p (ip_end_pos (loop)))
3025 return true;
3027 return false;
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031 Important field is set to IMPORTANT.  A pre-inc/dec candidate is
     incremented IP_BEFORE_USE with base adjusted back by one step; a
     post-inc/dec candidate is incremented IP_AFTER_USE with the base
     unchanged.  Which forms are tried depends on the target's
     USE_{LOAD,STORE}_{PRE,POST}_{IN,DE}CREMENT hooks for MEM_MODE.  */
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035 bool important, struct iv_use *use)
3037 basic_block use_bb = gimple_bb (use->stmt);
3038 machine_mode mem_mode;
3039 unsigned HOST_WIDE_INT cstepi;
3041 /* If we insert the increment in any position other than the standard
3042 ones, we must ensure that it is incremented once per iteration.
3043 It must not be in an inner nested loop, or one side of an if
3044 statement.  */
3045 if (use_bb->loop_father != data->current_loop
3046 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047 || stmt_could_throw_p (use->stmt)
3048 || !cst_and_fits_in_hwi (step))
3049 return;
3051 cstepi = int_cst_value (step);
3053 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
     /* Pre-increment/decrement: step must equal +/- the access size.  */
3054 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055 || USE_STORE_PRE_INCREMENT (mem_mode))
3056 && GET_MODE_SIZE (mem_mode) == cstepi)
3057 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058 || USE_STORE_PRE_DECREMENT (mem_mode))
3059 && GET_MODE_SIZE (mem_mode) == -cstepi))
3061 enum tree_code code = MINUS_EXPR;
3062 tree new_base;
3063 tree new_step = step;
3065 if (POINTER_TYPE_P (TREE_TYPE (base)))
3067 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068 code = POINTER_PLUS_EXPR;
3070 else
3071 new_step = fold_convert (TREE_TYPE (base), new_step);
3072 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074 use->stmt);
     /* Post-increment/decrement: same size condition, original base.  */
3076 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077 || USE_STORE_POST_INCREMENT (mem_mode))
3078 && GET_MODE_SIZE (mem_mode) == cstepi)
3079 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080 || USE_STORE_POST_DECREMENT (mem_mode))
3081 && GET_MODE_SIZE (mem_mode) == -cstepi))
3083 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084 use->stmt);
3088 /* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
3089 position to POS.  If USE is not NULL, the candidate is set as related to
3090 it.  The candidate computation is scheduled before exit condition and at
3091 the end of loop.  That is: one candidate at IP_NORMAL (when available)
     and one at IP_END (when available and allowed).  */
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095 tree base, tree step, bool important, struct iv_use *use,
3096 struct iv *orig_iv = NULL)
3098 if (ip_normal_pos (data->current_loop))
3099 add_candidate_1 (data, base, step, important,
3100 IP_NORMAL, use, NULL, orig_iv);
3101 if (ip_end_pos (data->current_loop)
3102 && allow_ip_end_pos_p (data->current_loop))
3103 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3106 /* Adds standard iv candidates: {0, +, 1} in 'int', and in 'long' and
     'long long' when those are strictly wider yet still at most a word
     wide (so the arithmetic stays cheap).  */
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3111 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3113 /* The same for the 'long' type if it is still fast enough.  */
3114 if (TYPE_PRECISION
3115 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118 build_int_cst (long_integer_type_node, 1), true, NULL);
3120 /* The same for the 'long long' type if it is still fast enough.  */
3121 if (TYPE_PRECISION
3122 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125 build_int_cst (long_long_integer_type_node, 1), true, NULL)
3129 /* Adds candidates based on the old induction variable IV: the biv
     itself (possibly widened to sizetype for address uses), a variant
     with a zero initial value, and — when the biv comes from a PHI —
     an IP_ORIGINAL candidate that leaves the original iv untouched.  */
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3134 gimple *phi;
3135 tree def;
3136 struct iv_cand *cand;
3138 /* Check if this biv is used in address type use.  */
3139 if (iv->no_overflow && iv->have_address_use
3140 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3143 tree base = fold_convert (sizetype, iv->base);
3144 tree step = fold_convert (sizetype, iv->step);
3146 /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
3147 add_candidate (data, base, step, true, NULL, iv);
3148 /* Add iv cand of the original type only if it has nonlinear use.  */
3149 if (iv->nonlin_use)
3150 add_candidate (data, iv->base, iv->step, true, NULL);
3152 else
3153 add_candidate (data, iv->base, iv->step, true, NULL);
3155 /* The same, but with initial value zero.  */
3156 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157 add_candidate (data, size_int (0), iv->step, true, NULL);
3158 else
3159 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160 iv->step, true, NULL);
3162 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163 if (gimple_code (phi) == GIMPLE_PHI)
3165 /* Additionally record the possibility of leaving the original iv
3166 untouched.  */
3167 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168 /* Don't add candidate if it's from another PHI node because
3169 it's an affine iv appearing in the form of PEELED_CHREC.  */
3170 phi = SSA_NAME_DEF_STMT (def);
3171 if (gimple_code (phi) != GIMPLE_PHI)
3173 cand = add_candidate_1 (data,
3174 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175 SSA_NAME_DEF_STMT (def));
3176 if (cand)
3178 cand->var_before = iv->ssa_name;
3179 cand->var_after = def;
3182 else
3183 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3187 /* Adds candidates based on the old induction variables.  Iterates the
     relevant-version bitmap and processes every biv with a nonzero
     step.  */
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3192 unsigned i;
3193 struct iv *iv;
3194 bitmap_iterator bi;
3196 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3198 iv = ver_info (data, i)->iv;
3199 if (iv && iv->biv_p && !integer_zerop (iv->step))
3200 add_iv_candidate_for_biv (data, iv);
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable.
     Entries are keyed by a hash of BASE and STEP; a new entry is created
     on first sight and USE is appended to the entry's list of deriving
     uses either way.  */
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208 tree step, struct iv_use *use)
3210 struct iv_common_cand ent;
3211 struct iv_common_cand **slot;
3213 ent.base = base;
3214 ent.step = step;
3215 ent.hash = iterative_hash_expr (base, 0);
3216 ent.hash = iterative_hash_expr (step, ent.hash);
3218 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3219 if (*slot == NULL)
3221 *slot = new iv_common_cand ();
3222 (*slot)->base = base;
3223 (*slot)->step = step;
3224 (*slot)->uses.create (8);
3225 (*slot)->hash = ent.hash;
3226 data->iv_common_cands.safe_push ((*slot));
3229 gcc_assert (use != NULL);
3230 (*slot)->uses.safe_push (use);
3231 return;
3234 /* Comparison function used to sort common candidates; orders by
     descending number of deriving uses.  NOTE(review): the result is the
     raw unsigned difference n2 - n1 converted to int, which relies on
     use counts being far below INT_MAX — fine in practice here.  */
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3239 unsigned n1, n2;
3240 const struct iv_common_cand *const *const ccand1
3241 = (const struct iv_common_cand *const *)p1;
3242 const struct iv_common_cand *const *const ccand2
3243 = (const struct iv_common_cand *const *)p2;
3245 n1 = (*ccand1)->uses.length ();
3246 n2 = (*ccand2)->uses.length ();
3247 return n2 - n1;
3250 /* Adds IV candidates based on common candidates recorded.  Candidates
     shared by more than one use are promoted to real candidates (at the
     normal and/or end increment positions) and bound to every deriving
     use's group; the common-candidate table is then cleared.  */
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3255 unsigned i, j;
3256 struct iv_cand *cand_1, *cand_2;
3258 data->iv_common_cands.qsort (common_cand_cmp)
3259 for (i = 0; i < data->iv_common_cands.length (); i++)
3261 struct iv_common_cand *ptr = data->iv_common_cands[i];
3263 /* Only add IV candidate if it's derived from multiple uses.  The
3264 vector is sorted by use count, so we can stop at the first
     single-use entry.  */
3264 if (ptr->uses.length () <= 1)
3265 break;
3267 cand_1 = NULL;
3268 cand_2 = NULL;
3269 if (ip_normal_pos (data->current_loop))
3270 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271 false, IP_NORMAL, NULL, NULL);
3273 if (ip_end_pos (data->current_loop)
3274 && allow_ip_end_pos_p (data->current_loop))
3275 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276 false, IP_END, NULL, NULL);
3278 /* Bind deriving uses and the new candidates.  */
3279 for (j = 0; j < ptr->uses.length (); j++)
3281 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282 if (cand_1)
3283 bitmap_set_bit (group->related_cands, cand_1->id);
3284 if (cand_2)
3285 bitmap_set_bit (group->related_cands, cand_2->id);
3289 /* Release data since it is useless from this point.  */
3290 data->iv_common_cand_tab->empty ();
3291 data->iv_common_cands.truncate (0);
3294 /* Adds candidates based on the value of USE's iv.  Several variants
     are tried: the iv itself, a zero-based version, versions with the
     constant offset stripped, versions with the base object removed,
     and (for address uses) auto-increment candidates.  Most variants
     are only recorded as "common candidates" so they become real
     candidates just when shared by multiple uses.  */
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3299 unsigned HOST_WIDE_INT offset;
3300 tree base;
3301 tree basetype;
3302 struct iv *iv = use->iv;
3304 add_candidate (data, iv->base, iv->step, false, use);
3306 /* Record common candidate for use in case it can be shared by others.  */
3307 record_common_cand (data, iv->base, iv->step, use);
3309 /* Record common candidate with initial value zero.  */
3310 basetype = TREE_TYPE (iv->base);
3311 if (POINTER_TYPE_P (basetype))
3312 basetype = sizetype;
3313 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3315 /* Record common candidate with constant offset stripped in base.
3316 Like the use itself, we also add candidate directly for it.  */
3317 base = strip_offset (iv->base, &offset);
3318 if (offset || base != iv->base)
3320 record_common_cand (data, base, iv->step, use);
3321 add_candidate (data, base, iv->step, false, use);
3324 /* Record common candidate with base_object removed in base.  */
3325 if (iv->base_object != NULL)
3327 unsigned i;
3328 aff_tree aff_base;
3329 tree step, base_object = iv->base_object;
3331 base = iv->base;
3332 step = iv->step;
3333 STRIP_NOPS (base);
3334 STRIP_NOPS (step);
3335 STRIP_NOPS (base_object);
3336 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
     /* Find the base object as a coefficient-1 element of the affine
	combination of the base expression.  */
3337 for (i = 0; i < aff_base.n; i++)
3339 if (aff_base.elts[i].coef != 1)
3340 continue;
3342 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343 break;
3345 if (i < aff_base.n)
3347 aff_combination_remove_elt (&aff_base, i);
3348 base = aff_combination_to_tree (&aff_base);
3349 basetype = TREE_TYPE (base);
3350 if (POINTER_TYPE_P (basetype))
3351 basetype = sizetype;
3353 step = fold_convert (basetype, step);
3354 record_common_cand (data, base, step, use);
3355 /* Also record common candidate with offset stripped.  */
3356 base = strip_offset (base, &offset);
3357 if (offset)
3358 record_common_cand (data, base, step, use);
3362 /* At last, add auto-incremental candidates.  Make such variables
3363 important since other iv uses with same base object may be based
3364 on it.  */
3365 if (use != NULL && use->type == USE_ADDRESS)
3366 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3369 /* Adds candidates based on the uses.  Only the representative (first)
     use of each group contributes candidates; candidates common to
     several uses are then promoted in one batch.  */
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3374 unsigned i;
3376 /* Only add candidate for the first use in group.  */
3377 for (i = 0; i < data->vgroups.length (); i++)
3379 struct iv_group *group = data->vgroups[i];
3381 gcc_assert (group->vuses[0] != NULL);
3382 add_iv_candidate_for_use (data, group->vuses[0]);
3384 add_iv_candidate_derived_from_uses (data);
3387 /* Record important candidates and add them to related_cands bitmaps.
     Also decides whether the number of candidates is small enough to
     consider every (group, candidate) pair exhaustively.  */
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3392 unsigned i;
3393 struct iv_group *group;
3395 for (i = 0; i < data->vcands.length (); i++)
3397 struct iv_cand *cand = data->vcands[i];
3399 if (cand->important)
3400 bitmap_set_bit (data->important_candidates, i);
3403 data->consider_all_candidates = (data->vcands.length ()
3404 <= CONSIDER_ALL_CANDIDATES_BOUND);
3406 /* Add important candidates to groups' related_cands bitmaps.  */
3407 for (i = 0; i < data->vgroups.length (); i++)
3409 group = data->vgroups[i];
3410 bitmap_ior_into (group->related_cands, data->important_candidates);
3414 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3415 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3416 we allocate a simple list to every use.  The list size is rounded up
     to a power of two so that set/get_group_iv_cost can hash by masking
     the candidate id.  */
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3421 unsigned i, size, s;
3423 for (i = 0; i < data->vgroups.length (); i++)
3425 struct iv_group *group = data->vgroups[i];
3427 if (data->consider_all_candidates)
3428 size = data->vcands.length ();
3429 else
3431 s = bitmap_count_bits (group->related_cands);
3433 /* Round up to the power of two, so that moduling by it is fast.  */
3434 size = s ? (1 << ceil_log2 (s)) : 1;
3437 group->n_map_members = size;
3438 group->cost_map = XCNEWVEC (struct cost_pair, size);
3442 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3443 on invariants DEPENDS_ON and that the value used in expressing it
3444 is VALUE, and in case of iv elimination the comparison operator is COMP.
     Infinite costs are not stored at all (an absent entry means
     "infinite"); in the non-exhaustive case the entry goes into a small
     open-addressed hash table keyed by candidate id.  */
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448 struct iv_group *group, struct iv_cand *cand,
3449 comp_cost cost, bitmap depends_on, tree value,
3450 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3452 unsigned i, s;
3454 if (cost.infinite_cost_p ())
3456 BITMAP_FREE (depends_on);
3457 return;
3460 if (data->consider_all_candidates)
3462 group->cost_map[cand->id].cand = cand;
3463 group->cost_map[cand->id].cost = cost;
3464 group->cost_map[cand->id].depends_on = depends_on;
3465 group->cost_map[cand->id].value = value;
3466 group->cost_map[cand->id].comp = comp;
3467 group->cost_map[cand->id].inv_expr = inv_expr;
3468 return;
3471 /* n_map_members is a power of two, so this computes modulo.  */
3472 s = cand->id & (group->n_map_members - 1);
     /* Linear probing from the hashed slot, wrapping around once;
	the table is sized so a free slot always exists for entries
	we store.  */
3473 for (i = s; i < group->n_map_members; i++)
3474 if (!group->cost_map[i].cand)
3475 goto found;
3476 for (i = 0; i < s; i++)
3477 if (!group->cost_map[i].cand)
3478 goto found;
3480 gcc_unreachable ();
3482 found:
3483 group->cost_map[i].cand = cand;
3484 group->cost_map[i].cost = cost;
3485 group->cost_map[i].depends_on = depends_on;
3486 group->cost_map[i].value = value;
3487 group->cost_map[i].comp = comp;
3488 group->cost_map[i].inv_expr = inv_expr;
3491 /* Gets cost of (GROUP, CAND) pair.  Returns NULL when no entry was
     stored, which means the cost is infinite (see set_group_iv_cost).
     Lookup mirrors the store: direct index in the exhaustive case,
     linear probing in the hashed case, where an empty slot terminates
     the probe.  */
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495 struct iv_cand *cand)
3497 unsigned i, s;
3498 struct cost_pair *ret;
3500 if (!cand)
3501 return NULL;
3503 if (data->consider_all_candidates)
3505 ret = group->cost_map + cand->id;
3506 if (!ret->cand)
3507 return NULL;
3509 return ret;
3512 /* n_map_members is a power of two, so this computes modulo.  */
3513 s = cand->id & (group->n_map_members - 1);
3514 for (i = s; i < group->n_map_members; i++)
3515 if (group->cost_map[i].cand == cand)
3516 return group->cost_map + i;
3517 else if (group->cost_map[i].cand == NULL)
3518 return NULL;
3519 for (i = 0; i < s; i++)
3520 if (group->cost_map[i].cand == cand)
3521 return group->cost_map + i;
3522 else if (group->cost_map[i].cand == NULL)
3523 return NULL;
3525 return NULL;
3528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.
     Static/external objects get a SYMBOL_REF-based MEM; everything else
     gets a MEM addressed by a fresh fake register numbered *REGNO
     (incremented).  Used only for cost estimation, not for real
     expansion.  */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3532 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533 machine_mode address_mode = targetm.addr_space.address_mode (as);
3534 rtx x;
3536 gcc_assert (obj);
3537 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3539 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540 x = gen_rtx_SYMBOL_REF (address_mode, name);
3541 SET_SYMBOL_REF_DECL (x, obj);
3542 x = gen_rtx_MEM (DECL_MODE (obj), x);
3543 set_mem_addr_space (x, as);
3544 targetm.encode_section_info (obj, x, true);
3546 else
3548 x = gen_raw_REG (address_mode, (*regno)++);
3549 x = gen_rtx_MEM (DECL_MODE (obj), x);
3550 set_mem_addr_space (x, as);
3553 return x;
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number (the counter
   of pseudos to hand out).  Any decl that gets a fresh RTL here is also
   pushed onto decl_rtl_to_reset so the fake RTL can be undone later.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip handled components to find the base object whose address
	 is taken; only that object needs (memory) DECL_RTL.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* *ws = 0 stops walk_tree from descending further.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in a register; fake memory for them.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake DECL_RTL can be reset afterwards.  */
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
/* Determines cost of the computation of EXPR, by expanding it to RTL
   with fake register/decl RTL and summing the cost of the resulting
   insn sequence.  SPEED selects speed vs. size costs.  Temporarily
   overrides the current node's frequency and the RTL hotness profile
   so the cost query is not skewed by the real function's profile.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give every referenced decl/SSA name some (fake) DECL_RTL first.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  /* Restore the profile state changed above.  */
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* Account for the final value as well if it is not already a register:
     either an addressing cost for a MEM, or the cost of the expression.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}
3651 /* Returns variable containing the value of candidate CAND at statement AT. */
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3656 if (stmt_after_increment (loop, cand, stmt))
3657 return cand->var_after;
3658 else
3659 return cand->var_before;
3662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3663 same precision that is at least as wide as the precision of TYPE, stores
3664 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3665 type of A and B. */
3667 static tree
3668 determine_common_wider_type (tree *a, tree *b)
3670 tree wider_type = NULL;
3671 tree suba, subb;
3672 tree atype = TREE_TYPE (*a);
3674 if (CONVERT_EXPR_P (*a))
3676 suba = TREE_OPERAND (*a, 0);
3677 wider_type = TREE_TYPE (suba);
3678 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3679 return atype;
3681 else
3682 return atype;
3684 if (CONVERT_EXPR_P (*b))
3686 subb = TREE_OPERAND (*b, 0);
3687 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3688 return atype;
3690 else
3691 return atype;
3693 *a = suba;
3694 *b = subb;
3695 return wider_type;
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.
   The resulting combination encodes
     use = ubase - ratio * cbase + ratio * var
   computed in the unsigned variant of USE's type.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple *at,
		     struct aff_tree *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  widest_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  /* All arithmetic is done in the unsigned variant of UTYPE so that
     overflow wraps instead of being undefined.  */
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the convertion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals to
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
/* Return the type in which the computation replacing USE should be done:
   the base type of its iv, except for address uses, where a pointer type
   derived from the accessed memory reference is used instead.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      /* The replacement must stay within the same address space.  */
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}
3830 /* Determines the expression by that USE is expressed from induction variable
3831 CAND at statement AT in LOOP. The computation is unshared. */
3833 static tree
3834 get_computation_at (struct loop *loop,
3835 struct iv_use *use, struct iv_cand *cand, gimple *at)
3837 aff_tree aff;
3838 tree type = get_use_type (use);
3840 if (!get_computation_aff (loop, use, cand, at, &aff))
3841 return NULL_TREE;
3842 unshare_aff_combination (&aff);
3843 return fold_convert (type, aff_combination_to_tree (&aff));
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper around
   get_computation_at evaluated at USE's own statement.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856 If we're optimizing for space, the loop setup overhead is constant;
3857 if we're optimizing for speed, amortize it over the per-iteration cost. */
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3861 if (cost == INFTY)
3862 return cost;
3863 else if (optimize_loop_for_speed_p (data->current_loop))
3864 return cost / avg_loop_niter (data->current_loop);
3865 else
3866 return cost;
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  Results are probed from the target once per
   (mode, address space) pair and cached in a lazily grown static list.  */

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  /* Flat cache index over all (address space, mode) combinations.  */
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static vec<sbitmap> valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= valid_mult_list.length ())
    valid_mult_list.safe_grow_cleared (data_index + 1);

  valid_mult = valid_mult_list[data_index];
  if (!valid_mult)
    {
      machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
      rtx addr, scaled;
      HOST_WIDE_INT i;

      /* One bit per ratio in [-MAX_RATIO, MAX_RATIO], biased by MAX_RATIO.  */
      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
      /* Probe both reg1 * i + reg2 and bare reg1 * i against the target.  */
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (scaled, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as)
	      || memory_address_addr_space_p (mode, scaled, as))
	    bitmap_set_bit (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      valid_mult_list[data_index] = valid_mult;
    }

  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
}
/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
   If SYMBOL_PRESENT is false, symbol is omitted.  If VAR_PRESENT is false,
   variable is omitted.  Compute the cost for a memory reference that accesses
   a memory location of mode MEM_MODE in address space AS.

   MAY_AUTOINC is set to true if the autoincrement (increasing index by
   size of MEM_MODE / RATIO) is available.  To make this determination, we
   look at the size of the increment to be made, which is given in CSTEP.
   CSTEP may be zero if the step is unknown.
   STMT_AFTER_INC is true iff the statement we're looking at is after the
   increment of the original biv.

   TODO -- there must be some better way.  This all is quite crude.  */

/* The auto-increment addressing forms a use/candidate pair may match.  */
enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};
/* Per (mem_mode, address space) addressing cost data cached by
   get_address_cost.  */
struct address_cost_data
{
  /* Most negative / most positive constant displacement accepted in an
     address by the target (determined by probing).  */
  HOST_WIDE_INT min_offset, max_offset;
  /* Cost of the address form indexed by
     [symbol_present][var_present][offset_present][ratio_present].  */
  unsigned costs[2][2][2][2];
  /* Cost of each supported auto-increment addressing form.  */
  unsigned ainc_costs[AINC_NONE];
};
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960 HOST_WIDE_INT cstep, machine_mode mem_mode,
3961 addr_space_t as, bool speed,
3962 bool stmt_after_inc, bool *may_autoinc)
3964 machine_mode address_mode = targetm.addr_space.address_mode (as);
3965 static vec<address_cost_data *> address_cost_data_list;
3966 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967 address_cost_data *data;
3968 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970 unsigned cost, acost, complexity;
3971 enum ainc_type autoinc_type;
3972 bool offset_p, ratio_p, autoinc;
3973 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974 unsigned HOST_WIDE_INT mask;
3975 unsigned bits;
3977 if (data_index >= address_cost_data_list.length ())
3978 address_cost_data_list.safe_grow_cleared (data_index + 1);
3980 data = address_cost_data_list[data_index];
3981 if (!data)
3983 HOST_WIDE_INT i;
3984 HOST_WIDE_INT rat, off = 0;
3985 int old_cse_not_expected, width;
3986 unsigned sym_p, var_p, off_p, rat_p, add_c;
3987 rtx_insn *seq;
3988 rtx addr, base;
3989 rtx reg0, reg1;
3991 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3993 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3995 width = GET_MODE_BITSIZE (address_mode) - 1;
3996 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997 width = HOST_BITS_PER_WIDE_INT - 1;
3998 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
4000 for (i = width; i >= 0; i--)
4002 off = -((unsigned HOST_WIDE_INT) 1 << i);
4003 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004 if (memory_address_addr_space_p (mem_mode, addr, as))
4005 break;
4007 data->min_offset = (i == -1? 0 : off);
4009 for (i = width; i >= 0; i--)
4011 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013 if (memory_address_addr_space_p (mem_mode, addr, as))
4014 break;
4015 /* For some strict-alignment targets, the offset must be naturally
4016 aligned. Try an aligned offset if mem_mode is not QImode. */
4017 off = mem_mode != QImode
4018 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019 - GET_MODE_SIZE (mem_mode)
4020 : 0;
4021 if (off > 0)
4023 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024 if (memory_address_addr_space_p (mem_mode, addr, as))
4025 break;
4028 if (i == -1)
4029 off = 0;
4030 data->max_offset = off;
4032 if (dump_file && (dump_flags & TDF_DETAILS))
4034 fprintf (dump_file, "get_address_cost:\n");
4035 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036 GET_MODE_NAME (mem_mode),
4037 data->min_offset);
4038 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039 GET_MODE_NAME (mem_mode),
4040 data->max_offset);
4043 rat = 1;
4044 for (i = 2; i <= MAX_RATIO; i++)
4045 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4047 rat = i;
4048 break;
4051 /* Compute the cost of various addressing modes. */
4052 acost = 0;
4053 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4056 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057 || USE_STORE_PRE_DECREMENT (mem_mode))
4059 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060 has_predec[mem_mode]
4061 = memory_address_addr_space_p (mem_mode, addr, as);
4063 if (has_predec[mem_mode])
4064 data->ainc_costs[AINC_PRE_DEC]
4065 = address_cost (addr, mem_mode, as, speed);
4067 if (USE_LOAD_POST_DECREMENT (mem_mode)
4068 || USE_STORE_POST_DECREMENT (mem_mode))
4070 addr = gen_rtx_POST_DEC (address_mode, reg0);
4071 has_postdec[mem_mode]
4072 = memory_address_addr_space_p (mem_mode, addr, as);
4074 if (has_postdec[mem_mode])
4075 data->ainc_costs[AINC_POST_DEC]
4076 = address_cost (addr, mem_mode, as, speed);
4078 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079 || USE_STORE_PRE_DECREMENT (mem_mode))
4081 addr = gen_rtx_PRE_INC (address_mode, reg0);
4082 has_preinc[mem_mode]
4083 = memory_address_addr_space_p (mem_mode, addr, as);
4085 if (has_preinc[mem_mode])
4086 data->ainc_costs[AINC_PRE_INC]
4087 = address_cost (addr, mem_mode, as, speed);
4089 if (USE_LOAD_POST_INCREMENT (mem_mode)
4090 || USE_STORE_POST_INCREMENT (mem_mode))
4092 addr = gen_rtx_POST_INC (address_mode, reg0);
4093 has_postinc[mem_mode]
4094 = memory_address_addr_space_p (mem_mode, addr, as);
4096 if (has_postinc[mem_mode])
4097 data->ainc_costs[AINC_POST_INC]
4098 = address_cost (addr, mem_mode, as, speed);
4100 for (i = 0; i < 16; i++)
4102 sym_p = i & 1;
4103 var_p = (i >> 1) & 1;
4104 off_p = (i >> 2) & 1;
4105 rat_p = (i >> 3) & 1;
4107 addr = reg0;
4108 if (rat_p)
4109 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110 gen_int_mode (rat, address_mode));
4112 if (var_p)
4113 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4115 if (sym_p)
4117 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118 /* ??? We can run into trouble with some backends by presenting
4119 it with symbols which haven't been properly passed through
4120 targetm.encode_section_info. By setting the local bit, we
4121 enhance the probability of things working. */
4122 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4124 if (off_p)
4125 base = gen_rtx_fmt_e (CONST, address_mode,
4126 gen_rtx_fmt_ee
4127 (PLUS, address_mode, base,
4128 gen_int_mode (off, address_mode)));
4130 else if (off_p)
4131 base = gen_int_mode (off, address_mode);
4132 else
4133 base = NULL_RTX;
4135 if (base)
4136 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4138 start_sequence ();
4139 /* To avoid splitting addressing modes, pretend that no cse will
4140 follow. */
4141 old_cse_not_expected = cse_not_expected;
4142 cse_not_expected = true;
4143 addr = memory_address_addr_space (mem_mode, addr, as);
4144 cse_not_expected = old_cse_not_expected;
4145 seq = get_insns ();
4146 end_sequence ();
4148 acost = seq_cost (seq, speed);
4149 acost += address_cost (addr, mem_mode, as, speed);
4151 if (!acost)
4152 acost = 1;
4153 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4156 /* On some targets, it is quite expensive to load symbol to a register,
4157 which makes addresses that contain symbols look much more expensive.
4158 However, the symbol will have to be loaded in any case before the
4159 loop (and quite likely we have it in register already), so it does not
4160 make much sense to penalize them too heavily. So make some final
4161 tweaks for the SYMBOL_PRESENT modes:
4163 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164 var is cheaper, use this mode with small penalty.
4165 If VAR_PRESENT is true, try whether the mode with
4166 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4167 if this is the case, use it. */
4168 add_c = add_cost (speed, address_mode);
4169 for (i = 0; i < 8; i++)
4171 var_p = i & 1;
4172 off_p = (i >> 1) & 1;
4173 rat_p = (i >> 2) & 1;
4175 acost = data->costs[0][1][off_p][rat_p] + 1;
4176 if (var_p)
4177 acost += add_c;
4179 if (acost < data->costs[1][var_p][off_p][rat_p])
4180 data->costs[1][var_p][off_p][rat_p] = acost;
4183 if (dump_file && (dump_flags & TDF_DETAILS))
4185 fprintf (dump_file, "<Address Costs>:\n");
4187 for (i = 0; i < 16; i++)
4189 sym_p = i & 1;
4190 var_p = (i >> 1) & 1;
4191 off_p = (i >> 2) & 1;
4192 rat_p = (i >> 3) & 1;
4194 fprintf (dump_file, " ");
4195 if (sym_p)
4196 fprintf (dump_file, "sym + ");
4197 if (var_p)
4198 fprintf (dump_file, "var + ");
4199 if (off_p)
4200 fprintf (dump_file, "cst + ");
4201 if (rat_p)
4202 fprintf (dump_file, "rat * ");
4204 acost = data->costs[sym_p][var_p][off_p][rat_p];
4205 fprintf (dump_file, "index costs %d\n", acost);
4207 if (has_predec[mem_mode] || has_postdec[mem_mode]
4208 || has_preinc[mem_mode] || has_postinc[mem_mode])
4209 fprintf (dump_file, " May include autoinc/dec\n");
4210 fprintf (dump_file, "\n");
4213 address_cost_data_list[data_index] = data;
4216 bits = GET_MODE_BITSIZE (address_mode);
4217 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218 offset &= mask;
4219 if ((offset >> (bits - 1) & 1))
4220 offset |= ~mask;
4221 s_offset = offset;
4223 autoinc = false;
4224 autoinc_type = AINC_NONE;
4225 msize = GET_MODE_SIZE (mem_mode);
4226 autoinc_offset = offset;
4227 if (stmt_after_inc)
4228 autoinc_offset += ratio * cstep;
4229 if (symbol_present || var_present || ratio != 1)
4230 autoinc = false;
4231 else
4233 if (has_postinc[mem_mode] && autoinc_offset == 0
4234 && msize == cstep)
4235 autoinc_type = AINC_POST_INC;
4236 else if (has_postdec[mem_mode] && autoinc_offset == 0
4237 && msize == -cstep)
4238 autoinc_type = AINC_POST_DEC;
4239 else if (has_preinc[mem_mode] && autoinc_offset == msize
4240 && msize == cstep)
4241 autoinc_type = AINC_PRE_INC;
4242 else if (has_predec[mem_mode] && autoinc_offset == -msize
4243 && msize == -cstep)
4244 autoinc_type = AINC_PRE_DEC;
4246 if (autoinc_type != AINC_NONE)
4247 autoinc = true;
4250 cost = 0;
4251 offset_p = (s_offset != 0
4252 && data->min_offset <= s_offset
4253 && s_offset <= data->max_offset);
4254 ratio_p = (ratio != 1
4255 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4257 if (ratio != 1 && !ratio_p)
4258 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4260 if (s_offset && !offset_p && !symbol_present)
4261 cost += add_cost (speed, address_mode);
4263 if (may_autoinc)
4264 *may_autoinc = autoinc;
4265 if (autoinc)
4266 acost = data->ainc_costs[autoinc_type];
4267 else
4268 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270 return comp_cost (cost + acost, complexity);
/* Calculate the SPEED or size cost of shiftadd EXPR in MODE.  MULT is the
   EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and returns
   the cost in COST.  Fails (returns false) when the multiplier is not a
   power of two representable as a shift in MODE.  */

static bool
get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  /* M is the shift amount: log2 of the constant multiplier.  */
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  /* Cost of doing it as a separate shift followed by an add.  */
  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = comp_cost (MIN (as_cost, sa_cost), 0);
  /* Add the cost of the operand that is not the multiplication.  */
  res += (mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res += force_expr_to_var_cost (multop, speed);

  *cost = res;
  return true;
}
/* Estimates cost of forcing expression EXPR into a variable, optimizing
   for SPEED.  Baseline costs for integer constants, symbols and addresses
   are measured once per compilation (for both size and speed) by expanding
   representative expressions to RTL, then cached in function-local
   statics.  Recurses over the expression structure for compound
   expressions; the result is capped at target_spill_cost.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      /* A fake static variable used to measure symbol/address costs.  */
      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);


      /* i == 0 measures size costs, i == 1 speed costs.  */
      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  /* Variables and SSA names are already "in a variable".  */
  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  /* Address of a plain decl is a symbol; anything else (e.g. an
	     array element address) costs like a full address computation.  */
	  if (TREE_CODE (obj) == VAR_DECL
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  /* Operands that are already registers or constants cost nothing extra.  */
  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  /* Prefer a shift-and-add style cost when one operand is a
	     multiplication by a power-of-two constant.  */
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	/* General multiplication by a variable; treat as expensive.  */
	return comp_cost (target_spill_cost [speed], 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;

  /* Bound the cost by target_spill_cost.  The parts of complicated
     computations often are either loop invariant or at least can
     be shared between several iv uses, so letting this grow without
     limits would not give reasonable results.  */
  if (cost.cost > (int) target_spill_cost [speed])
    cost.cost = target_spill_cost [speed];

  return cost;
}
/* Estimates cost of forcing EXPR into a variable.  DEPENDS_ON is a set of the
   invariants the computation depends on; when non-NULL it is updated with
   the invariants found in EXPR (via the find_depends walk, which uses the
   global fd_ivopts_data as context).  */

static comp_cost
force_var_cost (struct ivopts_data *data,
		tree expr, bitmap *depends_on)
{
  if (depends_on)
    {
      fd_ivopts_data = data;
      walk_tree (&expr, find_depends, depends_on, NULL);
    }

  return force_expr_to_var_cost (expr, data->speed);
}
/* Estimates cost of expressing address ADDR as var + symbol + offset.  The
   value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
   to false if the corresponding part is missing.  DEPENDS_ON is a set of the
   invariants the computation depends on.  */

static comp_cost
split_address_cost (struct ivopts_data *data,
		    tree addr, bool *symbol_present, bool *var_present,
		    unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  tree core;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  machine_mode mode;
  int unsignedp, reversep, volatilep;

  core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &reversep, &volatilep, false);

  /* A variable offset, a sub-byte position, a reverse storage order
     access, or a base that is not a plain variable cannot be split;
     treat the whole address as a variable with spill cost.  */
  if (toffset != 0
      || bitpos % BITS_PER_UNIT != 0
      || reversep
      || TREE_CODE (core) != VAR_DECL)
    {
      *symbol_present = false;
      *var_present = true;
      fd_ivopts_data = data;
      if (depends_on)
	walk_tree (&addr, find_depends, depends_on, NULL);

      return comp_cost (target_spill_cost[data->speed], 0);
    }

  *offset += bitpos / BITS_PER_UNIT;
  if (TREE_STATIC (core)
      || DECL_EXTERNAL (core))
    {
      /* Static/external variables are addressed through a symbol.  */
      *symbol_present = true;
      *var_present = false;
      return no_cost;
    }

  *symbol_present = false;
  *var_present = true;
  return no_cost;
}
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* Best case: the difference folds to a compile-time constant.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  /* E1 - 0: just split E1's address into its parts.  */
  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  /* General case: build the affine combination E1 - E2 and cost forcing
     it into a variable.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  /* Peel constant offsets off both operands first.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Differences of addresses have their own, more precise handling.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  /* Identical operands: the difference is entirely in the offsets.  */
  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  if (integer_zerop (e1))
    {
      /* 0 - E2 needs a negation on top of forcing E2 into a variable.  */
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  /* General case: cost of the affine combination E1 - E2.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, -1);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
4651 /* Returns true if AFF1 and AFF2 are identical. */
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4656 unsigned i;
4658 if (aff1->n != aff2->n)
4659 return false;
4661 for (i = 0; i < aff1->n; i++)
4663 if (aff1->elts[i].coef != aff2->elts[i].coef)
4664 return false;
4666 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667 return false;
4669 return true;
/* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.
   If an equivalent expression is already recorded, the existing entry is
   returned; otherwise a new entry is allocated and assigned a fresh id.  */

static iv_inv_expr_ent *
record_inv_expr (struct ivopts_data *data, tree expr)
{
  struct iv_inv_expr_ent ent;
  struct iv_inv_expr_ent **slot;

  /* Look up (or reserve a slot for) the expression, keyed by its hash.  */
  ent.expr = expr;
  ent.hash = iterative_hash_expr (expr, 0);
  slot = data->inv_expr_tab->find_slot (&ent, INSERT);

  /* Empty slot: this is a new invariant expression; allocate an entry
     and give it the next available id.  */
  if (!*slot)
    {
      *slot = XNEW (struct iv_inv_expr_ent);
      (*slot)->expr = expr;
      (*slot)->hash = ent.hash;
      (*slot)->id = data->max_inv_expr_id++;
    }

  return *slot;
}
/* Returns the invariant expression if expression UBASE - RATIO * CBASE
   requires a new compiler generated temporary.  Returns NULL otherwise.
   ADDRESS_P is a flag indicating if the expression is for address
   computation.  */

static iv_inv_expr_ent *
get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
			 tree cbase, HOST_WIDE_INT ratio,
			 bool address_p)
{
  aff_tree ubase_aff, cbase_aff;
  tree expr, ub, cb;

  STRIP_NOPS (ubase);
  STRIP_NOPS (cbase);
  /* Remember the stripped-but-otherwise-full forms; the checks below work
     on further-reduced copies, but the recorded expression is built from
     these.  */
  ub = ubase;
  cb = cbase;

  /* Two constants never need a temporary.  */
  if ((TREE_CODE (ubase) == INTEGER_CST)
      && (TREE_CODE (cbase) == INTEGER_CST))
    return NULL;

  /* Strips the constant part.  */
  if (TREE_CODE (ubase) == PLUS_EXPR
      || TREE_CODE (ubase) == MINUS_EXPR
      || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
	ubase = TREE_OPERAND (ubase, 0);
    }

  /* Strips the constant part.  */
  if (TREE_CODE (cbase) == PLUS_EXPR
      || TREE_CODE (cbase) == MINUS_EXPR
      || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
    {
      if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
	cbase = TREE_OPERAND (cbase, 0);
    }

  if (address_p)
    {
      /* For address computation, an SSA name or invariant address on one
	 side combined with a constant on the other can be folded into the
	 addressing mode — no temporary needed.  */
      if (((TREE_CODE (ubase) == SSA_NAME)
	   || (TREE_CODE (ubase) == ADDR_EXPR
	       && is_gimple_min_invariant (ubase)))
	  && (TREE_CODE (cbase) == INTEGER_CST))
	return NULL;

      if (((TREE_CODE (cbase) == SSA_NAME)
	   || (TREE_CODE (cbase) == ADDR_EXPR
	       && is_gimple_min_invariant (cbase)))
	  && (TREE_CODE (ubase) == INTEGER_CST))
	return NULL;
    }

  if (ratio == 1)
    {
      /* With RATIO == 1, equal bases cancel exactly.  */
      if (operand_equal_p (ubase, cbase, 0))
	return NULL;

      /* Two ADDR_EXPRs may still denote the same object once a zero
	 array index is peeled off either side.  */
      if (TREE_CODE (ubase) == ADDR_EXPR
	  && TREE_CODE (cbase) == ADDR_EXPR)
	{
	  tree usym, csym;

	  usym = TREE_OPERAND (ubase, 0);
	  csym = TREE_OPERAND (cbase, 0);
	  if (TREE_CODE (usym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (usym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		usym = TREE_OPERAND (usym, 0);
	    }
	  if (TREE_CODE (csym) == ARRAY_REF)
	    {
	      tree ind = TREE_OPERAND (csym, 1);
	      if (TREE_CODE (ind) == INTEGER_CST
		  && tree_fits_shwi_p (ind)
		  && tree_to_shwi (ind) == 0)
		csym = TREE_OPERAND (csym, 0);
	    }
	  if (operand_equal_p (usym, csym, 0))
	    return NULL;
	}
      /* Now do more complex comparison via affine expansion.  */
      tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
      tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
      if (compare_aff_trees (&ubase_aff, &cbase_aff))
	return NULL;
    }

  /* A temporary is needed: record UB - RATIO * CB as the invariant
     expression, built from the un-reduced operands.  */
  tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
  tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);

  aff_combination_scale (&cbase_aff, -1 * ratio);
  aff_combination_add (&ubase_aff, &cbase_aff);
  expr = aff_combination_to_tree (&ubase_aff);
  return record_inv_expr (data, expr);
}
/* Scale (multiply) the computed COST (except scratch part that should be
   hoisted out a loop) by header->frequency / AT->frequency,
   which makes expected cost more accurate.  */

static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, iv_cand *cand,
				comp_cost cost)
{
  int loop_freq = data->current_loop->header->frequency;
  int bb_freq = at->bb->frequency;
  /* A zero loop frequency would divide by zero; in that case the cost is
     returned unscaled.  */
  if (loop_freq != 0)
    {
      gcc_assert (cost.scratch <= cost.cost);
      /* Only the non-scratch portion is frequency-dependent: the scratch
	 (setup) part executes outside the loop body.  */
      int scaled_cost
	= cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Scaling iv_use based on cand %d "
		 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
		 cand->id, 1.0f * bb_freq / loop_freq, cost.cost,
		 cost.scratch, scaled_cost, bb_freq, loop_freq);

      cost.cost = scaled_cost;
    }

  return cost;
}
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  AT is the statement at that the value is computed.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  If INV_EXPR is nonnull, record there the
   invariant expression a temporary would be needed for.  */

static comp_cost
get_computation_cost_at (struct ivopts_data *data,
			 struct iv_use *use, struct iv_cand *cand,
			 bool address_p, bitmap *depends_on, gimple *at,
			 bool *can_autoinc,
			 iv_inv_expr_ent **inv_expr)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase, cstep;
  tree utype = TREE_TYPE (ubase), ctype;
  unsigned HOST_WIDE_INT cstepi, offset = 0;
  HOST_WIDE_INT ratio, aratio;
  bool var_present, symbol_present, stmt_is_after_inc;
  comp_cost cost;
  widest_int rat;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));
  machine_mode mem_mode = (address_p
			   ? TYPE_MODE (TREE_TYPE (*use->op_p))
			   : VOIDmode);

  if (depends_on)
    *depends_on = NULL;

  /* Only consider real candidates.  */
  if (!cand->iv)
    return infinite_cost;

  cbase = cand->iv->base;
  cstep = cand->iv->step;
  ctype = TREE_TYPE (cbase);

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return infinite_cost;
    }

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      /* TODO -- add direct handling of this case.  */
      goto fallback;
    }

  /* CSTEPI is removed from the offset in case statement is after the
     increment.  If the step is not constant, we use zero instead.
     This is a bit imprecise (there is the extra addition), but
     redundancy elimination is likely to transform the code so that
     it uses value of the variable before increment anyway,
     so it is not that much unrealistic.  */
  if (cst_and_fits_in_hwi (cstep))
    cstepi = int_cst_value (cstep);
  else
    cstepi = 0;

  /* RATIO is the constant factor by that the use's step is a multiple of
     the candidate's step; without it the use cannot be expressed.  */
  if (!constant_multiple_of (ustep, cstep, &rat))
    return infinite_cost;

  if (wi::fits_shwi_p (rat))
    ratio = rat.to_shwi ();
  else
    return infinite_cost;

  STRIP_NOPS (cbase);
  ctype = TREE_TYPE (cbase);

  stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);

  /* use = ubase + ratio * (var - cbase).  If either cbase is a constant
     or ratio == 1, it is better to handle this like

     ubase - ratio * cbase + ratio * var

     (also holds in the case ratio == -1, TODO.  */

  if (cst_and_fits_in_hwi (cbase))
    {
      /* Constant CBASE: fold -ratio*cbase into the offset and cost only
	 the UBASE part; this setup is loop invariant, hence amortized
	 over the expected iteration count.  */
      offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
      cost = difference_cost (data,
			      ubase, build_int_cst (utype, 0),
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost /= avg_loop_niter (data->current_loop);
    }
  else if (ratio == 1)
    {
      tree real_cbase = cbase;

      /* Check to see if any adjustment is needed.  */
      if (cstepi == 0 && stmt_is_after_inc)
	{
	  aff_tree real_cbase_aff;
	  aff_tree cstep_aff;

	  tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
				   &real_cbase_aff);
	  tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);

	  aff_combination_add (&real_cbase_aff, &cstep_aff);
	  real_cbase = aff_combination_to_tree (&real_cbase_aff);
	}

      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost /= avg_loop_niter (data->current_loop);
    }
  else if (address_p
	   && !POINTER_TYPE_P (ctype)
	   && multiplier_allowed_in_address_p
	       (ratio, mem_mode,
		TYPE_ADDR_SPACE (TREE_TYPE (utype))))
    {
      /* The target addressing mode can absorb the multiplication by
	 RATIO, so cost only ubase - ratio * cbase.  */
      tree real_cbase = cbase;

      if (cstepi == 0 && stmt_is_after_inc)
	{
	  if (POINTER_TYPE_P (ctype))
	    real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
	  else
	    real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
	}
      real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
				build_int_cst (ctype, ratio));
      cost = difference_cost (data,
			      ubase, real_cbase,
			      &symbol_present, &var_present, &offset,
			      depends_on);
      cost /= avg_loop_niter (data->current_loop);
    }
  else
    {
      /* General case: materialize cbase and ubase separately; the final
	 addition happens on every iteration and is not amortized.  */
      cost = force_var_cost (data, cbase, depends_on);
      cost += difference_cost (data, ubase, build_int_cst (utype, 0),
			       &symbol_present, &var_present, &offset,
			       depends_on);
      cost /= avg_loop_niter (data->current_loop);
      cost += add_cost (data->speed, TYPE_MODE (ctype));
    }

  /* Record setup cost in scratch field.  */
  cost.scratch = cost.cost;

  if (inv_expr && depends_on && *depends_on)
    {
      *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
					   address_p);
      /* Clear depends on.  */
      if (*inv_expr != NULL)
	bitmap_clear (*depends_on);
    }

  /* If we are after the increment, the value of the candidate is higher by
     one iteration.  */
  if (stmt_is_after_inc)
    offset -= ratio * cstepi;

  /* Now the computation is in shape symbol + var1 + const + ratio * var2.
     (symbol/var1/const parts may be omitted).  If we are looking for an
     address, find the cost of addressing this.  */
  if (address_p)
    {
      cost += get_address_cost (symbol_present, var_present,
				offset, ratio, cstepi,
				mem_mode,
				TYPE_ADDR_SPACE (TREE_TYPE (utype)),
				speed, stmt_is_after_inc, can_autoinc);
      return get_scaled_computation_cost_at (data, at, cand, cost);
    }

  /* Otherwise estimate the costs for computing the expression.  */
  if (!symbol_present && !var_present && !offset)
    {
      if (ratio != 1)
	cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
      return get_scaled_computation_cost_at (data, at, cand, cost);
    }

  /* Symbol + offset should be compile-time computable so consider that they
     are added once to the variable, if present.  */
  if (var_present && (symbol_present || offset))
    cost += adjust_setup_cost (data,
			       add_cost (speed, TYPE_MODE (ctype)));

  /* Having offset does not affect runtime cost in case it is added to
     symbol, but it increases complexity.  */
  if (offset)
    cost.complexity++;

  cost += add_cost (speed, TYPE_MODE (ctype));

  aratio = ratio > 0 ? ratio : -ratio;
  if (aratio != 1)
    cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);

  return get_scaled_computation_cost_at (data, at, cand, cost);

fallback:
  if (can_autoinc)
    *can_autoinc = false;

  /* Just get the expression, expand it and measure the cost.  */
  tree comp = get_computation_at (data->current_loop, use, cand, at);

  if (!comp)
    return infinite_cost;

  if (address_p)
    comp = build_simple_mem_ref (comp);

  cost = comp_cost (computation_cost (comp, speed), 0);

  return get_scaled_computation_cost_at (data, at, cand, cost);
}
5065 /* Determines the cost of the computation by that USE is expressed
5066 from induction variable CAND. If ADDRESS_P is true, we just need
5067 to create an address from it, otherwise we want to get it into
5068 register. A set of invariants we depend on is stored in
5069 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5070 autoinc addressing is likely. */
5072 static comp_cost
5073 get_computation_cost (struct ivopts_data *data,
5074 struct iv_use *use, struct iv_cand *cand,
5075 bool address_p, bitmap *depends_on,
5076 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5078 return get_computation_cost_at (data,
5079 use, cand, address_p, depends_on, use->stmt,
5080 can_autoinc, inv_expr);
5083 /* Determines cost of computing the use in GROUP with CAND in a generic
5084 expression. */
5086 static bool
5087 determine_group_iv_cost_generic (struct ivopts_data *data,
5088 struct iv_group *group, struct iv_cand *cand)
5090 comp_cost cost;
5091 iv_inv_expr_ent *inv_expr = NULL;
5092 bitmap depends_on = NULL;
5093 struct iv_use *use = group->vuses[0];
5095 /* The simple case first -- if we need to express value of the preserved
5096 original biv, the cost is 0. This also prevents us from counting the
5097 cost of increment twice -- once at this use and once in the cost of
5098 the candidate. */
5099 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5100 cost = no_cost;
5101 else
5102 cost = get_computation_cost (data, use, cand, false,
5103 &depends_on, NULL, &inv_expr);
5105 set_group_iv_cost (data, group, cand, cost, depends_on,
5106 NULL_TREE, ERROR_MARK, inv_expr);
5107 return !cost.infinite_cost_p ();
/* Determines cost of computing uses in GROUP with CAND in addresses.  */

static bool
determine_group_iv_cost_address (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  unsigned i;
  bitmap depends_on;
  bool can_autoinc;
  iv_inv_expr_ent *inv_expr = NULL;
  struct iv_use *use = group->vuses[0];
  comp_cost sum_cost = no_cost, cost;

  cost = get_computation_cost (data, use, cand, true,
			       &depends_on, &can_autoinc, &inv_expr);

  sum_cost = cost;
  if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
    {
      if (can_autoinc)
	sum_cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	sum_cost = infinite_cost;
    }

  /* Uses in a group can share setup code, so only add setup cost once.
     NOTE(review): this subtraction appears to be a dead store — COST is
     unconditionally overwritten by get_computation_cost inside the loop
     below before being read again, so the shared-setup discount is never
     actually applied to the sub-uses.  Verify against upstream intent.  */
  cost -= cost.scratch;
  /* Compute and add costs for rest uses of this group.  */
  for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
    {
      struct iv_use *next = group->vuses[i];

      /* TODO: We could skip computing cost for sub iv_use when it has the
	 same cost as the first iv_use, but the cost really depends on the
	 offset and where the iv_use is.  */
      cost = get_computation_cost (data, next, cand, true,
				   NULL, &can_autoinc, NULL);
      sum_cost += cost;
    }
  set_group_iv_cost (data, group, cand, sum_cost, depends_on,
		     NULL_TREE, ERROR_MARK, inv_expr);

  return !sum_cost.infinite_cost_p ();
}
5158 /* Computes value of candidate CAND at position AT in iteration NITER, and
5159 stores it to VAL. */
5161 static void
5162 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5163 aff_tree *val)
5165 aff_tree step, delta, nit;
5166 struct iv *iv = cand->iv;
5167 tree type = TREE_TYPE (iv->base);
5168 tree steptype = type;
5169 if (POINTER_TYPE_P (type))
5170 steptype = sizetype;
5171 steptype = unsigned_type_for (type);
5173 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5174 aff_combination_convert (&step, steptype);
5175 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5176 aff_combination_convert (&nit, steptype);
5177 aff_combination_mult (&nit, &step, &delta);
5178 if (stmt_after_increment (loop, cand, at))
5179 aff_combination_add (&delta, &step);
5181 tree_to_aff_combination (iv->base, type, val);
5182 if (!POINTER_TYPE_P (type))
5183 aff_combination_convert (val, steptype);
5184 aff_combination_add (val, &delta);
/* Returns period of induction variable iv, i.e. the number of iterations
   before the iv revisits a value, expressed as a mask constant in the
   unsigned variant of the step's type.  Requires a constant step.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* Period of the iv is lcm (step, type_range)/step -1,
     i.e., N*type_range/step - 1.  Since type range is power
     of two, N == (step >> num_of_ending_zeros_binary (step),
     so the final result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */

  pow2div = num_ending_zeros (step);

  /* Build the mask with (precision - trailing_zeros(step)) low bits set,
     which is exactly the formula above.  */
  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_to_uhwi (pow2div)));

  return period;
}
5215 /* Returns the comparison operator used when eliminating the iv USE. */
5217 static enum tree_code
5218 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5220 struct loop *loop = data->current_loop;
5221 basic_block ex_bb;
5222 edge exit;
5224 ex_bb = gimple_bb (use->stmt);
5225 exit = EDGE_SUCC (ex_bb, 0);
5226 if (flow_bb_inside_loop_p (loop, exit->dest))
5227 exit = EDGE_SUCC (ex_bb, 1);
5229 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  /* In a wrapping type nothing can be proved.  */
  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  /* Decompose BASE into a binary operation CODE (E1, E2), either from the
     defining assignment of an SSA name or from the expression itself.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.
     OFFSET is scaled by -1 so that "operand equals OFFSET" becomes
     "operand + (-OFFSET) is zero".  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
				  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
				  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      /* For plain addition either operand may be the OFFSET.  */
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
	return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
				      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      /* For pointer addition only the second operand can be the OFFSET.  */
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  The check is done
     by affine arithmetic: tmpb := b + (-a) + (-niter) must reduce to the
     constant 1 with no symbolic part.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  /* Find the edge leaving the loop; there must be one, otherwise the
     condition is not a loop exit at all.  */
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
	{
	  /* See if we can take advantage of inferred loop bound
	     information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (wi::gtu_p (max_niter, period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
			 aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
5544 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5545 be copied, if it is used in the loop body and DATA->body_includes_call. */
5547 static int
5548 parm_decl_cost (struct ivopts_data *data, tree bound)
5550 tree sbound = bound;
5551 STRIP_NOPS (sbound);
5553 if (TREE_CODE (sbound) == SSA_NAME
5554 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5555 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5556 && data->body_includes_call)
5557 return COSTS_N_INSNS (1);
5559 return 0;
/* Determines cost of computing the use in GROUP with CAND in a condition.
   Two strategies are costed: eliminating the iv entirely (comparing CAND
   against a precomputed bound) and expressing the original giv; the
   cheaper one is recorded.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
			      struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  struct iv_use *use = group->vuses[0];

  gcc_assert (cand->iv);

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr = record_inv_expr (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr = elim_inv_expr;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      /* Clear the elimination artifacts so the recorded cost entry does
	 not suggest a replacement comparison.  */
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = express_inv_expr;
    }

  set_group_iv_cost (data, group, cand, cost,
		     depends_on, bound, comp, inv_expr);

  /* Free whichever dependence bitmap was not handed off above.  */
  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !cost.infinite_cost_p ();
}
5667 /* Determines cost of computing uses in GROUP with CAND. Returns false
5668 if USE cannot be represented with CAND. */
5670 static bool
5671 determine_group_iv_cost (struct ivopts_data *data,
5672 struct iv_group *group, struct iv_cand *cand)
5674 switch (group->type)
5676 case USE_NONLINEAR_EXPR:
5677 return determine_group_iv_cost_generic (data, group, cand);
5679 case USE_ADDRESS:
5680 return determine_group_iv_cost_address (data, group, cand);
5682 case USE_COMPARE:
5683 return determine_group_iv_cost_cond (data, group, cand);
5685 default:
5686 gcc_unreachable ();
5690 /* Return true if get_computation_cost indicates that autoincrement is
5691 a possibility for the pair of USE and CAND, false otherwise. */
5693 static bool
5694 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5695 struct iv_cand *cand)
5697 bitmap depends_on;
5698 bool can_autoinc;
5699 comp_cost cost;
5701 if (use->type != USE_ADDRESS)
5702 return false;
5704 cost = get_computation_cost (data, use, cand, true, &depends_on,
5705 &can_autoinc, NULL);
5707 BITMAP_FREE (depends_on);
5709 return !cost.infinite_cost_p () && can_autoinc;
5712 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5713 use that allows autoincrement, and set their AINC_USE if possible. */
5715 static void
5716 set_autoinc_for_original_candidates (struct ivopts_data *data)
5718 unsigned i, j;
5720 for (i = 0; i < data->vcands.length (); i++)
5722 struct iv_cand *cand = data->vcands[i];
5723 struct iv_use *closest_before = NULL;
5724 struct iv_use *closest_after = NULL;
5725 if (cand->pos != IP_ORIGINAL)
5726 continue;
5728 for (j = 0; j < data->vgroups.length (); j++)
5730 struct iv_group *group = data->vgroups[j];
5731 struct iv_use *use = group->vuses[0];
5732 unsigned uid = gimple_uid (use->stmt);
5734 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5735 continue;
5737 if (uid < gimple_uid (cand->incremented_at)
5738 && (closest_before == NULL
5739 || uid > gimple_uid (closest_before->stmt)))
5740 closest_before = use;
5742 if (uid > gimple_uid (cand->incremented_at)
5743 && (closest_after == NULL
5744 || uid < gimple_uid (closest_after->stmt)))
5745 closest_after = use;
5748 if (closest_before != NULL
5749 && autoinc_possible_for_pair (data, closest_before, cand))
5750 cand->ainc_use = closest_before;
5751 else if (closest_after != NULL
5752 && autoinc_possible_for_pair (data, closest_after, cand))
5753 cand->ainc_use = closest_after;
5757 /* Finds the candidates for the induction variables. */
5759 static void
5760 find_iv_candidates (struct ivopts_data *data)
5762 /* Add commonly used ivs. */
5763 add_standard_iv_candidates (data);
5765 /* Add old induction variables. */
5766 add_iv_candidate_for_bivs (data);
5768 /* Add induction variables derived from uses. */
5769 add_iv_candidate_for_groups (data);
5771 set_autoinc_for_original_candidates (data);
5773 /* Record the important candidates. */
5774 record_important_candidates (data);
5776 if (dump_file && (dump_flags & TDF_DETAILS))
5778 unsigned i;
5780 fprintf (dump_file, "\n<Important Candidates>:\t");
5781 for (i = 0; i < data->vcands.length (); i++)
5782 if (data->vcands[i]->important)
5783 fprintf (dump_file, " %d,", data->vcands[i]->id);
5784 fprintf (dump_file, "\n");
5786 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5787 for (i = 0; i < data->vgroups.length (); i++)
5789 struct iv_group *group = data->vgroups[i];
5791 if (group->related_cands)
5793 fprintf (dump_file, " Group %d:\t", group->id);
5794 dump_bitmap (dump_file, group->related_cands);
5797 fprintf (dump_file, "\n");
5801 /* Determines costs of computing use of iv with an iv candidate. */
5803 static void
5804 determine_group_iv_costs (struct ivopts_data *data)
5806 unsigned i, j;
5807 struct iv_cand *cand;
5808 struct iv_group *group;
/* Scratch set of candidate ids whose cost turned out infinite for the
   group currently being processed.  */
5809 bitmap to_clear = BITMAP_ALLOC (NULL);
5811 alloc_use_cost_map (data);
/* Fill the per-group cost map for every (group, candidate) pair that may
   be consulted later; either all candidates or only the related ones.  */
5813 for (i = 0; i < data->vgroups.length (); i++)
5815 group = data->vgroups[i];
5817 if (data->consider_all_candidates)
5819 for (j = 0; j < data->vcands.length (); j++)
5821 cand = data->vcands[j];
5822 determine_group_iv_cost (data, group, cand);
5825 else
5827 bitmap_iterator bi;
5829 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5831 cand = data->vcands[j];
5832 if (!determine_group_iv_cost (data, group, cand))
5833 bitmap_set_bit (to_clear, j)
5836 /* Remove the candidates for that the cost is infinite from
5837 the list of related candidates. */
5838 bitmap_and_compl_into (group->related_cands, to_clear);
5839 bitmap_clear (to_clear);
5843 BITMAP_FREE (to_clear);
/* The remainder of the function only emits debug dumps: the table of
   invariant expressions (sorted for stable output) and the per-group
   cost/complexity of each finite-cost candidate.  */
5845 if (dump_file && (dump_flags & TDF_DETAILS))
5847 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5848 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5850 for (hash_table<iv_inv_expr_hasher>::iterator it
5851 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5852 ++it)
5853 list.safe_push (*it);
5855 list.qsort (sort_iv_inv_expr_ent);
5857 for (i = 0; i < list.length (); ++i)
5859 fprintf (dump_file, "inv_expr %d: \t", i);
5860 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5861 fprintf (dump_file, "\n");
5864 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5866 for (i = 0; i < data->vgroups.length (); i++)
5868 group = data->vgroups[i];
5870 fprintf (dump_file, "Group %d:\n", i);
5871 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5872 for (j = 0; j < group->n_map_members; j++)
/* Skip unset entries and candidates that cannot express this group.  */
5874 if (!group->cost_map[j].cand
5875 || group->cost_map[j].cost.infinite_cost_p ())
5876 continue;
5878 fprintf (dump_file, " %d\t%d\t%d\t",
5879 group->cost_map[j].cand->id,
5880 group->cost_map[j].cost.cost,
5881 group->cost_map[j].cost.complexity);
5882 if (group->cost_map[j].inv_expr != NULL)
5883 fprintf (dump_file, "%d\t",
5884 group->cost_map[j].inv_expr->id);
5885 else
5886 fprintf (dump_file, "\t");
5887 if (group->cost_map[j].depends_on)
5888 bitmap_print (dump_file,
5889 group->cost_map[j].depends_on, "","");
5890 fprintf (dump_file, "\n");
5893 fprintf (dump_file, "\n");
5895 fprintf (dump_file, "\n");
5899 /* Determines cost of the candidate CAND. */
5901 static void
5902 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5904 comp_cost cost_base;
5905 unsigned cost, cost_step;
5906 tree base;
5908 if (!cand->iv)
5910 cand->cost = 0;
5911 return;
5914 /* There are two costs associated with the candidate -- its increment
5915 and its initialization. The second is almost negligible for any loop
5916 that rolls enough, so we take it just very little into account. */
5918 base = cand->iv->base;
5919 cost_base = force_var_cost (data, base, NULL);
5920 /* It will be exceptional that the iv register happens to be initialized with
5921 the proper value at no cost. In general, there will at least be a regcopy
5922 or a const set. */
5923 if (cost_base.cost == 0)
5924 cost_base.cost = COSTS_N_INSNS (1);
5925 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5927 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5929 /* Prefer the original ivs unless we may gain something by replacing it.
5930 The reason is to make debugging simpler; so this is not relevant for
5931 artificial ivs created by other optimization passes. */
5932 if (cand->pos != IP_ORIGINAL
5933 || !SSA_NAME_VAR (cand->var_before)
5934 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5935 cost++;
5937 /* Prefer not to insert statements into latch unless there are some
5938 already (so that we do not create unnecessary jumps). */
5939 if (cand->pos == IP_END
5940 && empty_block_p (ip_end_pos (data->current_loop)))
5941 cost++;
5943 cand->cost = cost;
5944 cand->cost_step = cost_step;
5947 /* Determines costs of computation of the candidates. */
5949 static void
5950 determine_iv_costs (struct ivopts_data *data)
5952 unsigned i;
5954 if (dump_file && (dump_flags & TDF_DETAILS))
5956 fprintf (dump_file, "<Candidate Costs>:\n");
5957 fprintf (dump_file, " cand\tcost\n");
5960 for (i = 0; i < data->vcands.length (); i++)
5962 struct iv_cand *cand = data->vcands[i];
5964 determine_iv_cost (data, cand);
5966 if (dump_file && (dump_flags & TDF_DETAILS))
5967 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5970 if (dump_file && (dump_flags & TDF_DETAILS))
5971 fprintf (dump_file, "\n");
5974 /* Calculates cost for having SIZE induction variables. */
5976 static unsigned
5977 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5979 /* We add size to the cost, so that we prefer eliminating ivs
5980 if possible. */
5981 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5982 data->body_includes_call);
5985 /* For each size of the induction variable set determine the penalty. */
5987 static void
5988 determine_set_costs (struct ivopts_data *data)
5990 unsigned j, n;
5991 gphi *phi;
5992 gphi_iterator psi;
5993 tree op;
5994 struct loop *loop = data->current_loop;
5995 bitmap_iterator bi;
5997 if (dump_file && (dump_flags & TDF_DETAILS))
5999 fprintf (dump_file, "<Global Costs>:\n");
6000 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
6001 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
6002 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
6003 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
6006 n = 0;
6007 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
6009 phi = psi.phi ();
6010 op = PHI_RESULT (phi);
6012 if (virtual_operand_p (op))
6013 continue;
6015 if (get_iv (data, op))
6016 continue;
6018 n++;
6021 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6023 struct version_info *info = ver_info (data, j);
6025 if (info->inv_id && info->has_nonlin_use)
6026 n++;
6029 data->regs_used = n;
6030 if (dump_file && (dump_flags & TDF_DETAILS))
6031 fprintf (dump_file, " regs_used %d\n", n);
6033 if (dump_file && (dump_flags & TDF_DETAILS))
6035 fprintf (dump_file, " cost for size:\n");
6036 fprintf (dump_file, " ivs\tcost\n");
6037 for (j = 0; j <= 2 * target_avail_regs; j++)
6038 fprintf (dump_file, " %d\t%d\n", j,
6039 ivopts_global_cost_for_size (data, j));
6040 fprintf (dump_file, "\n");
6044 /* Returns true if A is a cheaper cost pair than B. */
6046 static bool
6047 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6049 if (!a)
6050 return false;
6052 if (!b)
6053 return true;
6055 if (a->cost < b->cost)
6056 return true;
6058 if (b->cost < a->cost)
6059 return false;
6061 /* In case the costs are the same, prefer the cheaper candidate. */
6062 if (a->cand->cost < b->cand->cost)
6063 return true;
6065 return false;
6069 /* Returns candidate by that USE is expressed in IVS. */
6071 static struct cost_pair *
6072 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6074 return ivs->cand_for_group[group->id];
6077 /* Computes the cost field of IVS structure. */
6079 static void
6080 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6082 comp_cost cost = ivs->cand_use_cost;
6084 cost += ivs->cand_cost;
6086 cost += ivopts_global_cost_for_size (data,
6087 ivs->n_regs
6088 + ivs->used_inv_exprs->elements ());
6090 ivs->cost = cost;
6093 /* Remove invariants in set INVS to set IVS. */
6095 static void
6096 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6098 bitmap_iterator bi;
6099 unsigned iid;
6101 if (!invs)
6102 return;
6104 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6106 ivs->n_invariant_uses[iid]--;
6107 if (ivs->n_invariant_uses[iid] == 0)
6108 ivs->n_regs--;
6112 /* Set USE not to be expressed by any candidate in IVS. */
6114 static void
6115 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6116 struct iv_group *group)
6118 unsigned gid = group->id, cid;
6119 struct cost_pair *cp;
6121 cp = ivs->cand_for_group[gid];
6122 if (!cp)
6123 return;
6124 cid = cp->cand->id;
/* The group is now unexpressed; undo the bookkeeping done by
   iv_ca_set_cp, in the mirror order.  */
6126 ivs->bad_groups++;
6127 ivs->cand_for_group[gid] = NULL;
6128 ivs->n_cand_uses[cid]--;
/* Last use of this candidate: drop it from the set entirely.  */
6130 if (ivs->n_cand_uses[cid] == 0)
6132 bitmap_clear_bit (ivs->cands, cid);
6133 /* Do not count the pseudocandidates. */
6134 if (cp->cand->iv)
6135 ivs->n_regs--;
6136 ivs->n_cands--;
6137 ivs->cand_cost -= cp->cand->cost;
6139 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6142 ivs->cand_use_cost -= cp->cost;
6144 iv_ca_set_remove_invariants (ivs, cp->depends_on);
/* Unreference the invariant expression; forget it when the last
   reference goes away.  */
6146 if (cp->inv_expr != NULL)
6148 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6149 --(*slot);
6150 if (*slot == 0)
6151 ivs->used_inv_exprs->remove (cp->inv_expr);
6153 iv_ca_recount_cost (data, ivs);
6156 /* Add invariants in set INVS to set IVS. */
6158 static void
6159 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6161 bitmap_iterator bi;
6162 unsigned iid;
6164 if (!invs)
6165 return;
6167 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6169 ivs->n_invariant_uses[iid]++;
6170 if (ivs->n_invariant_uses[iid] == 1)
6171 ivs->n_regs++;
6175 /* Set cost pair for GROUP in set IVS to CP. */
6177 static void
6178 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6179 struct iv_group *group, struct cost_pair *cp)
6181 unsigned gid = group->id, cid;
/* Nothing to do if the group already uses this exact cost pair.  */
6183 if (ivs->cand_for_group[gid] == cp)
6184 return;
/* First detach the previous pair, if any, so the counters stay
   balanced with iv_ca_set_no_cp.  */
6186 if (ivs->cand_for_group[gid])
6187 iv_ca_set_no_cp (data, ivs, group);
6189 if (cp)
6191 cid = cp->cand->id;
6193 ivs->bad_groups--;
6194 ivs->cand_for_group[gid] = cp;
6195 ivs->n_cand_uses[cid]++;
/* First use of this candidate: add it to the set.  */
6196 if (ivs->n_cand_uses[cid] == 1)
6198 bitmap_set_bit (ivs->cands, cid);
6199 /* Do not count the pseudocandidates. */
6200 if (cp->cand->iv)
6201 ivs->n_regs++;
6202 ivs->n_cands++;
6203 ivs->cand_cost += cp->cand->cost;
6205 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6208 ivs->cand_use_cost += cp->cost;
6209 iv_ca_set_add_invariants (ivs, cp->depends_on);
/* Reference the invariant expression used by this pair, creating the
   map entry on first use.  */
6211 if (cp->inv_expr != NULL)
6213 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6214 ++(*slot);
6216 iv_ca_recount_cost (data, ivs);
6220 /* Extend set IVS by expressing USE by some of the candidates in it
6221 if possible. Consider all important candidates if candidates in
6222 set IVS don't give any result. */
6224 static void
6225 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6226 struct iv_group *group)
6228 struct cost_pair *best_cp = NULL, *cp;
6229 bitmap_iterator bi;
6230 unsigned i;
6231 struct iv_cand *cand;
6233 gcc_assert (ivs->upto >= group->id);
6234 ivs->upto++;
6235 ivs->bad_groups++;
6237 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6239 cand = data->vcands[i];
6240 cp = get_group_iv_cost (data, group, cand);
6241 if (cheaper_cost_pair (cp, best_cp))
6242 best_cp = cp;
6245 if (best_cp == NULL)
6247 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6249 cand = data->vcands[i];
6250 cp = get_group_iv_cost (data, group, cand);
6251 if (cheaper_cost_pair (cp, best_cp))
6252 best_cp = cp;
6256 iv_ca_set_cp (data, ivs, group, best_cp);
6259 /* Get cost for assignment IVS. */
6261 static comp_cost
6262 iv_ca_cost (struct iv_ca *ivs)
6264 /* This was a conditional expression but it triggered a bug in
6265 Sun C 5.5. */
6266 if (ivs->bad_groups)
6267 return infinite_cost;
6268 else
6269 return ivs->cost;
6272 /* Returns true if all dependences of CP are among invariants in IVS. */
6274 static bool
6275 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6277 unsigned i;
6278 bitmap_iterator bi;
6280 if (!cp->depends_on)
6281 return true;
6283 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6285 if (ivs->n_invariant_uses[i] == 0)
6286 return false;
6289 return true;
6292 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6293 it before NEXT. */
6295 static struct iv_ca_delta *
6296 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6297 struct cost_pair *new_cp, struct iv_ca_delta *next)
6299 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6301 change->group = group;
6302 change->old_cp = old_cp;
6303 change->new_cp = new_cp;
6304 change->next = next;
6306 return change;
6309 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6310 are rewritten. */
6312 static struct iv_ca_delta *
6313 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6315 struct iv_ca_delta *last;
6317 if (!l2)
6318 return l1;
6320 if (!l1)
6321 return l2;
6323 for (last = l1; last->next; last = last->next)
6324 continue;
6325 last->next = l2;
6327 return l1;
6330 /* Reverse the list of changes DELTA, forming the inverse to it. */
6332 static struct iv_ca_delta *
6333 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6335 struct iv_ca_delta *act, *next, *prev = NULL;
6337 for (act = delta; act; act = next)
6339 next = act->next;
6340 act->next = prev;
6341 prev = act;
6343 std::swap (act->old_cp, act->new_cp);
6346 return prev;
6349 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6350 reverted instead. */
6352 static void
6353 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6354 struct iv_ca_delta *delta, bool forward)
6356 struct cost_pair *from, *to;
6357 struct iv_ca_delta *act;
/* To revert, reverse the list first (which also swaps old/new in each
   node), apply it, then reverse again to restore the caller's list.  */
6359 if (!forward)
6360 delta = iv_ca_delta_reverse (delta);
6362 for (act = delta; act; act = act->next)
6364 from = act->old_cp;
6365 to = act->new_cp;
/* Each change must start from the state it was recorded against.  */
6366 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6367 iv_ca_set_cp (data, ivs, act->group, to);
6370 if (!forward)
6371 iv_ca_delta_reverse (delta);
6374 /* Returns true if CAND is used in IVS. */
6376 static bool
6377 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6379 return ivs->n_cand_uses[cand->id] > 0;
6382 /* Returns number of induction variable candidates in the set IVS. */
6384 static unsigned
6385 iv_ca_n_cands (struct iv_ca *ivs)
/* Simple accessor; n_cands is maintained by iv_ca_set_cp and
   iv_ca_set_no_cp.  */
6387 return ivs->n_cands;
6390 /* Free the list of changes DELTA. */
6392 static void
6393 iv_ca_delta_free (struct iv_ca_delta **delta)
6395 struct iv_ca_delta *act, *next;
6397 for (act = *delta; act; act = next)
6399 next = act->next;
6400 free (act);
6403 *delta = NULL;
6406 /* Allocates new iv candidates assignment. */
6408 static struct iv_ca *
6409 iv_ca_new (struct ivopts_data *data)
6411 struct iv_ca *nw = XNEW (struct iv_ca);
6413 nw->upto = 0;
6414 nw->bad_groups = 0;
6415 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6416 data->vgroups.length ());
6417 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6418 nw->cands = BITMAP_ALLOC (NULL);
6419 nw->n_cands = 0;
6420 nw->n_regs = 0;
6421 nw->cand_use_cost = no_cost;
6422 nw->cand_cost = 0;
6423 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6424 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6425 nw->cost = no_cost;
6427 return nw;
6430 /* Free memory occupied by the set IVS. */
6432 static void
6433 iv_ca_free (struct iv_ca **ivs)
6435 free ((*ivs)->cand_for_group);
6436 free ((*ivs)->n_cand_uses);
6437 BITMAP_FREE ((*ivs)->cands);
6438 free ((*ivs)->n_invariant_uses);
6439 delete ((*ivs)->used_inv_exprs);
6440 free (*ivs);
6441 *ivs = NULL;
6444 /* Dumps IVS to FILE. */
6446 static void
6447 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6449 unsigned i;
6450 comp_cost cost = iv_ca_cost (ivs);
/* Overall cost, then the breakdown into candidate and group costs.  */
6452 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6453 cost.complexity);
6454 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6455 ivs->cand_cost, ivs->cand_use_cost.cost,
6456 ivs->cand_use_cost.complexity);
6457 bitmap_print (file, ivs->cands, " candidates: ","\n");
/* The candidate chosen for each group processed so far.  */
6459 for (i = 0; i < ivs->upto; i++)
6461 struct iv_group *group = data->vgroups[i];
6462 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6463 if (cp)
6464 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6465 group->id, cp->cand->id, cp->cost.cost,
6466 cp->cost.complexity);
6467 else
6468 fprintf (file, " group:%d --> ??\n", group->id);
/* Comma-separated list of referenced invariant variables.  */
6471 const char *pref = "";
6472 fprintf (file, " invariant variables: ");
6473 for (i = 1; i <= data->max_inv_id; i++)
6474 if (ivs->n_invariant_uses[i])
6476 fprintf (file, "%s%d", pref, i);
6477 pref = ", ";
/* Comma-separated list of referenced invariant expressions.  */
6480 pref = "";
6481 fprintf (file, "\n invariant expressions: ");
6482 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6483 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6485 fprintf (file, "%s%d", pref, (*it).first->id);
6486 pref = ", ";
6489 fprintf (file, "\n\n");
6492 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6493 new set, and store differences in DELTA. Number of induction variables
6494 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6495 the function will try to find a solution with minimal iv candidates. */
6497 static comp_cost
6498 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6499 struct iv_cand *cand, struct iv_ca_delta **delta,
6500 unsigned *n_ivs, bool min_ncand)
6502 unsigned i;
6503 comp_cost cost;
6504 struct iv_group *group;
6505 struct cost_pair *old_cp, *new_cp;
6507 *delta = NULL;
6508 for (i = 0; i < ivs->upto; i++)
6510 group = data->vgroups[i];
6511 old_cp = iv_ca_cand_for_group (ivs, group);
/* Skip groups already expressed by CAND.  */
6513 if (old_cp
6514 && old_cp->cand == cand)
6515 continue;
6517 new_cp = get_group_iv_cost (data, group, cand);
6518 if (!new_cp)
6519 continue;
/* Unless minimizing the candidate count, require the dependences to
   be satisfied and the change to be an improvement.  */
6521 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6522 continue;
6524 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6525 continue;
6527 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
/* Apply the delta just to measure the resulting cost, then roll it
   back; the caller decides whether to commit.  */
6530 iv_ca_delta_commit (data, ivs, *delta, true);
6531 cost = iv_ca_cost (ivs);
6532 if (n_ivs)
6533 *n_ivs = iv_ca_n_cands (ivs);
6534 iv_ca_delta_commit (data, ivs, *delta, false);
6536 return cost;
6539 /* Try narrowing set IVS by removing CAND. Return the cost of
6540 the new set and store the differences in DELTA. START is
6541 the candidate with which we start narrowing. */
6543 static comp_cost
6544 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6545 struct iv_cand *cand, struct iv_cand *start,
6546 struct iv_ca_delta **delta)
6548 unsigned i, ci;
6549 struct iv_group *group;
6550 struct cost_pair *old_cp, *new_cp, *cp;
6551 bitmap_iterator bi;
6552 struct iv_cand *cnd;
6553 comp_cost cost, best_cost, acost;
6555 *delta = NULL;
/* For every group currently expressed by CAND, find the best
   replacement candidate.  */
6556 for (i = 0; i < data->vgroups.length (); i++)
6558 group = data->vgroups[i];
6560 old_cp = iv_ca_cand_for_group (ivs, group);
6561 if (old_cp->cand != cand)
6562 continue;
6564 best_cost = iv_ca_cost (ivs);
6565 /* Start narrowing with START. */
6566 new_cp = get_group_iv_cost (data, group, start);
6568 if (data->consider_all_candidates)
6570 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
/* Never replace with CAND itself or with START (already tried).  */
6572 if (ci == cand->id || (start && ci == start->id))
6573 continue;
6575 cnd = data->vcands[ci];
6577 cp = get_group_iv_cost (data, group, cnd);
6578 if (!cp)
6579 continue;
/* Tentatively switch the group to CP to evaluate the cost.  */
6581 iv_ca_set_cp (data, ivs, group, cp);
6582 acost = iv_ca_cost (ivs);
6584 if (acost < best_cost)
6586 best_cost = acost;
6587 new_cp = cp;
6591 else
/* Same search, restricted to related candidates in the set.  */
6593 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6595 if (ci == cand->id || (start && ci == start->id))
6596 continue;
6598 cnd = data->vcands[ci];
6600 cp = get_group_iv_cost (data, group, cnd);
6601 if (!cp)
6602 continue;
6604 iv_ca_set_cp (data, ivs, group, cp);
6605 acost = iv_ca_cost (ivs);
6607 if (acost < best_cost)
6609 best_cost = acost;
6610 new_cp = cp;
6614 /* Restore to old cp for use. */
6615 iv_ca_set_cp (data, ivs, group, old_cp);
/* Some group cannot live without CAND -- narrowing fails.  */
6617 if (!new_cp)
6619 iv_ca_delta_free (delta);
6620 return infinite_cost;
6623 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
/* Measure the cost of the combined delta, then roll it back.  */
6626 iv_ca_delta_commit (data, ivs, *delta, true);
6627 cost = iv_ca_cost (ivs);
6628 iv_ca_delta_commit (data, ivs, *delta, false);
6630 return cost;
6633 /* Try optimizing the set of candidates IVS by removing candidates different
6634 from EXCEPT_CAND from it. Return cost of the new set, and store
6635 differences in DELTA. */
6637 static comp_cost
6638 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6639 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6641 bitmap_iterator bi;
6642 struct iv_ca_delta *act_delta, *best_delta;
6643 unsigned i;
6644 comp_cost best_cost, acost;
6645 struct iv_cand *cand;
6647 best_delta = NULL;
6648 best_cost = iv_ca_cost (ivs);
/* Try removing each candidate in turn and keep the cheapest delta.  */
6650 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6652 cand = data->vcands[i];
6654 if (cand == except_cand)
6655 continue;
6657 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6659 if (acost < best_cost)
6661 best_cost = acost;
6662 iv_ca_delta_free (&best_delta);
6663 best_delta = act_delta;
6665 else
6666 iv_ca_delta_free (&act_delta);
/* No removal helped; the set is already minimal.  */
6669 if (!best_delta)
6671 *delta = NULL;
6672 return best_cost;
6675 /* Recurse to possibly remove other unnecessary ivs. */
6676 iv_ca_delta_commit (data, ivs, best_delta, true);
6677 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6678 iv_ca_delta_commit (data, ivs, best_delta, false);
6679 *delta = iv_ca_delta_join (best_delta, *delta);
6680 return best_cost;
6683 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6684 cheaper local cost for GROUP than BEST_CP. Return pointer to
6685 the corresponding cost_pair, otherwise just return BEST_CP. */
6687 static struct cost_pair*
6688 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6689 unsigned int cand_idx, struct iv_cand *old_cand,
6690 struct cost_pair *best_cp)
6692 struct iv_cand *cand;
6693 struct cost_pair *cp;
6695 gcc_assert (old_cand != NULL && best_cp != NULL);
6696 if (cand_idx == old_cand->id)
6697 return best_cp;
6699 cand = data->vcands[cand_idx];
6700 cp = get_group_iv_cost (data, group, cand);
6701 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6702 return cp;
6704 return best_cp;
6707 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6708 which are used by more than one iv uses. For each of those candidates,
6709 this function tries to represent iv uses under that candidate using
6710 other ones with lower local cost, then tries to prune the new set.
6711 If the new set has lower cost, it returns the new cost after recording
6712 candidate replacement in list DELTA. */
6714 static comp_cost
6715 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6716 struct iv_ca_delta **delta)
6718 bitmap_iterator bi, bj;
6719 unsigned int i, j, k;
6720 struct iv_cand *cand;
6721 comp_cost orig_cost, acost;
6722 struct iv_ca_delta *act_delta, *tmp_delta;
6723 struct cost_pair *old_cp, *best_cp = NULL;
6725 *delta = NULL;
6726 orig_cost = iv_ca_cost (ivs);
6728 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
/* Only consider candidates shared by several uses; sets above the
   bound are handled by the always-prune path instead.  */
6730 if (ivs->n_cand_uses[i] == 1
6731 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6732 continue;
6734 cand = data->vcands[i];
6736 act_delta = NULL;
6737 /* Represent uses under current candidate using other ones with
6738 lower local cost. */
6739 for (j = 0; j < ivs->upto; j++)
6741 struct iv_group *group = data->vgroups[j];
6742 old_cp = iv_ca_cand_for_group (ivs, group);
6744 if (old_cp->cand != cand)
6745 continue;
6747 best_cp = old_cp;
6748 if (data->consider_all_candidates)
6749 for (k = 0; k < data->vcands.length (); k++)
6750 best_cp = cheaper_cost_with_cand (data, group, k,
6751 old_cp->cand, best_cp);
6752 else
6753 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6754 best_cp = cheaper_cost_with_cand (data, group, k,
6755 old_cp->cand, best_cp);
6757 if (best_cp == old_cp)
6758 continue;
6760 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6762 /* No need for further prune. */
6763 if (!act_delta)
6764 continue;
6766 /* Prune the new candidate set. */
6767 iv_ca_delta_commit (data, ivs, act_delta, true);
6768 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6769 iv_ca_delta_commit (data, ivs, act_delta, false);
6770 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
/* Accept the first replacement that beats the original cost.  */
6772 if (acost < orig_cost)
6774 *delta = act_delta;
6775 return acost;
6777 else
6778 iv_ca_delta_free (&act_delta);
6781 return orig_cost;
6784 /* Tries to extend the sets IVS in the best possible way in order to
6785 express the GROUP. If ORIGINALP is true, prefer candidates from
6786 the original set of IVs, otherwise favor important candidates not
6787 based on any memory object. */
6789 static bool
6790 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6791 struct iv_group *group, bool originalp)
6793 comp_cost best_cost, act_cost;
6794 unsigned i;
6795 bitmap_iterator bi;
6796 struct iv_cand *cand;
6797 struct iv_ca_delta *best_delta = NULL, *act_delta;
6798 struct cost_pair *cp;
/* Let iv_ca_add_group pick a baseline; record it as a delta and
   detach it again so alternatives start from the same state.  */
6800 iv_ca_add_group (data, ivs, group);
6801 best_cost = iv_ca_cost (ivs);
6802 cp = iv_ca_cand_for_group (ivs, group);
6803 if (cp)
6805 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6806 iv_ca_set_no_cp (data, ivs, group);
6809 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6810 first try important candidates not based on any memory object. Only if
6811 this fails, try the specific ones. Rationale -- in loops with many
6812 variables the best choice often is to use just one generic biv. If we
6813 added here many ivs specific to the uses, the optimization algorithm later
6814 would be likely to get stuck in a local minimum, thus causing us to create
6815 too many ivs. The approach from few ivs to more seems more likely to be
6816 successful -- starting from few ivs, replacing an expensive use by a
6817 specific iv should always be a win. */
6818 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6820 cand = data->vcands[i];
6822 if (originalp && cand->pos !=IP_ORIGINAL)
6823 continue;
6825 if (!originalp && cand->iv->base_object != NULL_TREE)
6826 continue;
6828 if (iv_ca_cand_used_p (ivs, cand))
6829 continue;
6831 cp = get_group_iv_cost (data, group, cand);
6832 if (!cp)
6833 continue;
/* Tentatively use CAND for the group, measure the cost of extending
   the rest of the set with it, then detach it again.  */
6835 iv_ca_set_cp (data, ivs, group, cp);
6836 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6837 true);
6838 iv_ca_set_no_cp (data, ivs, group);
6839 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6841 if (act_cost < best_cost)
6843 best_cost = act_cost;
6845 iv_ca_delta_free (&best_delta);
6846 best_delta = act_delta;
6848 else
6849 iv_ca_delta_free (&act_delta);
/* If nothing above worked, fall back to every candidate in the
   group's cost map that was not tried in the first pass.  */
6852 if (best_cost.infinite_cost_p ())
6854 for (i = 0; i < group->n_map_members; i++)
6856 cp = group->cost_map + i;
6857 cand = cp->cand;
6858 if (!cand)
6859 continue;
6861 /* Already tried this. */
6862 if (cand->important)
6864 if (originalp && cand->pos == IP_ORIGINAL)
6865 continue;
6866 if (!originalp && cand->iv->base_object == NULL_TREE)
6867 continue;
6870 if (iv_ca_cand_used_p (ivs, cand))
6871 continue;
6873 act_delta = NULL;
6874 iv_ca_set_cp (data, ivs, group, cp);
6875 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6876 iv_ca_set_no_cp (data, ivs, group);
6877 act_delta = iv_ca_delta_add (group,
6878 iv_ca_cand_for_group (ivs, group),
6879 cp, act_delta);
6881 if (act_cost < best_cost)
6883 best_cost = act_cost;
6885 if (best_delta)
6886 iv_ca_delta_free (&best_delta);
6887 best_delta = act_delta;
6889 else
6890 iv_ca_delta_free (&act_delta);
/* Commit the winning alternative for good.  */
6894 iv_ca_delta_commit (data, ivs, best_delta, true);
6895 iv_ca_delta_free (&best_delta);
6897 return !best_cost.infinite_cost_p ();
6900 /* Finds an initial assignment of candidates to uses. */
6902 static struct iv_ca *
6903 get_initial_solution (struct ivopts_data *data, bool originalp)
6905 unsigned i;
6906 struct iv_ca *ivs = iv_ca_new (data);
6908 for (i = 0; i < data->vgroups.length (); i++)
6909 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6911 iv_ca_free (&ivs);
6912 return NULL;
6915 return ivs;
6918 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6919 points to a bool variable, this function tries to break local
6920 optimal fixed-point by replacing candidates in IVS if it's true. */
6922 static bool
6923 try_improve_iv_set (struct ivopts_data *data,
6924 struct iv_ca *ivs, bool *try_replace_p)
6926 unsigned i, n_ivs;
6927 comp_cost acost, best_cost = iv_ca_cost (ivs);
6928 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6929 struct iv_cand *cand;
6931 /* Try extending the set of induction variables by one. */
6932 for (i = 0; i < data->vcands.length (); i++)
6934 cand = data->vcands[i];
6936 if (iv_ca_cand_used_p (ivs, cand))
6937 continue;
6939 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6940 if (!act_delta)
6941 continue;
6943 /* If we successfully added the candidate and the set is small enough,
6944 try optimizing it by removing other candidates. */
6945 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6947 iv_ca_delta_commit (data, ivs, act_delta, true);
6948 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6949 iv_ca_delta_commit (data, ivs, act_delta, false);
6950 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6953 if (acost < best_cost)
6955 best_cost = acost;
6956 iv_ca_delta_free (&best_delta);
6957 best_delta = act_delta;
6959 else
6960 iv_ca_delta_free (&act_delta);
6963 if (!best_delta)
6965 /* Try removing the candidates from the set instead. */
6966 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6968 if (!best_delta && *try_replace_p)
6970 *try_replace_p = false;
6971 /* So far candidate selecting algorithm tends to choose fewer IVs
6972 so that it can handle cases in which loops have many variables
6973 but the best choice is often to use only one general biv. One
6974 weakness is it can't handle opposite cases, in which different
6975 candidates should be chosen with respect to each use. To solve
6976 the problem, we replace candidates in a manner described by the
6977 comments of iv_ca_replace, thus give general algorithm a chance
6978 to break local optimal fixed-point in these cases. */
6979 best_cost = iv_ca_replace (data, ivs, &best_delta);
6982 if (!best_delta)
6983 return false;
6986 iv_ca_delta_commit (data, ivs, best_delta, true);
6987 gcc_assert (best_cost == iv_ca_cost (ivs));
6988 iv_ca_delta_free (&best_delta);
6989 return true;
6992 /* Attempts to find the optimal set of induction variables. We do simple
6993 greedy heuristic -- we try to replace at most one candidate in the selected
6994 solution and remove the unused ivs while this improves the cost. */
6996 static struct iv_ca *
6997 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6999 struct iv_ca *set;
7000 bool try_replace_p = true;
7002 /* Get the initial solution. */
7003 set = get_initial_solution (data, originalp);
7004 if (!set)
7006 if (dump_file && (dump_flags & TDF_DETAILS))
7007 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
7008 return NULL;
7011 if (dump_file && (dump_flags & TDF_DETAILS))
7013 fprintf (dump_file, "Initial set of candidates:\n");
7014 iv_ca_dump (data, dump_file, set);
7017 while (try_improve_iv_set (data, set, &try_replace_p))
7019 if (dump_file && (dump_flags & TDF_DETAILS))
7021 fprintf (dump_file, "Improved to:\n");
7022 iv_ca_dump (data, dump_file, set);
7026 return set;
7029 static struct iv_ca *
7030 find_optimal_iv_set (struct ivopts_data *data)
7032 unsigned i;
7033 comp_cost cost, origcost;
7034 struct iv_ca *set, *origset;
7036 /* Determine the cost based on a strategy that starts with original IVs,
7037 and try again using a strategy that prefers candidates not based
7038 on any IVs. */
7039 origset = find_optimal_iv_set_1 (data, true);
7040 set = find_optimal_iv_set_1 (data, false);
7042 if (!origset && !set)
7043 return NULL;
7045 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7046 cost = set ? iv_ca_cost (set) : infinite_cost;
7048 if (dump_file && (dump_flags & TDF_DETAILS))
7050 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7051 origcost.cost, origcost.complexity);
7052 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7053 cost.cost, cost.complexity);
7056 /* Choose the one with the best cost. */
7057 if (origcost <= cost)
7059 if (set)
7060 iv_ca_free (&set);
7061 set = origset;
7063 else if (origset)
7064 iv_ca_free (&origset);
7066 for (i = 0; i < data->vgroups.length (); i++)
7068 struct iv_group *group = data->vgroups[i];
7069 group->selected = iv_ca_cand_for_group (set, group)->cand;
7072 return set;
7075 /* Creates a new induction variable corresponding to CAND. */
7077 static void
7078 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7080 gimple_stmt_iterator incr_pos;
7081 tree base;
7082 struct iv_use *use;
7083 struct iv_group *group;
7084 bool after = false;
7086 if (!cand->iv)
7087 return;
7089 switch (cand->pos)
7091 case IP_NORMAL:
7092 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7093 break;
7095 case IP_END:
7096 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7097 after = true;
7098 break;
7100 case IP_AFTER_USE:
7101 after = true;
7102 /* fall through */
7103 case IP_BEFORE_USE:
7104 incr_pos = gsi_for_stmt (cand->incremented_at);
7105 break;
7107 case IP_ORIGINAL:
7108 /* Mark that the iv is preserved. */
7109 name_info (data, cand->var_before)->preserve_biv = true;
7110 name_info (data, cand->var_after)->preserve_biv = true;
7112 /* Rewrite the increment so that it uses var_before directly. */
7113 use = find_interesting_uses_op (data, cand->var_after);
7114 group = data->vgroups[use->group_id];
7115 group->selected = cand;
7116 return;
7119 gimple_add_tmp_var (cand->var_before);
7121 base = unshare_expr (cand->iv->base);
7123 create_iv (base, unshare_expr (cand->iv->step),
7124 cand->var_before, data->current_loop,
7125 &incr_pos, after, &cand->var_before, &cand->var_after);
7128 /* Creates new induction variables described in SET. */
7130 static void
7131 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7133 unsigned i;
7134 struct iv_cand *cand;
7135 bitmap_iterator bi;
7137 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7139 cand = data->vcands[i];
7140 create_new_iv (data, cand);
7143 if (dump_file && (dump_flags & TDF_DETAILS))
7145 fprintf (dump_file, "Selected IV set for loop %d",
7146 data->current_loop->num);
7147 if (data->loop_loc != UNKNOWN_LOCATION)
7148 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7149 LOCATION_LINE (data->loop_loc));
7150 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7151 avg_loop_niter (data->current_loop));
7152 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7153 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7154 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7155 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7157 cand = data->vcands[i];
7158 dump_cand (dump_file, cand);
7160 fprintf (dump_file, "\n");
7164 /* Rewrites USE (definition of iv used in a nonlinear expression)
7165 using candidate CAND. */
7167 static void
7168 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7169 struct iv_use *use, struct iv_cand *cand)
7171 tree comp;
7172 tree op, tgt;
7173 gassign *ass;
7174 gimple_stmt_iterator bsi;
7176 /* An important special case -- if we are asked to express value of
7177 the original iv by itself, just exit; there is no need to
7178 introduce a new computation (that might also need casting the
7179 variable to unsigned and back). */
7180 if (cand->pos == IP_ORIGINAL
7181 && cand->incremented_at == use->stmt)
7183 enum tree_code stmt_code;
7185 gcc_assert (is_gimple_assign (use->stmt));
7186 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7188 /* Check whether we may leave the computation unchanged.
7189 This is the case only if it does not rely on other
7190 computations in the loop -- otherwise, the computation
7191 we rely upon may be removed in remove_unused_ivs,
7192 thus leading to ICE. */
7193 stmt_code = gimple_assign_rhs_code (use->stmt);
7194 if (stmt_code == PLUS_EXPR
7195 || stmt_code == MINUS_EXPR
7196 || stmt_code == POINTER_PLUS_EXPR)
7198 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7199 op = gimple_assign_rhs2 (use->stmt);
7200 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7201 op = gimple_assign_rhs1 (use->stmt);
7202 else
7203 op = NULL_TREE;
7205 else
7206 op = NULL_TREE;
7208 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7209 return;
7212 comp = get_computation (data->current_loop, use, cand);
7213 gcc_assert (comp != NULL_TREE);
7215 switch (gimple_code (use->stmt))
7217 case GIMPLE_PHI:
7218 tgt = PHI_RESULT (use->stmt);
7220 /* If we should keep the biv, do not replace it. */
7221 if (name_info (data, tgt)->preserve_biv)
7222 return;
7224 bsi = gsi_after_labels (gimple_bb (use->stmt));
7225 break;
7227 case GIMPLE_ASSIGN:
7228 tgt = gimple_assign_lhs (use->stmt);
7229 bsi = gsi_for_stmt (use->stmt);
7230 break;
7232 default:
7233 gcc_unreachable ();
7236 if (!valid_gimple_rhs_p (comp)
7237 || (gimple_code (use->stmt) != GIMPLE_PHI
7238 /* We can't allow re-allocating the stmt as it might be pointed
7239 to still. */
7240 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7241 >= gimple_num_ops (gsi_stmt (bsi)))))
7243 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7244 true, GSI_SAME_STMT);
7245 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7247 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7248 /* As this isn't a plain copy we have to reset alignment
7249 information. */
7250 if (SSA_NAME_PTR_INFO (comp))
7251 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7255 if (gimple_code (use->stmt) == GIMPLE_PHI)
7257 ass = gimple_build_assign (tgt, comp);
7258 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7260 bsi = gsi_for_stmt (use->stmt);
7261 remove_phi_node (&bsi, false);
7263 else
7265 gimple_assign_set_rhs_from_tree (&bsi, comp);
7266 use->stmt = gsi_stmt (bsi);
7270 /* Performs a peephole optimization to reorder the iv update statement with
7271 a mem ref to enable instruction combining in later phases. The mem ref uses
7272 the iv value before the update, so the reordering transformation requires
7273 adjustment of the offset. CAND is the selected IV_CAND.
7275 Example:
7277 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7278 iv2 = iv1 + 1;
7280 if (t < val) (1)
7281 goto L;
7282 goto Head;
7285 directly propagating t over to (1) will introduce overlapping live range
7286 thus increase register pressure. This peephole transform it into:
7289 iv2 = iv1 + 1;
7290 t = MEM_REF (base, iv2, 8, 8);
7291 if (t < val)
7292 goto L;
7293 goto Head;
7296 static void
7297 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7299 tree var_after;
7300 gimple *iv_update, *stmt;
7301 basic_block bb;
7302 gimple_stmt_iterator gsi, gsi_iv;
7304 if (cand->pos != IP_NORMAL)
7305 return;
7307 var_after = cand->var_after;
7308 iv_update = SSA_NAME_DEF_STMT (var_after);
7310 bb = gimple_bb (iv_update);
7311 gsi = gsi_last_nondebug_bb (bb);
7312 stmt = gsi_stmt (gsi);
7314 /* Only handle conditional statement for now. */
7315 if (gimple_code (stmt) != GIMPLE_COND)
7316 return;
7318 gsi_prev_nondebug (&gsi);
7319 stmt = gsi_stmt (gsi);
7320 if (stmt != iv_update)
7321 return;
7323 gsi_prev_nondebug (&gsi);
7324 if (gsi_end_p (gsi))
7325 return;
7327 stmt = gsi_stmt (gsi);
7328 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7329 return;
7331 if (stmt != use->stmt)
7332 return;
7334 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7335 return;
7337 if (dump_file && (dump_flags & TDF_DETAILS))
7339 fprintf (dump_file, "Reordering \n");
7340 print_gimple_stmt (dump_file, iv_update, 0, 0);
7341 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7342 fprintf (dump_file, "\n");
7345 gsi = gsi_for_stmt (use->stmt);
7346 gsi_iv = gsi_for_stmt (iv_update);
7347 gsi_move_before (&gsi_iv, &gsi);
7349 cand->pos = IP_BEFORE_USE;
7350 cand->incremented_at = use->stmt;
7353 /* Rewrites USE (address that is an iv) using candidate CAND. */
7355 static void
7356 rewrite_use_address (struct ivopts_data *data,
7357 struct iv_use *use, struct iv_cand *cand)
7359 aff_tree aff;
7360 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7361 tree base_hint = NULL_TREE;
7362 tree ref, iv;
7363 bool ok;
7365 adjust_iv_update_pos (cand, use);
7366 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7367 gcc_assert (ok);
7368 unshare_aff_combination (&aff);
7370 /* To avoid undefined overflow problems, all IV candidates use unsigned
7371 integer types. The drawback is that this makes it impossible for
7372 create_mem_ref to distinguish an IV that is based on a memory object
7373 from one that represents simply an offset.
7375 To work around this problem, we pass a hint to create_mem_ref that
7376 indicates which variable (if any) in aff is an IV based on a memory
7377 object. Note that we only consider the candidate. If this is not
7378 based on an object, the base of the reference is in some subexpression
7379 of the use -- but these will use pointer types, so they are recognized
7380 by the create_mem_ref heuristics anyway. */
7381 if (cand->iv->base_object)
7382 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7384 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7385 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7386 reference_alias_ptr_type (*use->op_p),
7387 iv, base_hint, data->speed);
7388 copy_ref_info (ref, *use->op_p);
7389 *use->op_p = ref;
7392 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7393 candidate CAND. */
7395 static void
7396 rewrite_use_compare (struct ivopts_data *data,
7397 struct iv_use *use, struct iv_cand *cand)
7399 tree comp, *var_p, op, bound;
7400 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7401 enum tree_code compare;
7402 struct iv_group *group = data->vgroups[use->group_id];
7403 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7404 bool ok;
7406 bound = cp->value;
7407 if (bound)
7409 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7410 tree var_type = TREE_TYPE (var);
7411 gimple_seq stmts;
7413 if (dump_file && (dump_flags & TDF_DETAILS))
7415 fprintf (dump_file, "Replacing exit test: ");
7416 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7418 compare = cp->comp;
7419 bound = unshare_expr (fold_convert (var_type, bound));
7420 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7421 if (stmts)
7422 gsi_insert_seq_on_edge_immediate (
7423 loop_preheader_edge (data->current_loop),
7424 stmts);
7426 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7427 gimple_cond_set_lhs (cond_stmt, var);
7428 gimple_cond_set_code (cond_stmt, compare);
7429 gimple_cond_set_rhs (cond_stmt, op);
7430 return;
7433 /* The induction variable elimination failed; just express the original
7434 giv. */
7435 comp = get_computation (data->current_loop, use, cand);
7436 gcc_assert (comp != NULL_TREE);
7438 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7439 gcc_assert (ok);
7441 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7442 true, GSI_SAME_STMT);
7445 /* Rewrite the groups using the selected induction variables. */
7447 static void
7448 rewrite_groups (struct ivopts_data *data)
7450 unsigned i, j;
7452 for (i = 0; i < data->vgroups.length (); i++)
7454 struct iv_group *group = data->vgroups[i];
7455 struct iv_cand *cand = group->selected;
7457 gcc_assert (cand);
7459 if (group->type == USE_NONLINEAR_EXPR)
7461 for (j = 0; j < group->vuses.length (); j++)
7463 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7464 update_stmt (group->vuses[j]->stmt);
7467 else if (group->type == USE_ADDRESS)
7469 for (j = 0; j < group->vuses.length (); j++)
7471 rewrite_use_address (data, group->vuses[j], cand);
7472 update_stmt (group->vuses[j]->stmt);
7475 else
7477 gcc_assert (group->type == USE_COMPARE);
7479 for (j = 0; j < group->vuses.length (); j++)
7481 rewrite_use_compare (data, group->vuses[j], cand);
7482 update_stmt (group->vuses[j]->stmt);
7488 /* Removes the ivs that are not used after rewriting. */
7490 static void
7491 remove_unused_ivs (struct ivopts_data *data)
7493 unsigned j;
7494 bitmap_iterator bi;
7495 bitmap toremove = BITMAP_ALLOC (NULL);
7497 /* Figure out an order in which to release SSA DEFs so that we don't
7498 release something that we'd have to propagate into a debug stmt
7499 afterwards. */
7500 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7502 struct version_info *info;
7504 info = ver_info (data, j);
7505 if (info->iv
7506 && !integer_zerop (info->iv->step)
7507 && !info->inv_id
7508 && !info->iv->nonlin_use
7509 && !info->preserve_biv)
7511 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7513 tree def = info->iv->ssa_name;
7515 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7517 imm_use_iterator imm_iter;
7518 use_operand_p use_p;
7519 gimple *stmt;
7520 int count = 0;
7522 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7524 if (!gimple_debug_bind_p (stmt))
7525 continue;
7527 /* We just want to determine whether to do nothing
7528 (count == 0), to substitute the computed
7529 expression into a single use of the SSA DEF by
7530 itself (count == 1), or to use a debug temp
7531 because the SSA DEF is used multiple times or as
7532 part of a larger expression (count > 1). */
7533 count++;
7534 if (gimple_debug_bind_get_value (stmt) != def)
7535 count++;
7537 if (count > 1)
7538 BREAK_FROM_IMM_USE_STMT (imm_iter);
7541 if (!count)
7542 continue;
7544 struct iv_use dummy_use;
7545 struct iv_cand *best_cand = NULL, *cand;
7546 unsigned i, best_pref = 0, cand_pref;
7548 memset (&dummy_use, 0, sizeof (dummy_use));
7549 dummy_use.iv = info->iv;
7550 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7552 cand = data->vgroups[i]->selected;
7553 if (cand == best_cand)
7554 continue;
7555 cand_pref = operand_equal_p (cand->iv->step,
7556 info->iv->step, 0)
7557 ? 4 : 0;
7558 cand_pref
7559 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7560 == TYPE_MODE (TREE_TYPE (info->iv->base))
7561 ? 2 : 0;
7562 cand_pref
7563 += TREE_CODE (cand->iv->base) == INTEGER_CST
7564 ? 1 : 0;
7565 if (best_cand == NULL || best_pref < cand_pref)
7567 best_cand = cand;
7568 best_pref = cand_pref;
7572 if (!best_cand)
7573 continue;
7575 tree comp = get_computation_at (data->current_loop,
7576 &dummy_use, best_cand,
7577 SSA_NAME_DEF_STMT (def));
7578 if (!comp)
7579 continue;
7581 if (count > 1)
7583 tree vexpr = make_node (DEBUG_EXPR_DECL);
7584 DECL_ARTIFICIAL (vexpr) = 1;
7585 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7586 if (SSA_NAME_VAR (def))
7587 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7588 else
7589 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7590 gdebug *def_temp
7591 = gimple_build_debug_bind (vexpr, comp, NULL);
7592 gimple_stmt_iterator gsi;
7594 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7595 gsi = gsi_after_labels (gimple_bb
7596 (SSA_NAME_DEF_STMT (def)));
7597 else
7598 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7600 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7601 comp = vexpr;
7604 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7606 if (!gimple_debug_bind_p (stmt))
7607 continue;
7609 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7610 SET_USE (use_p, comp);
7612 update_stmt (stmt);
7618 release_defs_bitset (toremove);
7620 BITMAP_FREE (toremove);
7623 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7624 for hash_map::traverse. */
7626 bool
7627 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7629 free (value);
7630 return true;
7633 /* Frees data allocated by the optimization of a single loop. */
7635 static void
7636 free_loop_data (struct ivopts_data *data)
7638 unsigned i, j;
7639 bitmap_iterator bi;
7640 tree obj;
7642 if (data->niters)
7644 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7645 delete data->niters;
7646 data->niters = NULL;
7649 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7651 struct version_info *info;
7653 info = ver_info (data, i);
7654 info->iv = NULL;
7655 info->has_nonlin_use = false;
7656 info->preserve_biv = false;
7657 info->inv_id = 0;
7659 bitmap_clear (data->relevant);
7660 bitmap_clear (data->important_candidates);
7662 for (i = 0; i < data->vgroups.length (); i++)
7664 struct iv_group *group = data->vgroups[i];
7666 for (j = 0; j < group->vuses.length (); j++)
7667 free (group->vuses[j]);
7668 group->vuses.release ();
7670 BITMAP_FREE (group->related_cands);
7671 for (j = 0; j < group->n_map_members; j++)
7672 if (group->cost_map[j].depends_on)
7673 BITMAP_FREE (group->cost_map[j].depends_on);
7675 free (group->cost_map);
7676 free (group);
7678 data->vgroups.truncate (0);
7680 for (i = 0; i < data->vcands.length (); i++)
7682 struct iv_cand *cand = data->vcands[i];
7684 if (cand->depends_on)
7685 BITMAP_FREE (cand->depends_on);
7686 free (cand);
7688 data->vcands.truncate (0);
7690 if (data->version_info_size < num_ssa_names)
7692 data->version_info_size = 2 * num_ssa_names;
7693 free (data->version_info);
7694 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7697 data->max_inv_id = 0;
7699 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7700 SET_DECL_RTL (obj, NULL_RTX);
7702 decl_rtl_to_reset.truncate (0);
7704 data->inv_expr_tab->empty ();
7705 data->max_inv_expr_id = 0;
7707 data->iv_common_cand_tab->empty ();
7708 data->iv_common_cands.truncate (0);
7711 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7712 loop tree. */
7714 static void
7715 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7717 free_loop_data (data);
7718 free (data->version_info);
7719 BITMAP_FREE (data->relevant);
7720 BITMAP_FREE (data->important_candidates);
7722 decl_rtl_to_reset.release ();
7723 data->vgroups.release ();
7724 data->vcands.release ();
7725 delete data->inv_expr_tab;
7726 data->inv_expr_tab = NULL;
7727 free_affine_expand_cache (&data->name_expansion_cache);
7728 delete data->iv_common_cand_tab;
7729 data->iv_common_cand_tab = NULL;
7730 data->iv_common_cands.release ();
7731 obstack_free (&data->iv_obstack, NULL);
7734 /* Returns true if the loop body BODY includes any function calls. */
7736 static bool
7737 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7739 gimple_stmt_iterator gsi;
7740 unsigned i;
7742 for (i = 0; i < num_nodes; i++)
7743 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7745 gimple *stmt = gsi_stmt (gsi);
7746 if (is_gimple_call (stmt)
7747 && !gimple_call_internal_p (stmt)
7748 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7749 return true;
7751 return false;
7754 /* Optimizes the LOOP. Returns true if anything changed. */
7756 static bool
7757 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7759 bool changed = false;
7760 struct iv_ca *iv_ca;
7761 edge exit = single_dom_exit (loop);
7762 basic_block *body;
7764 gcc_assert (!data->niters);
7765 data->current_loop = loop;
7766 data->loop_loc = find_loop_location (loop);
7767 data->speed = optimize_loop_for_speed_p (loop);
7769 if (dump_file && (dump_flags & TDF_DETAILS))
7771 fprintf (dump_file, "Processing loop %d", loop->num);
7772 if (data->loop_loc != UNKNOWN_LOCATION)
7773 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7774 LOCATION_LINE (data->loop_loc));
7775 fprintf (dump_file, "\n");
7777 if (exit)
7779 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7780 exit->src->index, exit->dest->index);
7781 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7782 fprintf (dump_file, "\n");
7785 fprintf (dump_file, "\n");
7788 body = get_loop_body (loop);
7789 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7790 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7791 free (body);
7793 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7795 /* For each ssa name determines whether it behaves as an induction variable
7796 in some loop. */
7797 if (!find_induction_variables (data))
7798 goto finish;
7800 /* Finds interesting uses (item 1). */
7801 find_interesting_uses (data);
7802 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7803 goto finish;
7805 /* Finds candidates for the induction variables (item 2). */
7806 find_iv_candidates (data);
7808 /* Calculates the costs (item 3, part 1). */
7809 determine_iv_costs (data);
7810 determine_group_iv_costs (data);
7811 determine_set_costs (data);
7813 /* Find the optimal set of induction variables (item 3, part 2). */
7814 iv_ca = find_optimal_iv_set (data);
7815 if (!iv_ca)
7816 goto finish;
7817 changed = true;
7819 /* Create the new induction variables (item 4, part 1). */
7820 create_new_ivs (data, iv_ca);
7821 iv_ca_free (&iv_ca);
7823 /* Rewrite the uses (item 4, part 2). */
7824 rewrite_groups (data);
7826 /* Remove the ivs that are unused after rewriting. */
7827 remove_unused_ivs (data);
7829 /* We have changed the structure of induction variables; it might happen
7830 that definitions in the scev database refer to some of them that were
7831 eliminated. */
7832 scev_reset ();
7834 finish:
7835 free_loop_data (data);
7837 return changed;
7840 /* Main entry point. Optimizes induction variables in loops. */
7842 void
7843 tree_ssa_iv_optimize (void)
7845 struct loop *loop;
7846 struct ivopts_data data;
7848 tree_ssa_iv_optimize_init (&data);
7850 /* Optimize the loops starting with the innermost ones. */
7851 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7853 if (dump_file && (dump_flags & TDF_DETAILS))
7854 flow_loop_dump (loop, dump_file, NULL, 1);
7856 tree_ssa_iv_optimize_loop (&data, loop);
7859 tree_ssa_iv_optimize_finalize (&data);