Remove assert in get_def_bb_for_const
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blobe6c49e9707b6abfb5e78249a511df6a2a7adc676
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2016 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 Note the interesting uses are categorized and handled in group.
34 Generally, address type uses are grouped together if their iv bases
35 are different in constant offset.
37 2) Candidates for the induction variables are found. This includes
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
43 3) The optimal (w.r. to a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variables
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible, it might give a better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "tm_p.h"
79 #include "ssa.h"
80 #include "expmed.h"
81 #include "insn-config.h"
82 #include "emit-rtl.h"
83 #include "recog.h"
84 #include "cgraph.h"
85 #include "gimple-pretty-print.h"
86 #include "alias.h"
87 #include "fold-const.h"
88 #include "stor-layout.h"
89 #include "tree-eh.h"
90 #include "gimplify.h"
91 #include "gimple-iterator.h"
92 #include "gimplify-me.h"
93 #include "tree-cfg.h"
94 #include "tree-ssa-loop-ivopts.h"
95 #include "tree-ssa-loop-manip.h"
96 #include "tree-ssa-loop-niter.h"
97 #include "tree-ssa-loop.h"
98 #include "explow.h"
99 #include "expr.h"
100 #include "tree-dfa.h"
101 #include "tree-ssa.h"
102 #include "cfgloop.h"
103 #include "tree-scalar-evolution.h"
104 #include "params.h"
105 #include "tree-affine.h"
106 #include "tree-ssa-propagate.h"
107 #include "tree-ssa-address.h"
108 #include "builtins.h"
109 #include "tree-vectorizer.h"
111 /* FIXME: Expressions are expanded to RTL in this pass to determine the
112 cost of different addressing modes. This should be moved to a TBD
113 interface between the GIMPLE and RTL worlds. */
/* The infinite cost.  */
#define INFTY 10000000

/* Fallback expected trip count used by avg_loop_niter when neither a
   profile estimate nor a likely-max bound below this value is known.
   (LOOP is unused; kept for call-site readability.)  */
#define AVG_LOOP_NITER(LOOP) 5
120 /* Returns the expected number of loop iterations for LOOP.
121 The average trip count is computed from profile data if it
122 exists. */
124 static inline HOST_WIDE_INT
125 avg_loop_niter (struct loop *loop)
127 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
128 if (niter == -1)
130 niter = likely_max_stmt_executions_int (loop);
131 if (niter == -1 || niter > AVG_LOOP_NITER (loop))
132 return AVG_LOOP_NITER (loop);
135 return niter;
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The identifier in the use if it is the case.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  A cost of INFTY in the COST field marks the
   sentinel "infinite" cost (see infinite_cost_p and the arithmetic
   operators below, which treat it as absorbing).  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C to this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divide the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiply the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts costs COST1 and COST2.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller or equal than COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
230 bool
231 comp_cost::infinite_cost_p ()
233 return cost == INFTY;
236 comp_cost
237 operator+ (comp_cost cost1, comp_cost cost2)
239 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
240 return infinite_cost;
242 cost1.cost += cost2.cost;
243 cost1.complexity += cost2.complexity;
245 return cost1;
248 comp_cost
249 operator- (comp_cost cost1, comp_cost cost2)
251 if (cost1.infinite_cost_p ())
252 return infinite_cost;
254 gcc_assert (!cost2.infinite_cost_p ());
256 cost1.cost -= cost2.cost;
257 cost1.complexity -= cost2.complexity;
259 return cost1;
262 comp_cost
263 comp_cost::operator+= (comp_cost cost)
265 *this = *this + cost;
266 return *this;
269 comp_cost
270 comp_cost::operator+= (HOST_WIDE_INT c)
272 if (infinite_cost_p ())
273 return *this;
275 this->cost += c;
277 return *this;
280 comp_cost
281 comp_cost::operator-= (HOST_WIDE_INT c)
283 if (infinite_cost_p ())
284 return *this;
286 this->cost -= c;
288 return *this;
291 comp_cost
292 comp_cost::operator/= (HOST_WIDE_INT c)
294 if (infinite_cost_p ())
295 return *this;
297 this->cost /= c;
299 return *this;
302 comp_cost
303 comp_cost::operator*= (HOST_WIDE_INT c)
305 if (infinite_cost_p ())
306 return *this;
308 this->cost *= c;
310 return *this;
313 comp_cost
314 comp_cost::operator-= (comp_cost cost)
316 *this = *this - cost;
317 return *this;
320 bool
321 operator< (comp_cost cost1, comp_cost cost2)
323 if (cost1.cost == cost2.cost)
324 return cost1.complexity < cost2.complexity;
326 return cost1.cost < cost2.cost;
329 bool
330 operator== (comp_cost cost1, comp_cost cost2)
332 return cost1.cost == cost2.cost
333 && cost1.complexity == cost2.complexity;
336 bool
337 operator<= (comp_cost cost1, comp_cost cost2)
339 return cost1 < cost2 || cost1 == cost2;
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  iv_inv_expr_ent *inv_expr;	/* Loop invariant expression.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs wrto the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;	/* For IP_{BEFORE,AFTER}_USE candidates, the place
				   where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  Entries are freed with delete when the table is
   destroyed (delete_ptr_hash).  */
struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};
445 /* Hash function for possible common candidates. */
447 inline hashval_t
448 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
450 return ccand->hash;
453 /* Hash table equality function for common candidates. */
455 inline bool
456 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
457 const iv_common_cand *ccand2)
459 return (ccand1->hash == ccand2->hash
460 && operand_equal_p (ccand1->base, ccand2->base, 0)
461 && operand_equal_p (ccand1->step, ccand2->step, 0)
462 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
463 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};
478 /* Sort iv_inv_expr_ent pair A and B by id field. */
480 static int
481 sort_iv_inv_expr_ent (const void *a, const void *b)
483 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
484 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
486 unsigned id1 = (*e1)->id;
487 unsigned id2 = (*e2)->id;
489 if (id1 < id2)
490 return -1;
491 else if (id1 > id2)
492 return 1;
493 else
494 return 0;
/* Hashtable helpers for the invariant-expression table.  Entries are
   released with free () (free_ptr_hash).  */
struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};
505 /* Hash function for loop invariant expressions. */
507 inline hashval_t
508 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
510 return expr->hash;
513 /* Hash table equality function for expressions. */
515 inline bool
516 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
517 const iv_inv_expr_ent *expr2)
519 return expr1->hash == expr2->hash
520 && operand_equal_p (expr1->expr, expr2->expr, 0);
/* All per-loop state of the IVOPTS pass.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  /* Source location of the loop (presumably for dump output — confirm at
     the sites that set it).  */
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int max_inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Hash set with used invariant expression.  */
  hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

/* Forward declaration; defined later in this file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
674 /* The single loop exit if it dominates the latch, NULL otherwise. */
676 edge
677 single_dom_exit (struct loop *loop)
679 edge exit = single_exit (loop);
681 if (!exit)
682 return NULL;
684 if (!just_once_each_iteration_p (loop, exit->src))
685 return NULL;
687 return exit;
/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  /* Clamp to 4 levels; each level indents by two spaces, so P points into
     SPACES at the right offset from the end.  */
  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "    At pos:\t");
  /* OP_P may be NULL; in that case only the newline is printed.  */
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}
/* Dumps information about the uses to FILE, one section per group.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_ADDRESS)
	fprintf (file, "  Type:\tADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->depends_on)
    {
      fprintf (file, "  Depend on: ");
      dump_bitmap (file, cand->depends_on);
    }

  if (cand->var_before)
    {
      /* NOTE(review): "befor" typo is part of the established dump output;
	 kept byte-identical.  */
      fprintf (file, "  Var befor: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
831 /* Returns the info for ssa version VER. */
833 static inline struct version_info *
834 ver_info (struct ivopts_data *data, unsigned ver)
836 return data->version_info + ver;
839 /* Returns the info for ssa name NAME. */
841 static inline struct version_info *
842 name_info (struct ivopts_data *data, tree name)
844 return ver_info (data, SSA_NAME_VERSION (name));
847 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
848 emitted in LOOP. */
850 static bool
851 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
853 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
855 gcc_assert (bb);
857 if (sbb == loop->latch)
858 return true;
860 if (sbb != bb)
861 return false;
863 return stmt == last_stmt (bb);
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  /* STMT can only execute after the increment if its block is dominated
     by the increment's block.  */
  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Same block: compare statement positions via gimple uids.  */
  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      /* The increment is at the end of the latch; no statement in the
	 body follows it.  */
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      /* TRUE_IF_EQUAL: the use itself already sees the incremented
	 value.  */
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
914 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
916 static bool
917 abnormal_ssa_name_p (tree exp)
919 if (!exp)
920 return false;
922 if (TREE_CODE (exp) != SSA_NAME)
923 return false;
925 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
928 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
929 abnormal phi node. Callback for for_each_index. */
931 static bool
932 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
933 void *data ATTRIBUTE_UNUSED)
935 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
937 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
938 return false;
939 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
940 return false;
943 return !abnormal_ssa_name_p (*index);
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  /* Invariants cannot reference ssa names at all.  */
  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* for_each_index returns false (aborts) when the callback finds an
     abnormal name, hence the negation.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  Results
   (including failures, stored as NULL) are cached per exit edge.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      /* Table is created lazily on first query.  */
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      /* Cache even a NULL result so the analysis runs once per exit.  */
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
1037 /* Returns the structure describing number of iterations determined from
1038 single dominating exit of DATA->current_loop, or NULL if something
1039 goes wrong. */
1041 static struct tree_niter_desc *
1042 niter_for_single_dom_exit (struct ivopts_data *data)
1044 edge exit = single_dom_exit (data->current_loop);
1046 if (!exit)
1047 return NULL;
1049 return niter_for_exit (data, exit);
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Twice the current number of names — presumably head-room for names the
     pass itself creates; confirm against uses of version_info_size.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  /* niters hash map is created lazily in niter_for_exit.  */
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->max_inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant address points to no tracked object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      /* The base object of P + OFF is the base object of P.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
1124 /* Return true if address expression with non-DECL_P operand appears
1125 in EXPR. */
1127 static bool
1128 contain_complex_addr_expr (tree expr)
1130 bool res = false;
1132 STRIP_NOPS (expr);
1133 switch (TREE_CODE (expr))
1135 case POINTER_PLUS_EXPR:
1136 case PLUS_EXPR:
1137 case MINUS_EXPR:
1138 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1139 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1140 break;
1142 case ADDR_EXPR:
1143 return (!DECL_P (TREE_OPERAND (expr, 0)));
1145 default:
1146 return false;
1149 return res;
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  The iv is
   allocated on DATA->iv_obstack and freed with it.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
1190 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1191 doesn't overflow. */
1193 static void
1194 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1195 bool no_overflow)
1197 struct version_info *info = name_info (data, iv);
1199 gcc_assert (!info->iv);
1201 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1202 info->iv = alloc_iv (data, base, step, no_overflow);
1203 info->iv->ssa_name = iv;
1206 /* Finds induction variable declaration for VAR. */
1208 static struct iv *
1209 get_iv (struct ivopts_data *data, tree var)
1211 basic_block bb;
1212 tree type = TREE_TYPE (var);
1214 if (!POINTER_TYPE_P (type)
1215 && !INTEGRAL_TYPE_P (type))
1216 return NULL;
1218 if (!name_info (data, var)->iv)
1220 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1222 if (!bb
1223 || !flow_bb_inside_loop_p (data->current_loop, bb))
1224 set_iv (data, var, var, build_int_cst (type, 0), true);
1227 return name_info (data, var)->iv;
1230 /* Return the first non-invariant ssa var found in EXPR. */
1232 static tree
1233 extract_single_var_from_expr (tree expr)
1235 int i, n;
1236 tree tmp;
1237 enum tree_code code;
1239 if (!expr || is_gimple_min_invariant (expr))
1240 return NULL;
1242 code = TREE_CODE (expr);
1243 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1245 n = TREE_OPERAND_LENGTH (expr);
1246 for (i = 0; i < n; i++)
1248 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1250 if (tmp)
1251 return tmp;
1254 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1257 /* Finds basic ivs. */
1259 static bool
1260 find_bivs (struct ivopts_data *data)
1262 gphi *phi;
1263 affine_iv iv;
1264 tree step, type, base, stop;
1265 bool found = false;
1266 struct loop *loop = data->current_loop;
1267 gphi_iterator psi;
1269 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1271 phi = psi.phi ();
1273 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1274 continue;
1276 if (virtual_operand_p (PHI_RESULT (phi)))
1277 continue;
1279 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1280 continue;
1282 if (integer_zerop (iv.step))
1283 continue;
1285 step = iv.step;
1286 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1287 /* Stop expanding iv base at the first ssa var referred by iv step.
1288 Ideally we should stop at any ssa var, because that's expensive
1289 and unusual to happen, we just do it on the first one.
1291 See PR64705 for the rationale. */
1292 stop = extract_single_var_from_expr (step);
1293 base = expand_simple_operations (base, stop);
1294 if (contains_abnormal_ssa_name_p (base)
1295 || contains_abnormal_ssa_name_p (step))
1296 continue;
1298 type = TREE_TYPE (PHI_RESULT (phi));
1299 base = fold_convert (type, base);
1300 if (step)
1302 if (POINTER_TYPE_P (type))
1303 step = convert_to_ptrofftype (step);
1304 else
1305 step = fold_convert (type, step);
1308 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1309 found = true;
1312 return found;
1315 /* Marks basic ivs. */
1317 static void
1318 mark_bivs (struct ivopts_data *data)
1320 gphi *phi;
1321 gimple *def;
1322 tree var;
1323 struct iv *iv, *incr_iv;
1324 struct loop *loop = data->current_loop;
1325 basic_block incr_bb;
1326 gphi_iterator psi;
1328 data->bivs_not_used_in_addr = 0;
1329 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1331 phi = psi.phi ();
1333 iv = get_iv (data, PHI_RESULT (phi));
1334 if (!iv)
1335 continue;
1337 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1338 def = SSA_NAME_DEF_STMT (var);
1339 /* Don't mark iv peeled from other one as biv. */
1340 if (def
1341 && gimple_code (def) == GIMPLE_PHI
1342 && gimple_bb (def) == loop->header)
1343 continue;
1345 incr_iv = get_iv (data, var);
1346 if (!incr_iv)
1347 continue;
1349 /* If the increment is in the subloop, ignore it. */
1350 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1351 if (incr_bb->loop_father != data->current_loop
1352 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1353 continue;
1355 iv->biv_p = true;
1356 incr_iv->biv_p = true;
1357 if (iv->no_overflow)
1358 data->bivs_not_used_in_addr++;
1359 if (incr_iv->no_overflow)
1360 data->bivs_not_used_in_addr++;
1364 /* Checks whether STMT defines a linear induction variable and stores its
1365 parameters to IV. */
1367 static bool
1368 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1370 tree lhs, stop;
1371 struct loop *loop = data->current_loop;
1373 iv->base = NULL_TREE;
1374 iv->step = NULL_TREE;
1376 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1377 return false;
1379 lhs = gimple_assign_lhs (stmt);
1380 if (TREE_CODE (lhs) != SSA_NAME)
1381 return false;
1383 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1384 return false;
1386 /* Stop expanding iv base at the first ssa var referred by iv step.
1387 Ideally we should stop at any ssa var, because that's expensive
1388 and unusual to happen, we just do it on the first one.
1390 See PR64705 for the rationale. */
1391 stop = extract_single_var_from_expr (iv->step);
1392 iv->base = expand_simple_operations (iv->base, stop);
1393 if (contains_abnormal_ssa_name_p (iv->base)
1394 || contains_abnormal_ssa_name_p (iv->step))
1395 return false;
1397 /* If STMT could throw, then do not consider STMT as defining a GIV.
1398 While this will suppress optimizations, we can not safely delete this
1399 GIV and associated statements, even if it appears it is not used. */
1400 if (stmt_could_throw_p (stmt))
1401 return false;
1403 return true;
1406 /* Finds general ivs in statement STMT. */
1408 static void
1409 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1411 affine_iv iv;
1413 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1414 return;
1416 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1419 /* Finds general ivs in basic block BB. */
1421 static void
1422 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1424 gimple_stmt_iterator bsi;
1426 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1427 find_givs_in_stmt (data, gsi_stmt (bsi));
1430 /* Finds general ivs. */
1432 static void
1433 find_givs (struct ivopts_data *data)
1435 struct loop *loop = data->current_loop;
1436 basic_block *body = get_loop_body_in_dom_order (loop);
1437 unsigned i;
1439 for (i = 0; i < loop->num_nodes; i++)
1440 find_givs_in_bb (data, body[i]);
1441 free (body);
1444 /* For each ssa name defined in LOOP determines whether it is an induction
1445 variable and if so, its initial value and step. */
1447 static bool
1448 find_induction_variables (struct ivopts_data *data)
1450 unsigned i;
1451 bitmap_iterator bi;
1453 if (!find_bivs (data))
1454 return false;
1456 find_givs (data);
1457 mark_bivs (data);
1459 if (dump_file && (dump_flags & TDF_DETAILS))
1461 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1463 if (niter)
1465 fprintf (dump_file, " number of iterations ");
1466 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1467 if (!integer_zerop (niter->may_be_zero))
1469 fprintf (dump_file, "; zero if ");
1470 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1472 fprintf (dump_file, "\n");
1475 fprintf (dump_file, "\n<Induction Vars>:\n");
1476 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1478 struct version_info *info = ver_info (data, i);
1479 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1480 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1484 return true;
1487 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1488 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1489 is the const offset stripped from IV base; for other types use, both
1490 are zero by default. */
1492 static struct iv_use *
1493 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1494 gimple *stmt, enum use_type type, tree addr_base,
1495 unsigned HOST_WIDE_INT addr_offset)
1497 struct iv_use *use = XCNEW (struct iv_use);
1499 use->id = group->vuses.length ();
1500 use->group_id = group->id;
1501 use->type = type;
1502 use->iv = iv;
1503 use->stmt = stmt;
1504 use->op_p = use_p;
1505 use->addr_base = addr_base;
1506 use->addr_offset = addr_offset;
1508 group->vuses.safe_push (use);
1509 return use;
1512 /* Checks whether OP is a loop-level invariant and if so, records it.
1513 NONLINEAR_USE is true if the invariant is used in a way we do not
1514 handle specially. */
1516 static void
1517 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1519 basic_block bb;
1520 struct version_info *info;
1522 if (TREE_CODE (op) != SSA_NAME
1523 || virtual_operand_p (op))
1524 return;
1526 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1527 if (bb
1528 && flow_bb_inside_loop_p (data->current_loop, bb))
1529 return;
1531 info = name_info (data, op);
1532 info->name = op;
1533 info->has_nonlin_use |= nonlinear_use;
1534 if (!info->inv_id)
1535 info->inv_id = ++data->max_inv_id;
1536 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1539 static tree
1540 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1542 /* Record a group of TYPE. */
1544 static struct iv_group *
1545 record_group (struct ivopts_data *data, enum use_type type)
1547 struct iv_group *group = XCNEW (struct iv_group);
1549 group->id = data->vgroups.length ();
1550 group->type = type;
1551 group->related_cands = BITMAP_ALLOC (NULL);
1552 group->vuses.create (1);
1554 data->vgroups.safe_push (group);
1555 return group;
1558 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1559 New group will be created if there is no existing group for the use. */
1561 static struct iv_use *
1562 record_group_use (struct ivopts_data *data, tree *use_p,
1563 struct iv *iv, gimple *stmt, enum use_type type)
1565 tree addr_base = NULL;
1566 struct iv_group *group = NULL;
1567 unsigned HOST_WIDE_INT addr_offset = 0;
1569 /* Record non address type use in a new group. */
1570 if (type == USE_ADDRESS && iv->base_object)
1572 unsigned int i;
1574 addr_base = strip_offset (iv->base, &addr_offset);
1575 for (i = 0; i < data->vgroups.length (); i++)
1577 struct iv_use *use;
1579 group = data->vgroups[i];
1580 use = group->vuses[0];
1581 if (use->type != USE_ADDRESS || !use->iv->base_object)
1582 continue;
1584 /* Check if it has the same stripped base and step. */
1585 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1586 && operand_equal_p (iv->step, use->iv->step, 0)
1587 && operand_equal_p (addr_base, use->addr_base, 0))
1588 break;
1590 if (i == data->vgroups.length ())
1591 group = NULL;
1594 if (!group)
1595 group = record_group (data, type);
1597 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1600 /* Checks whether the use OP is interesting and if so, records it. */
1602 static struct iv_use *
1603 find_interesting_uses_op (struct ivopts_data *data, tree op)
1605 struct iv *iv;
1606 gimple *stmt;
1607 struct iv_use *use;
1609 if (TREE_CODE (op) != SSA_NAME)
1610 return NULL;
1612 iv = get_iv (data, op);
1613 if (!iv)
1614 return NULL;
1616 if (iv->nonlin_use)
1618 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1619 return iv->nonlin_use;
1622 if (integer_zerop (iv->step))
1624 record_invariant (data, op, true);
1625 return NULL;
1628 stmt = SSA_NAME_DEF_STMT (op);
1629 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1631 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1632 iv->nonlin_use = use;
1633 return use;
1636 /* Given a condition in statement STMT, checks whether it is a compare
1637 of an induction variable and an invariant. If this is the case,
1638 CONTROL_VAR is set to location of the iv, BOUND to the location of
1639 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1640 induction variable descriptions, and true is returned. If this is not
1641 the case, CONTROL_VAR and BOUND are set to the arguments of the
1642 condition and false is returned. */
1644 static bool
1645 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1646 tree **control_var, tree **bound,
1647 struct iv **iv_var, struct iv **iv_bound)
1649 /* The objects returned when COND has constant operands. */
1650 static struct iv const_iv;
1651 static tree zero;
1652 tree *op0 = &zero, *op1 = &zero;
1653 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1654 bool ret = false;
1656 if (gimple_code (stmt) == GIMPLE_COND)
1658 gcond *cond_stmt = as_a <gcond *> (stmt);
1659 op0 = gimple_cond_lhs_ptr (cond_stmt);
1660 op1 = gimple_cond_rhs_ptr (cond_stmt);
1662 else
1664 op0 = gimple_assign_rhs1_ptr (stmt);
1665 op1 = gimple_assign_rhs2_ptr (stmt);
1668 zero = integer_zero_node;
1669 const_iv.step = integer_zero_node;
1671 if (TREE_CODE (*op0) == SSA_NAME)
1672 iv0 = get_iv (data, *op0);
1673 if (TREE_CODE (*op1) == SSA_NAME)
1674 iv1 = get_iv (data, *op1);
1676 /* Exactly one of the compared values must be an iv, and the other one must
1677 be an invariant. */
1678 if (!iv0 || !iv1)
1679 goto end;
1681 if (integer_zerop (iv0->step))
1683 /* Control variable may be on the other side. */
1684 std::swap (op0, op1);
1685 std::swap (iv0, iv1);
1687 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1689 end:
1690 if (control_var)
1691 *control_var = op0;
1692 if (iv_var)
1693 *iv_var = iv0;
1694 if (bound)
1695 *bound = op1;
1696 if (iv_bound)
1697 *iv_bound = iv1;
1699 return ret;
1702 /* Checks whether the condition in STMT is interesting and if so,
1703 records it. */
1705 static void
1706 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1708 tree *var_p, *bound_p;
1709 struct iv *var_iv;
1711 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1713 find_interesting_uses_op (data, *var_p);
1714 find_interesting_uses_op (data, *bound_p);
1715 return;
1718 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1721 /* Returns the outermost loop EXPR is obviously invariant in
1722 relative to the loop LOOP, i.e. if all its operands are defined
1723 outside of the returned loop. Returns NULL if EXPR is not
1724 even obviously invariant in LOOP. */
1726 struct loop *
1727 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1729 basic_block def_bb;
1730 unsigned i, len;
1732 if (is_gimple_min_invariant (expr))
1733 return current_loops->tree_root;
1735 if (TREE_CODE (expr) == SSA_NAME)
1737 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1738 if (def_bb)
1740 if (flow_bb_inside_loop_p (loop, def_bb))
1741 return NULL;
1742 return superloop_at_depth (loop,
1743 loop_depth (def_bb->loop_father) + 1);
1746 return current_loops->tree_root;
1749 if (!EXPR_P (expr))
1750 return NULL;
1752 unsigned maxdepth = 0;
1753 len = TREE_OPERAND_LENGTH (expr);
1754 for (i = 0; i < len; i++)
1756 struct loop *ivloop;
1757 if (!TREE_OPERAND (expr, i))
1758 continue;
1760 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1761 if (!ivloop)
1762 return NULL;
1763 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1766 return superloop_at_depth (loop, maxdepth);
1769 /* Returns true if expression EXPR is obviously invariant in LOOP,
1770 i.e. if all its operands are defined outside of the LOOP. LOOP
1771 should not be the function body. */
1773 bool
1774 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1776 basic_block def_bb;
1777 unsigned i, len;
1779 gcc_assert (loop_depth (loop) > 0);
1781 if (is_gimple_min_invariant (expr))
1782 return true;
1784 if (TREE_CODE (expr) == SSA_NAME)
1786 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1787 if (def_bb
1788 && flow_bb_inside_loop_p (loop, def_bb))
1789 return false;
1791 return true;
1794 if (!EXPR_P (expr))
1795 return false;
1797 len = TREE_OPERAND_LENGTH (expr);
1798 for (i = 0; i < len; i++)
1799 if (TREE_OPERAND (expr, i)
1800 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1801 return false;
1803 return true;
1806 /* Given expression EXPR which computes inductive values with respect
1807 to loop recorded in DATA, this function returns biv from which EXPR
1808 is derived by tracing definition chains of ssa variables in EXPR. */
1810 static struct iv*
1811 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1813 struct iv *iv;
1814 unsigned i, n;
1815 tree e2, e1;
1816 enum tree_code code;
1817 gimple *stmt;
1819 if (expr == NULL_TREE)
1820 return NULL;
1822 if (is_gimple_min_invariant (expr))
1823 return NULL;
1825 code = TREE_CODE (expr);
1826 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1828 n = TREE_OPERAND_LENGTH (expr);
1829 for (i = 0; i < n; i++)
1831 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1832 if (iv)
1833 return iv;
1837 /* Stop if it's not ssa name. */
1838 if (code != SSA_NAME)
1839 return NULL;
1841 iv = get_iv (data, expr);
1842 if (!iv || integer_zerop (iv->step))
1843 return NULL;
1844 else if (iv->biv_p)
1845 return iv;
1847 stmt = SSA_NAME_DEF_STMT (expr);
1848 if (gphi *phi = dyn_cast <gphi *> (stmt))
1850 ssa_op_iter iter;
1851 use_operand_p use_p;
1853 if (virtual_operand_p (gimple_phi_result (phi)))
1854 return NULL;
1856 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1858 tree use = USE_FROM_PTR (use_p);
1859 iv = find_deriving_biv_for_expr (data, use);
1860 if (iv)
1861 return iv;
1863 return NULL;
1865 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1866 return NULL;
1868 e1 = gimple_assign_rhs1 (stmt);
1869 code = gimple_assign_rhs_code (stmt);
1870 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1871 return find_deriving_biv_for_expr (data, e1);
1873 switch (code)
1875 case MULT_EXPR:
1876 case PLUS_EXPR:
1877 case MINUS_EXPR:
1878 case POINTER_PLUS_EXPR:
1879 /* Increments, decrements and multiplications by a constant
1880 are simple. */
1881 e2 = gimple_assign_rhs2 (stmt);
1882 iv = find_deriving_biv_for_expr (data, e2);
1883 if (iv)
1884 return iv;
1886 /* Fallthru. */
1887 CASE_CONVERT:
1888 /* Casts are simple. */
1889 return find_deriving_biv_for_expr (data, e1);
1891 default:
1892 break;
1895 return NULL;
1898 /* Record BIV, its predecessor and successor that they are used in
1899 address type uses. */
1901 static void
1902 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1904 unsigned i;
1905 tree type, base_1, base_2;
1906 bitmap_iterator bi;
1908 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1909 || biv->have_address_use || !biv->no_overflow)
1910 return;
1912 type = TREE_TYPE (biv->base);
1913 if (!INTEGRAL_TYPE_P (type))
1914 return;
1916 biv->have_address_use = true;
1917 data->bivs_not_used_in_addr--;
1918 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1921 struct iv *iv = ver_info (data, i)->iv;
1923 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1924 || iv->have_address_use || !iv->no_overflow)
1925 continue;
1927 if (type != TREE_TYPE (iv->base)
1928 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1929 continue;
1931 if (!operand_equal_p (biv->step, iv->step, 0))
1932 continue;
1934 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1935 if (operand_equal_p (base_1, iv->base, 0)
1936 || operand_equal_p (base_2, biv->base, 0))
1938 iv->have_address_use = true;
1939 data->bivs_not_used_in_addr--;
1944 /* Cumulates the steps of indices into DATA and replaces their values with the
1945 initial ones. Returns false when the value of the index cannot be determined.
1946 Callback for for_each_index. */
1948 struct ifs_ivopts_data
1950 struct ivopts_data *ivopts_data;
1951 gimple *stmt;
1952 tree step;
1955 static bool
1956 idx_find_step (tree base, tree *idx, void *data)
1958 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1959 struct iv *iv;
1960 bool use_overflow_semantics = false;
1961 tree step, iv_base, iv_step, lbound, off;
1962 struct loop *loop = dta->ivopts_data->current_loop;
1964 /* If base is a component ref, require that the offset of the reference
1965 be invariant. */
1966 if (TREE_CODE (base) == COMPONENT_REF)
1968 off = component_ref_field_offset (base);
1969 return expr_invariant_in_loop_p (loop, off);
1972 /* If base is array, first check whether we will be able to move the
1973 reference out of the loop (in order to take its address in strength
1974 reduction). In order for this to work we need both lower bound
1975 and step to be loop invariants. */
1976 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1978 /* Moreover, for a range, the size needs to be invariant as well. */
1979 if (TREE_CODE (base) == ARRAY_RANGE_REF
1980 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1981 return false;
1983 step = array_ref_element_size (base);
1984 lbound = array_ref_low_bound (base);
1986 if (!expr_invariant_in_loop_p (loop, step)
1987 || !expr_invariant_in_loop_p (loop, lbound))
1988 return false;
1991 if (TREE_CODE (*idx) != SSA_NAME)
1992 return true;
1994 iv = get_iv (dta->ivopts_data, *idx);
1995 if (!iv)
1996 return false;
1998 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1999 *&x[0], which is not folded and does not trigger the
2000 ARRAY_REF path below. */
2001 *idx = iv->base;
2003 if (integer_zerop (iv->step))
2004 return true;
2006 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2008 step = array_ref_element_size (base);
2010 /* We only handle addresses whose step is an integer constant. */
2011 if (TREE_CODE (step) != INTEGER_CST)
2012 return false;
2014 else
2015 /* The step for pointer arithmetics already is 1 byte. */
2016 step = size_one_node;
2018 iv_base = iv->base;
2019 iv_step = iv->step;
2020 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2021 use_overflow_semantics = true;
2023 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2024 sizetype, &iv_base, &iv_step, dta->stmt,
2025 use_overflow_semantics))
2027 /* The index might wrap. */
2028 return false;
2031 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2032 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2034 if (dta->ivopts_data->bivs_not_used_in_addr)
2036 if (!iv->biv_p)
2037 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2039 record_biv_for_address_use (dta->ivopts_data, iv);
2041 return true;
2044 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2045 object is passed to it in DATA. */
2047 static bool
2048 idx_record_use (tree base, tree *idx,
2049 void *vdata)
2051 struct ivopts_data *data = (struct ivopts_data *) vdata;
2052 find_interesting_uses_op (data, *idx);
2053 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2055 find_interesting_uses_op (data, array_ref_element_size (base));
2056 find_interesting_uses_op (data, array_ref_low_bound (base));
2058 return true;
2061 /* If we can prove that TOP = cst * BOT for some constant cst,
2062 store cst to MUL and return true. Otherwise return false.
2063 The returned value is always sign-extended, regardless of the
2064 signedness of TOP and BOT. */
2066 static bool
2067 constant_multiple_of (tree top, tree bot, widest_int *mul)
2069 tree mby;
2070 enum tree_code code;
2071 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2072 widest_int res, p0, p1;
2074 STRIP_NOPS (top);
2075 STRIP_NOPS (bot);
2077 if (operand_equal_p (top, bot, 0))
2079 *mul = 1;
2080 return true;
2083 code = TREE_CODE (top);
2084 switch (code)
2086 case MULT_EXPR:
2087 mby = TREE_OPERAND (top, 1);
2088 if (TREE_CODE (mby) != INTEGER_CST)
2089 return false;
2091 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2092 return false;
2094 *mul = wi::sext (res * wi::to_widest (mby), precision);
2095 return true;
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2100 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2101 return false;
2103 if (code == MINUS_EXPR)
2104 p1 = -p1;
2105 *mul = wi::sext (p0 + p1, precision);
2106 return true;
2108 case INTEGER_CST:
2109 if (TREE_CODE (bot) != INTEGER_CST)
2110 return false;
2112 p0 = widest_int::from (top, SIGNED);
2113 p1 = widest_int::from (bot, SIGNED);
2114 if (p1 == 0)
2115 return false;
2116 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2117 return res == 0;
2119 default:
2120 return false;
2124 /* Return true if memory reference REF with step STEP may be unaligned. */
2126 static bool
2127 may_be_unaligned_p (tree ref, tree step)
2129 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2130 thus they are not misaligned. */
2131 if (TREE_CODE (ref) == TARGET_MEM_REF)
2132 return false;
2134 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2135 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2136 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2138 unsigned HOST_WIDE_INT bitpos;
2139 unsigned int ref_align;
2140 get_object_alignment_1 (ref, &ref_align, &bitpos);
2141 if (ref_align < align
2142 || (bitpos % align) != 0
2143 || (bitpos % BITS_PER_UNIT) != 0)
2144 return true;
2146 unsigned int trailing_zeros = tree_ctz (step);
2147 if (trailing_zeros < HOST_BITS_PER_INT
2148 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2149 return true;
2151 return false;
2154 /* Return true if EXPR may be non-addressable. */
2156 bool
2157 may_be_nonaddressable_p (tree expr)
2159 switch (TREE_CODE (expr))
2161 case TARGET_MEM_REF:
2162 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2163 target, thus they are always addressable. */
2164 return false;
2166 case MEM_REF:
2167 /* Likewise for MEM_REFs, modulo the storage order. */
2168 return REF_REVERSE_STORAGE_ORDER (expr);
2170 case BIT_FIELD_REF:
2171 if (REF_REVERSE_STORAGE_ORDER (expr))
2172 return true;
2173 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2175 case COMPONENT_REF:
2176 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2177 return true;
2178 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2179 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2181 case ARRAY_REF:
2182 case ARRAY_RANGE_REF:
2183 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2184 return true;
2185 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2187 case VIEW_CONVERT_EXPR:
2188 /* This kind of view-conversions may wrap non-addressable objects
2189 and make them look addressable. After some processing the
2190 non-addressability may be uncovered again, causing ADDR_EXPRs
2191 of inappropriate objects to be built. */
2192 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2193 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2194 return true;
2195 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2197 CASE_CONVERT:
2198 return true;
2200 default:
2201 break;
2204 return false;
2207 /* Finds addresses in *OP_P inside STMT. */
2209 static void
2210 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2211 tree *op_p)
2213 tree base = *op_p, step = size_zero_node;
2214 struct iv *civ;
2215 struct ifs_ivopts_data ifs_ivopts_data;
2217 /* Do not play with volatile memory references. A bit too conservative,
2218 perhaps, but safe. */
2219 if (gimple_has_volatile_ops (stmt))
2220 goto fail;
2222 /* Ignore bitfields for now. Not really something terribly complicated
2223 to handle. TODO. */
2224 if (TREE_CODE (base) == BIT_FIELD_REF)
2225 goto fail;
2227 base = unshare_expr (base);
2229 if (TREE_CODE (base) == TARGET_MEM_REF)
2231 tree type = build_pointer_type (TREE_TYPE (base));
2232 tree astep;
2234 if (TMR_BASE (base)
2235 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2237 civ = get_iv (data, TMR_BASE (base));
2238 if (!civ)
2239 goto fail;
2241 TMR_BASE (base) = civ->base;
2242 step = civ->step;
2244 if (TMR_INDEX2 (base)
2245 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2247 civ = get_iv (data, TMR_INDEX2 (base));
2248 if (!civ)
2249 goto fail;
2251 TMR_INDEX2 (base) = civ->base;
2252 step = civ->step;
2254 if (TMR_INDEX (base)
2255 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2257 civ = get_iv (data, TMR_INDEX (base));
2258 if (!civ)
2259 goto fail;
2261 TMR_INDEX (base) = civ->base;
2262 astep = civ->step;
2264 if (astep)
2266 if (TMR_STEP (base))
2267 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2269 step = fold_build2 (PLUS_EXPR, type, step, astep);
2273 if (integer_zerop (step))
2274 goto fail;
2275 base = tree_mem_ref_addr (type, base);
2277 else
2279 ifs_ivopts_data.ivopts_data = data;
2280 ifs_ivopts_data.stmt = stmt;
2281 ifs_ivopts_data.step = size_zero_node;
2282 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2283 || integer_zerop (ifs_ivopts_data.step))
2284 goto fail;
2285 step = ifs_ivopts_data.step;
2287 /* Check that the base expression is addressable. This needs
2288 to be done after substituting bases of IVs into it. */
2289 if (may_be_nonaddressable_p (base))
2290 goto fail;
2292 /* Moreover, on strict alignment platforms, check that it is
2293 sufficiently aligned. */
2294 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2295 goto fail;
2297 base = build_fold_addr_expr (base);
2299 /* Substituting bases of IVs into the base expression might
2300 have caused folding opportunities. */
2301 if (TREE_CODE (base) == ADDR_EXPR)
2303 tree *ref = &TREE_OPERAND (base, 0);
2304 while (handled_component_p (*ref))
2305 ref = &TREE_OPERAND (*ref, 0);
2306 if (TREE_CODE (*ref) == MEM_REF)
2308 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2309 TREE_OPERAND (*ref, 0),
2310 TREE_OPERAND (*ref, 1));
2311 if (tem)
2312 *ref = tem;
2317 civ = alloc_iv (data, base, step);
2318 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2319 return;
2321 fail:
2322 for_each_index (op_p, idx_record_use, data);
2325 /* Finds and records invariants used in STMT. */
2327 static void
2328 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2330 ssa_op_iter iter;
2331 use_operand_p use_p;
2332 tree op;
2334 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2336 op = USE_FROM_PTR (use_p);
2337 record_invariant (data, op, false);
2341 /* Finds interesting uses of induction variables in the statement STMT. */
2343 static void
2344 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2346 struct iv *iv;
2347 tree op, *lhs, *rhs;
2348 ssa_op_iter iter;
2349 use_operand_p use_p;
2350 enum tree_code code;
/* Invariants are recorded unconditionally, whatever kind of statement
   this turns out to be.  */
2352 find_invariants_stmt (data, stmt);
2354 if (gimple_code (stmt) == GIMPLE_COND)
2356 find_interesting_uses_cond (data, stmt);
2357 return;
2360 if (is_gimple_assign (stmt))
2362 lhs = gimple_assign_lhs_ptr (stmt);
2363 rhs = gimple_assign_rhs1_ptr (stmt);
2365 if (TREE_CODE (*lhs) == SSA_NAME)
2367 /* If the statement defines an induction variable, the uses are not
2368 interesting by themselves. */
2370 iv = get_iv (data, *lhs);
2372 if (iv && !integer_zerop (iv->step))
2373 return;
2376 code = gimple_assign_rhs_code (stmt);
/* Single-operand RHS: a load (reference class) becomes an address use,
   a plain value becomes a nonlinear use; a store through *lhs is also
   an address use.  */
2377 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2378 && (REFERENCE_CLASS_P (*rhs)
2379 || is_gimple_val (*rhs)))
2381 if (REFERENCE_CLASS_P (*rhs))
2382 find_interesting_uses_address (data, stmt, rhs);
2383 else
2384 find_interesting_uses_op (data, *rhs);
2386 if (REFERENCE_CLASS_P (*lhs))
2387 find_interesting_uses_address (data, stmt, lhs);
2388 return;
2390 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2392 find_interesting_uses_cond (data, stmt);
2393 return;
2396 /* TODO -- we should also handle address uses of type
2398 memory = call (whatever);
2402 call (memory). */
/* A loop-header PHI that is itself a (non-degenerate) induction variable
   needs no further processing of its arguments.  */
2405 if (gimple_code (stmt) == GIMPLE_PHI
2406 && gimple_bb (stmt) == data->current_loop->header)
2408 iv = get_iv (data, PHI_RESULT (stmt));
2410 if (iv && !integer_zerop (iv->step))
2411 return;
/* Fallback: record a generic (nonlinear) use for every SSA operand that
   is an induction variable.  */
2414 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2416 op = USE_FROM_PTR (use_p);
2418 if (TREE_CODE (op) != SSA_NAME)
2419 continue;
2421 iv = get_iv (data, op);
2422 if (!iv)
2423 continue;
2425 find_interesting_uses_op (data, op);
2429 /* Finds interesting uses of induction variables outside of loops
2430 on loop exit edge EXIT. */
2432 static void
2433 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2435 gphi *phi;
2436 gphi_iterator psi;
2437 tree def;
/* Values live across EXIT appear as PHI arguments in the exit
   destination block; each non-virtual one is a use we must preserve.  */
2439 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2441 phi = psi.phi ();
2442 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2443 if (!virtual_operand_p (def))
2444 find_interesting_uses_op (data, def);
2448 /* Compute maximum offset of [base + offset] addressing mode
2449 for memory reference represented by USE. */
2451 static HOST_WIDE_INT
2452 compute_max_addr_offset (struct iv_use *use)
2454 int width;
2455 rtx reg, addr;
2456 HOST_WIDE_INT i, off;
2457 unsigned list_index, num;
2458 addr_space_t as;
2459 machine_mode mem_mode, addr_mode;
/* Results are cached per (address space, memory mode) pair, since the
   answer depends only on the target, not on the particular use.  */
2460 static vec<HOST_WIDE_INT> max_offset_list;
2462 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2463 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2465 num = max_offset_list.length ();
2466 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2467 if (list_index >= num)
/* Grow the cache; -1 marks "not yet computed".  */
2469 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2470 for (; num < max_offset_list.length (); num++)
2471 max_offset_list[num] = -1;
2474 off = max_offset_list[list_index];
2475 if (off != -1)
2476 return off;
/* Probe the target: build a (plus reg const) address and try offsets of
   the form 2^i - 1, from the widest representable down, until one is
   accepted by the target's legitimate-address predicate.  */
2478 addr_mode = targetm.addr_space.address_mode (as);
2479 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2480 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2482 width = GET_MODE_BITSIZE (addr_mode) - 1;
2483 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2484 width = HOST_BITS_PER_WIDE_INT - 1;
2486 for (i = width; i > 0; i--)
2488 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2489 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2490 if (memory_address_addr_space_p (mem_mode, addr, as))
2491 break;
2493 /* For some strict-alignment targets, the offset must be naturally
2494 aligned. Try an aligned offset if mem_mode is not QImode. */
2495 off = ((unsigned HOST_WIDE_INT) 1 << i);
2496 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2498 off -= GET_MODE_SIZE (mem_mode);
2499 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2500 if (memory_address_addr_space_p (mem_mode, addr, as))
2501 break;
/* No offset worked at all: record 0 so that [base + offset] grouping
   is effectively disabled for this mode.  */
2504 if (i == 0)
2505 off = 0;
2507 max_offset_list[list_index] = off;
2508 return off;
2511 /* Comparison function to sort group in ascending order of addr_offset. */
2513 static int
2514 group_compare_offset (const void *a, const void *b)
/* qsort callback: A and B point at iv_use pointers.  */
2516 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2517 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2519 if ((*u1)->addr_offset != (*u2)->addr_offset)
2520 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2521 else
2522 return 0;
2525 /* Check if small groups should be split. Return true if no group
2526 contains more than two uses with distinct addr_offsets. Return
2527 false otherwise. We want to split such groups because:
2529 1) Small groups don't have much benefit and may interfere with
2530 general candidate selection.
2531 2) Size for problem with only small groups is usually small and
2532 general algorithm can handle it well.
2534 TODO -- Above claim may not hold when we want to merge memory
2535 accesses with consecutive addresses. */
2537 static bool
2538 split_small_address_groups_p (struct ivopts_data *data)
2540 unsigned int i, j, distinct = 1;
2541 struct iv_use *pre;
2542 struct iv_group *group;
2544 for (i = 0; i < data->vgroups.length (); i++)
2546 group = data->vgroups[i];
2547 if (group->vuses.length () == 1)
2548 continue;
2550 gcc_assert (group->type == USE_ADDRESS);
/* Sort uses by addr_offset as a side effect; a two-element group only
   needs a conditional swap.  */
2551 if (group->vuses.length () == 2)
2553 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2554 std::swap (group->vuses[0], group->vuses[1]);
2556 else
2557 group->vuses.qsort (group_compare_offset);
/* Once some group is known to have more than two distinct offsets the
   answer is fixed, but we keep looping so every group gets sorted.  */
2559 if (distinct > 2)
2560 continue;
2562 distinct = 1;
2563 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2565 if (group->vuses[j]->addr_offset != pre->addr_offset)
2567 pre = group->vuses[j];
2568 distinct++;
2571 if (distinct > 2)
2572 break;
2576 return (distinct <= 2);
2579 /* For each group of address type uses, this function further groups
2580 these uses according to the maximum offset supported by target's
2581 [base + offset] addressing mode. */
2583 static void
2584 split_address_groups (struct ivopts_data *data)
2586 unsigned int i, j;
2587 HOST_WIDE_INT max_offset = -1;
2589 /* Reset max offset to split all small groups. */
2590 if (split_small_address_groups_p (data))
2591 max_offset = 0;
/* Note: groups appended by record_group below are revisited by this
   same loop, so splitting cascades until every group fits.  */
2593 for (i = 0; i < data->vgroups.length (); i++)
2595 struct iv_group *group = data->vgroups[i];
2596 struct iv_use *use = group->vuses[0];
2598 use->id = 0;
2599 use->group_id = group->id;
2600 if (group->vuses.length () == 1)
2601 continue;
2603 if (max_offset != 0)
2604 max_offset = compute_max_addr_offset (use);
2606 for (j = 1; j < group->vuses.length (); j++)
2608 struct iv_use *next = group->vuses[j];
2610 /* Only uses with offset that can fit in offset part against
2611 the first use can be grouped together. */
2612 if (next->addr_offset - use->addr_offset
2613 > (unsigned HOST_WIDE_INT) max_offset)
2614 break;
2616 next->id = j;
2617 next->group_id = group->id;
2619 /* Split group. */
2620 if (j < group->vuses.length ())
/* Move the uses that did not fit into a fresh group of the same type;
   they will get their ids assigned when the loop reaches it.  */
2622 struct iv_group *new_group = record_group (data, group->type);
2623 new_group->vuses.safe_splice (group->vuses);
2624 new_group->vuses.block_remove (0, j);
2625 group->vuses.truncate (j);
2630 /* Finds uses of the induction variables that are interesting. */
2632 static void
2633 find_interesting_uses (struct ivopts_data *data)
2635 basic_block bb;
2636 gimple_stmt_iterator bsi;
2637 basic_block *body = get_loop_body (data->current_loop);
2638 unsigned i;
2639 edge e;
2641 for (i = 0; i < data->current_loop->num_nodes; i++)
2643 edge_iterator ei;
2644 bb = body[i];
/* Edges leaving the loop (but not to the function exit) carry values
   live after the loop; record those uses first.  */
2646 FOR_EACH_EDGE (e, ei, bb->succs)
2647 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2648 && !flow_bb_inside_loop_p (data->current_loop, e->dest)
2649 find_interesting_uses_outside (data, e);
/* Then scan every PHI and every non-debug statement in the block.  */
2651 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2652 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2653 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2654 if (!is_gimple_debug (gsi_stmt (bsi)))
2655 find_interesting_uses_stmt (data, gsi_stmt (bsi));
/* Re-partition address groups by the target's offset reach.  */
2658 split_address_groups (data);
2660 if (dump_file && (dump_flags & TDF_DETAILS))
2662 bitmap_iterator bi;
2664 fprintf (dump_file, "\n<Invariant Vars>:\n");
2665 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2667 struct version_info *info = ver_info (data, i);
2668 if (info->inv_id)
2670 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2671 print_generic_expr (dump_file, info->name, TDF_SLIM);
2672 fprintf (dump_file, "%s\n",
2673 info->has_nonlin_use ? "" : "\t(eliminable)");
2677 fprintf (dump_file, "\n<IV Groups>:\n");
2678 dump_groups (dump_file, data);
2679 fprintf (dump_file, "\n");
2682 free (body);
2685 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2686 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2687 we are at the top-level of the processed address. */
2689 static tree
2690 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2691 HOST_WIDE_INT *offset)
2693 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2694 enum tree_code code;
2695 tree type, orig_type = TREE_TYPE (expr);
2696 HOST_WIDE_INT off0, off1, st;
2697 tree orig_expr = expr;
2699 STRIP_NOPS (expr);
2701 type = TREE_TYPE (expr);
2702 code = TREE_CODE (expr);
2703 *offset = 0;
2705 switch (code)
/* Constant: move its whole value into *OFFSET and leave a zero of the
   original type behind.  Constants not fitting a HWI stay untouched.  */
2707 case INTEGER_CST:
2708 if (!cst_and_fits_in_hwi (expr)
2709 || integer_zerop (expr))
2710 return orig_expr;
2712 *offset = int_cst_value (expr);
2713 return build_int_cst (orig_type, 0);
/* Additive nodes: recurse into both operands and combine their offsets
   with the node's sign.  If nothing changed, return ORIG_EXPR so callers
   can detect "no offset found" by pointer identity.  */
2715 case POINTER_PLUS_EXPR:
2716 case PLUS_EXPR:
2717 case MINUS_EXPR:
2718 op0 = TREE_OPERAND (expr, 0);
2719 op1 = TREE_OPERAND (expr, 1);
2721 op0 = strip_offset_1 (op0, false, false, &off0);
2722 op1 = strip_offset_1 (op1, false, false, &off1);
2724 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2725 if (op0 == TREE_OPERAND (expr, 0)
2726 && op1 == TREE_OPERAND (expr, 1))
2727 return orig_expr;
2729 if (integer_zerop (op1))
2730 expr = op0;
2731 else if (integer_zerop (op0))
2733 if (code == MINUS_EXPR)
2734 expr = fold_build1 (NEGATE_EXPR, type, op1);
2735 else
2736 expr = op1;
2738 else
2739 expr = fold_build2 (code, type, op0, op1);
2741 return fold_convert (orig_type, expr);
/* Multiplication by a constant scales the offset found in the other
   operand.  */
2743 case MULT_EXPR:
2744 op1 = TREE_OPERAND (expr, 1);
2745 if (!cst_and_fits_in_hwi (op1))
2746 return orig_expr;
2748 op0 = TREE_OPERAND (expr, 0);
2749 op0 = strip_offset_1 (op0, false, false, &off0);
2750 if (op0 == TREE_OPERAND (expr, 0))
2751 return orig_expr;
2753 *offset = off0 * int_cst_value (op1);
2755 if (integer_zerop (op0))
2755 expr = op0;
2756 else
2757 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2759 return fold_convert (orig_type, expr);
/* Array indexing: constant part of the index times the element size
   becomes offset; a fully-constant top-level reference is stripped.  */
2761 case ARRAY_REF:
2762 case ARRAY_RANGE_REF:
2763 if (!inside_addr)
2764 return orig_expr;
2766 step = array_ref_element_size (expr);
2767 if (!cst_and_fits_in_hwi (step))
2768 break;
2770 st = int_cst_value (step);
2771 op1 = TREE_OPERAND (expr, 1);
2772 op1 = strip_offset_1 (op1, false, false, &off1);
2773 *offset = off1 * st;
2775 if (top_compref
2776 && integer_zerop (op1))
2778 /* Strip the component reference completely. */
2779 op0 = TREE_OPERAND (expr, 0);
2780 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2781 *offset += off0;
2782 return op0;
2784 break;
/* Field access: fold the field's byte (and bit/8) offset into *OFFSET
   when both are HWI-representable, then strip the reference.  */
2786 case COMPONENT_REF:
2788 tree field;
2790 if (!inside_addr)
2791 return orig_expr;
2793 tmp = component_ref_field_offset (expr);
2794 field = TREE_OPERAND (expr, 1);
2795 if (top_compref
2796 && cst_and_fits_in_hwi (tmp)
2797 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2799 HOST_WIDE_INT boffset, abs_off;
2801 /* Strip the component reference completely. */
2802 op0 = TREE_OPERAND (expr, 0);
2803 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2804 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2805 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2806 if (boffset < 0)
2807 abs_off = -abs_off;
2809 *offset = off0 + int_cst_value (tmp) + abs_off;
2810 return op0;
2813 break;
/* Taking an address: recurse with INSIDE_ADDR/TOP_COMPREF set so the
   reference cases above may strip constant parts.  */
2815 case ADDR_EXPR:
2816 op0 = TREE_OPERAND (expr, 0);
2817 op0 = strip_offset_1 (op0, true, true, &off0);
2818 *offset += off0;
2820 if (op0 == TREE_OPERAND (expr, 0))
2821 return orig_expr;
2823 expr = build_fold_addr_expr (op0);
2824 return fold_convert (orig_type, expr);
2826 case MEM_REF:
2827 /* ??? Offset operand? */
2828 inside_addr = false;
2829 break;
2831 default:
2832 return orig_expr;
2835 /* Default handling of expressions for that we want to recurse into
2836 the first operand. */
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2839 *offset += off0;
2841 if (op0 == TREE_OPERAND (expr, 0)
2842 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2843 return orig_expr;
2845 expr = copy_node (expr);
2846 TREE_OPERAND (expr, 0) = op0;
2847 if (op1)
2848 TREE_OPERAND (expr, 1) = op1;
2850 /* Inside address, we might strip the top level component references,
2851 thus changing type of the expression. Handling of ADDR_EXPR
2852 will fix that. */
2853 expr = fold_convert (orig_type, expr);
2855 return expr;
2858 /* Strips constant offsets from EXPR and stores them to OFFSET.
     Convenience wrapper around strip_offset_1 for expressions that are
     not inside an address; the signed offset is returned as unsigned.  */
2860 static tree
2861 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2863 HOST_WIDE_INT off;
2864 tree core = strip_offset_1 (expr, false, false, &off);
2865 *offset = off;
2866 return core;
2869 /* Returns variant of TYPE that can be used as base for different uses.
2870 We return unsigned type with the same precision, which avoids problems
2871 with overflows. */
2873 static tree
2874 generic_type_for (tree type)
/* Pointers are converted to an unsigned integer of the same precision;
   kept as a separate branch for clarity even though the final return
   would handle it identically.  */
2876 if (POINTER_TYPE_P (type))
2877 return unsigned_type_for (type);
2879 if (TYPE_UNSIGNED (type))
2880 return type;
2882 return unsigned_type_for (type);
2885 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2886 the bitmap to that we should store it. */
/* Communicates the ivopts context to the walk_tree callback, which only
   receives a single void* DATA slot (used here for the bitmap).  */
2888 static struct ivopts_data *fd_ivopts_data;
2889 static tree
2890 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2892 bitmap *depends_on = (bitmap *) data;
2893 struct version_info *info;
2895 if (TREE_CODE (*expr_p) != SSA_NAME)
2896 return NULL_TREE;
2897 info = name_info (fd_ivopts_data, *expr_p);
/* Only registered invariants without nonlinear uses are dependencies.  */
2899 if (!info->inv_id || info->has_nonlin_use)
2900 return NULL_TREE;
/* Allocate the bitmap lazily on first dependency found.  */
2902 if (!*depends_on)
2903 *depends_on = BITMAP_ALLOC (NULL);
2904 bitmap_set_bit (*depends_on, info->inv_id);
2906 return NULL_TREE;
2909 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2910 position to POS. If USE is not NULL, the candidate is set as related to
2911 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2912 replacement of the final value of the iv by a direct computation.
     Returns the (new or existing) candidate, or NULL if none is added.  */
2914 static struct iv_cand *
2915 add_candidate_1 (struct ivopts_data *data,
2916 tree base, tree step, bool important, enum iv_position pos,
2917 struct iv_use *use, gimple *incremented_at,
2918 struct iv *orig_iv = NULL)
2920 unsigned i;
2921 struct iv_cand *cand = NULL;
2922 tree type, orig_type;
2924 gcc_assert (base && step);
2926 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2927 live, but the ivopts code may replace a real pointer with one
2928 pointing before or after the memory block that is then adjusted
2929 into the memory block during the loop. FIXME: It would likely be
2930 better to actually force the pointer live and still use ivopts;
2931 for example, it would be enough to write the pointer into memory
2932 and keep it there until after the loop. */
2933 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2934 return NULL;
2936 /* For non-original variables, make sure their values are computed in a type
2937 that does not invoke undefined behavior on overflows (since in general,
2938 we cannot prove that these induction variables are non-wrapping). */
2939 if (pos != IP_ORIGINAL)
2941 orig_type = TREE_TYPE (base);
2942 type = generic_type_for (orig_type);
2943 if (type != orig_type)
2945 base = fold_convert (type, base);
2946 step = fold_convert (type, step);
/* Deduplicate: reuse an existing candidate with the same position,
   increment site and (base, step, precision).  */
2950 for (i = 0; i < data->vcands.length (); i++)
2952 cand = data->vcands[i];
2954 if (cand->pos != pos)
2955 continue;
2957 if (cand->incremented_at != incremented_at
2958 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2959 && cand->ainc_use != use))
2960 continue;
2962 if (operand_equal_p (base, cand->iv->base, 0)
2963 && operand_equal_p (step, cand->iv->step, 0)
2964 && (TYPE_PRECISION (TREE_TYPE (base))
2965 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2966 break;
/* Not found: create and register a fresh candidate.  */
2969 if (i == data->vcands.length ())
2971 cand = XCNEW (struct iv_cand);
2972 cand->id = i;
2973 cand->iv = alloc_iv (data, base, step);
2974 cand->pos = pos;
2975 if (pos != IP_ORIGINAL)
2977 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2978 cand->var_after = cand->var_before;
2980 cand->important = important;
2981 cand->incremented_at = incremented_at;
2982 data->vcands.safe_push (cand);
/* A symbolic step may reference invariants; record them so costs can
   account for keeping those invariants live.  */
2984 if (TREE_CODE (step) != INTEGER_CST)
2986 fd_ivopts_data = data;
2987 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2990 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2991 cand->ainc_use = use;
2992 else
2993 cand->ainc_use = NULL;
2995 cand->orig_iv = orig_iv;
2996 if (dump_file && (dump_flags & TDF_DETAILS))
2997 dump_cand (dump_file, cand);
/* An existing candidate becomes important if any registration asks.  */
3000 cand->important |= important;
3002 /* Relate candidate to the group for which it is added. */
3003 if (use)
3004 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3006 return cand;
3009 /* Returns true if incrementing the induction variable at the end of the LOOP
3010 is allowed.
3012 The purpose is to avoid splitting latch edge with a biv increment, thus
3013 creating a jump, possibly confusing other optimization passes and leaving
3014 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
3015 is not available (so we do not have a better alternative), or if the latch
3016 edge is already nonempty. */
3018 static bool
3019 allow_ip_end_pos_p (struct loop *loop)
3021 if (!ip_normal_pos (loop))
3022 return true;
3024 if (!empty_block_p (ip_end_pos (loop)))
3025 return true;
3027 return false;
3030 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3031 Important field is set to IMPORTANT. */
3033 static void
3034 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3035 bool important, struct iv_use *use)
3037 basic_block use_bb = gimple_bb (use->stmt);
3038 machine_mode mem_mode;
3039 unsigned HOST_WIDE_INT cstepi;
3041 /* If we insert the increment in any position other than the standard
3042 ones, we must ensure that it is incremented once per iteration.
3043 It must not be in an inner nested loop, or one side of an if
3044 statement. */
3045 if (use_bb->loop_father != data->current_loop
3046 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3047 || stmt_could_throw_p (use->stmt)
3048 || !cst_and_fits_in_hwi (step))
3049 return;
3051 cstepi = int_cst_value (step);
3053 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
/* Pre-increment/decrement: the candidate's base is shifted back by one
   step so the pre-modified address equals the original value.  */
3054 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3055 || USE_STORE_PRE_INCREMENT (mem_mode))
3056 && GET_MODE_SIZE (mem_mode) == cstepi)
3057 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3058 || USE_STORE_PRE_DECREMENT (mem_mode))
3059 && GET_MODE_SIZE (mem_mode) == -cstepi))
3061 enum tree_code code = MINUS_EXPR;
3062 tree new_base;
3063 tree new_step = step;
/* POINTER_PLUS_EXPR requires the (negated) step on the right, since
   there is no POINTER_MINUS in GIMPLE.  */
3065 if (POINTER_TYPE_P (TREE_TYPE (base)))
3067 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3068 code = POINTER_PLUS_EXPR;
3070 else
3071 new_step = fold_convert (TREE_TYPE (base), new_step);
3072 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3073 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3074 use->stmt);
/* Post-increment/decrement: base is unchanged, increment after use.  */
3076 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3077 || USE_STORE_POST_INCREMENT (mem_mode))
3078 && GET_MODE_SIZE (mem_mode) == cstepi)
3079 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3080 || USE_STORE_POST_DECREMENT (mem_mode))
3081 && GET_MODE_SIZE (mem_mode) == -cstepi))
3083 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3084 use->stmt);
3088 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3089 position to POS. If USE is not NULL, the candidate is set as related to
3090 it. The candidate computation is scheduled before exit condition and at
3091 the end of loop. */
3093 static void
3094 add_candidate (struct ivopts_data *data,
3095 tree base, tree step, bool important, struct iv_use *use,
3096 struct iv *orig_iv = NULL)
/* Try both standard increment positions; IP_END is gated so the latch
   edge is not split needlessly (see allow_ip_end_pos_p).  */
3098 if (ip_normal_pos (data->current_loop))
3099 add_candidate_1 (data, base, step, important,
3100 IP_NORMAL, use, NULL, orig_iv);
3101 if (ip_end_pos (data->current_loop)
3102 && allow_ip_end_pos_p (data->current_loop))
3103 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3106 /* Adds standard iv candidates: {0, +, 1} in int, long and long long
     (the wider types only when strictly wider and at most word-sized).  */
3108 static void
3109 add_standard_iv_candidates (struct ivopts_data *data)
3111 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3113 /* The same for the long type if it is still fast enough. */
3114 if (TYPE_PRECISION
3115 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3116 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3117 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3118 build_int_cst (long_integer_type_node, 1), true, NULL);
3120 /* The same for the long long type if it is still fast enough. */
3121 if (TYPE_PRECISION
3122 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3123 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3124 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3125 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3129 /* Adds candidates based on the old induction variable IV. */
3131 static void
3132 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3134 gimple *phi;
3135 tree def;
3136 struct iv_cand *cand;
3138 /* Check if this biv is used in address type use. */
3139 if (iv->no_overflow && iv->have_address_use
3140 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3141 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3143 tree base = fold_convert (sizetype, iv->base);
3144 tree step = fold_convert (sizetype, iv->step);
3146 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3147 add_candidate (data, base, step, true, NULL, iv);
3148 /* Add iv cand of the original type only if it has nonlinear use. */
3149 if (iv->nonlin_use)
3150 add_candidate (data, iv->base, iv->step, true, NULL);
3152 else
3153 add_candidate (data, iv->base, iv->step, true, NULL);
3155 /* The same, but with initial value zero. */
3156 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3157 add_candidate (data, size_int (0), iv->step, true, NULL);
3158 else
3159 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3160 iv->step, true, NULL);
3162 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3163 if (gimple_code (phi) == GIMPLE_PHI)
3165 /* Additionally record the possibility of leaving the original iv
3166 untouched. */
3167 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3168 /* Don't add candidate if it's from another PHI node because
3169 it's an affine iv appearing in the form of PEELED_CHREC. */
3170 phi = SSA_NAME_DEF_STMT (def);
3171 if (gimple_code (phi) != GIMPLE_PHI)
3173 cand = add_candidate_1 (data,
3174 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3175 SSA_NAME_DEF_STMT (def));
/* add_candidate_1 may return NULL (e.g. with -fkeep-gc-roots-live).  */
3176 if (cand)
3178 cand->var_before = iv->ssa_name;
3179 cand->var_after = def;
3182 else
3183 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3187 /* Adds candidates based on the old induction variables. */
3189 static void
3190 add_iv_candidate_for_bivs (struct ivopts_data *data)
3192 unsigned i;
3193 struct iv *iv;
3194 bitmap_iterator bi;
/* Scan all relevant SSA versions, picking out genuine (non-degenerate)
   basic induction variables.  */
3196 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3198 iv = ver_info (data, i)->iv;
3199 if (iv && iv->biv_p && !integer_zerop (iv->step))
3200 add_iv_candidate_for_biv (data, iv);
3204 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3206 static void
3207 record_common_cand (struct ivopts_data *data, tree base,
3208 tree step, struct iv_use *use)
3210 struct iv_common_cand ent;
3211 struct iv_common_cand **slot;
3213 ent.base = base;
3214 ent.step = step;
/* Hash combines the base and step expression structure.  */
3215 ent.hash = iterative_hash_expr (base, 0);
3216 ent.hash = iterative_hash_expr (step, ent.hash);
3218 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3219 if (*slot == NULL)
/* First occurrence: create the entry and also keep it in the vector
   used later for sorting by popularity.  */
3221 *slot = new iv_common_cand ();
3222 (*slot)->base = base;
3223 (*slot)->step = step;
3224 (*slot)->uses.create (8);
3225 (*slot)->hash = ent.hash;
3226 data->iv_common_cands.safe_push ((*slot));
3229 gcc_assert (use != NULL);
3230 (*slot)->uses.safe_push (use);
3231 return;
3234 /* Comparison function used to sort common candidates.
     Sorts in descending order of number of deriving uses.  */
3236 static int
3237 common_cand_cmp (const void *p1, const void *p2)
3239 unsigned n1, n2;
3240 const struct iv_common_cand *const *const ccand1
3241 = (const struct iv_common_cand *const *)p1;
3242 const struct iv_common_cand *const *const ccand2
3243 = (const struct iv_common_cand *const *)p2;
3245 n1 = (*ccand1)->uses.length ();
3246 n2 = (*ccand2)->uses.length ();
/* NOTE(review): unsigned subtraction converted to int; relies on use
   counts being far below INT_MAX, which holds in practice.  */
3247 return n2 - n1;
3250 /* Adds IV candidates based on common candidates recorded. */
3252 static void
3253 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3255 unsigned i, j;
3256 struct iv_cand *cand_1, *cand_2;
/* Sorted by popularity (descending), so once an entry has <= 1 use all
   remaining entries do too and the loop can stop.  */
3258 data->iv_common_cands.qsort (common_cand_cmp);
3259 for (i = 0; i < data->iv_common_cands.length (); i++)
3261 struct iv_common_cand *ptr = data->iv_common_cands[i];
3263 /* Only add IV candidate if it's derived from multiple uses. */
3264 if (ptr->uses.length () <= 1)
3265 break;
3267 cand_1 = NULL;
3268 cand_2 = NULL;
3269 if (ip_normal_pos (data->current_loop))
3270 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3271 false, IP_NORMAL, NULL, NULL);
3273 if (ip_end_pos (data->current_loop)
3274 && allow_ip_end_pos_p (data->current_loop))
3275 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3276 false, IP_END, NULL, NULL);
3278 /* Bind deriving uses and the new candidates. */
3279 for (j = 0; j < ptr->uses.length (); j++)
3281 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3282 if (cand_1)
3283 bitmap_set_bit (group->related_cands, cand_1->id);
3284 if (cand_2)
3285 bitmap_set_bit (group->related_cands, cand_2->id);
3289 /* Release data since it is useless from this point. */
3290 data->iv_common_cand_tab->empty ();
3291 data->iv_common_cands.truncate (0);
3294 /* Adds candidates based on the value of USE's iv.  Besides the direct
     candidates, several derived forms are recorded as "common candidates"
     so that forms shared by multiple uses get promoted later.  */
3296 static void
3297 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3299 unsigned HOST_WIDE_INT offset;
3300 tree base;
3301 tree basetype;
3302 struct iv *iv = use->iv;
3304 add_candidate (data, iv->base, iv->step, false, use);
3306 /* Record common candidate for use in case it can be shared by others. */
3307 record_common_cand (data, iv->base, iv->step, use);
3309 /* Record common candidate with initial value zero. */
3310 basetype = TREE_TYPE (iv->base);
3311 if (POINTER_TYPE_P (basetype))
3312 basetype = sizetype;
3313 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3315 /* Record common candidate with constant offset stripped in base.
3316 Like the use itself, we also add candidate directly for it. */
3317 base = strip_offset (iv->base, &offset);
3318 if (offset || base != iv->base)
3320 record_common_cand (data, base, iv->step, use);
3321 add_candidate (data, base, iv->step, false, use);
3324 /* Record common candidate with base_object removed in base. */
3325 if (iv->base_object != NULL)
3327 unsigned i;
3328 aff_tree aff_base;
3329 tree step, base_object = iv->base_object;
3331 base = iv->base;
3332 step = iv->step;
3333 STRIP_NOPS (base);
3334 STRIP_NOPS (step);
3335 STRIP_NOPS (base_object);
/* Look for the base object as a coefficient-1 term of the affine form
   of the base; if present, drop it.  */
3336 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3337 for (i = 0; i < aff_base.n; i++)
3339 if (aff_base.elts[i].coef != 1)
3340 continue;
3342 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3343 break;
3345 if (i < aff_base.n)
3347 aff_combination_remove_elt (&aff_base, i);
3348 base = aff_combination_to_tree (&aff_base);
3349 basetype = TREE_TYPE (base);
3350 if (POINTER_TYPE_P (basetype))
3351 basetype = sizetype;
3353 step = fold_convert (basetype, step);
3354 record_common_cand (data, base, step, use);
3355 /* Also record common candidate with offset stripped. */
3356 base = strip_offset (base, &offset);
3357 if (offset)
3358 record_common_cand (data, base, step, use);
3362 /* At last, add auto-incremental candidates. Make such variables
3363 important since other iv uses with same base object may be based
3364 on it. */
3365 if (use != NULL && use->type == USE_ADDRESS)
3366 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3369 /* Adds candidates based on the uses. */
3371 static void
3372 add_iv_candidate_for_groups (struct ivopts_data *data)
3374 unsigned i;
3376 /* Only add candidate for the first use in group.
     Uses within a group differ only in constant offset, so the first
     representative suffices.  */
3377 for (i = 0; i < data->vgroups.length (); i++)
3379 struct iv_group *group = data->vgroups[i];
3381 gcc_assert (group->vuses[0] != NULL);
3382 add_iv_candidate_for_use (data, group->vuses[0]);
/* Promote the forms recorded as common candidates above.  */
3384 add_iv_candidate_derived_from_uses (data);
3387 /* Record important candidates and add them to related_cands bitmaps. */
3389 static void
3390 record_important_candidates (struct ivopts_data *data)
3392 unsigned i;
3393 struct iv_group *group;
3395 for (i = 0; i < data->vcands.length (); i++)
3397 struct iv_cand *cand = data->vcands[i];
3399 if (cand->important)
3400 bitmap_set_bit (data->important_candidates, i);
/* With few candidates we can afford to cost every (group, cand) pair;
   otherwise only related candidates are considered.  */
3403 data->consider_all_candidates = (data->vcands.length ()
3404 <= CONSIDER_ALL_CANDIDATES_BOUND);
3406 /* Add important candidates to groups' related_cands bitmaps. */
3407 for (i = 0; i < data->vgroups.length (); i++)
3409 group = data->vgroups[i];
3410 bitmap_ior_into (group->related_cands, data->important_candidates);
3414 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3415 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3416 we allocate a simple list to every use. */
3418 static void
3419 alloc_use_cost_map (struct ivopts_data *data)
3421 unsigned i, size, s;
3423 for (i = 0; i < data->vgroups.length (); i++)
3425 struct iv_group *group = data->vgroups[i];
3427 if (data->consider_all_candidates)
3428 size = data->vcands.length ();
3429 else
3431 s = bitmap_count_bits (group->related_cands);
3433 /* Round up to the power of two, so that moduling by it is fast. */
3434 size = s ? (1 << ceil_log2 (s)) : 1;
/* cost_map is zero-initialized; a NULL cand marks an empty slot for the
   open-addressed probing in set/get_group_iv_cost.  */
3437 group->n_map_members = size;
3438 group->cost_map = XCNEWVEC (struct cost_pair, size);
3442 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3443 on invariants DEPENDS_ON and that the value used in expressing it
3444 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3446 static void
3447 set_group_iv_cost (struct ivopts_data *data,
3448 struct iv_group *group, struct iv_cand *cand,
3449 comp_cost cost, bitmap depends_on, tree value,
3450 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3452 unsigned i, s;
/* Infinite cost means the pair is unusable; store nothing (absence in
   the map encodes infinity) and release the dependency bitmap.  */
3454 if (cost.infinite_cost_p ())
3456 BITMAP_FREE (depends_on);
3457 return;
/* Dense mode: direct indexing by candidate id.  */
3460 if (data->consider_all_candidates)
3462 group->cost_map[cand->id].cand = cand;
3463 group->cost_map[cand->id].cost = cost;
3464 group->cost_map[cand->id].depends_on = depends_on;
3465 group->cost_map[cand->id].value = value;
3466 group->cost_map[cand->id].comp = comp;
3467 group->cost_map[cand->id].inv_expr = inv_expr;
3468 return;
/* Sparse mode: open addressing with linear probing, wrapping once.
   The map is sized to hold all related candidates, so a free slot must
   exist — hence gcc_unreachable if probing fails.  */
3471 /* n_map_members is a power of two, so this computes modulo. */
3472 s = cand->id & (group->n_map_members - 1);
3473 for (i = s; i < group->n_map_members; i++)
3474 if (!group->cost_map[i].cand)
3475 goto found;
3476 for (i = 0; i < s; i++)
3477 if (!group->cost_map[i].cand)
3478 goto found;
3480 gcc_unreachable ();
3482 found:
3483 group->cost_map[i].cand = cand;
3484 group->cost_map[i].cost = cost;
3485 group->cost_map[i].depends_on = depends_on;
3486 group->cost_map[i].value = value;
3487 group->cost_map[i].comp = comp;
3488 group->cost_map[i].inv_expr = inv_expr;
3491 /* Gets cost of (GROUP, CAND) pair.  Returns NULL when the pair was not
     recorded, i.e. its cost is infinite.  */
3493 static struct cost_pair *
3494 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3495 struct iv_cand *cand)
3497 unsigned i, s;
3498 struct cost_pair *ret;
3500 if (!cand)
3501 return NULL;
/* Dense mode mirrors the direct indexing of set_group_iv_cost.  */
3503 if (data->consider_all_candidates)
3505 ret = group->cost_map + cand->id;
3506 if (!ret->cand)
3507 return NULL;
3509 return ret;
/* Sparse mode: probe from the hash position; an empty slot before the
   candidate is found proves it was never inserted.  */
3512 /* n_map_members is a power of two, so this computes modulo. */
3513 s = cand->id & (group->n_map_members - 1);
3514 for (i = s; i < group->n_map_members; i++)
3515 if (group->cost_map[i].cand == cand)
3516 return group->cost_map + i;
3517 else if (group->cost_map[i].cand == NULL)
3518 return NULL;
3519 for (i = 0; i < s; i++)
3520 if (group->cost_map[i].cand == cand)
3521 return group->cost_map + i;
3522 else if (group->cost_map[i].cand == NULL)
3523 return NULL;
3525 return NULL;
3528 /* Produce DECL_RTL for object obj so it looks like it is stored in memory.
     REGNO supplies (and is advanced past) fake register numbers for
     non-static objects.  Used for cost estimation only, not codegen.  */
3529 static rtx
3530 produce_memory_decl_rtl (tree obj, int *regno)
3532 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3533 machine_mode address_mode = targetm.addr_space.address_mode (as);
3534 rtx x;
3536 gcc_assert (obj);
/* Static/external objects get a symbolic memory address.  */
3537 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3539 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3540 x = gen_rtx_SYMBOL_REF (address_mode, name);
3541 SET_SYMBOL_REF_DECL (x, obj);
3542 x = gen_rtx_MEM (DECL_MODE (obj), x);
3543 set_mem_addr_space (x, as);
3544 targetm.encode_section_info (obj, x, true);
3546 else
/* Everything else is addressed through a fresh fake register.  */
3548 x = gen_raw_REG (address_mode, (*regno)++);
3549 x = gen_rtx_MEM (DECL_MODE (obj), x);
3550 set_mem_addr_space (x, as);
3553 return x;
3556 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3557 walk_tree. DATA contains the actual fake register number. */
3559 static tree
3560 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3562 tree obj = NULL_TREE;
3563 rtx x = NULL_RTX;
3564 int *regno = (int *) data;
3566 switch (TREE_CODE (*expr_p))
3568 case ADDR_EXPR:
3569 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3570 handled_component_p (*expr_p);
3571 expr_p = &TREE_OPERAND (*expr_p, 0))
3572 continue;
3573 obj = *expr_p;
3574 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3575 x = produce_memory_decl_rtl (obj, regno);
3576 break;
3578 case SSA_NAME:
3579 *ws = 0;
3580 obj = SSA_NAME_VAR (*expr_p);
3581 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3582 if (!obj)
3583 return NULL_TREE;
3584 if (!DECL_RTL_SET_P (obj))
3585 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3586 break;
3588 case VAR_DECL:
3589 case PARM_DECL:
3590 case RESULT_DECL:
3591 *ws = 0;
3592 obj = *expr_p;
3594 if (DECL_RTL_SET_P (obj))
3595 break;
3597 if (DECL_MODE (obj) == BLKmode)
3598 x = produce_memory_decl_rtl (obj, regno);
3599 else
3600 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3602 break;
3604 default:
3605 break;
3608 if (x)
3610 decl_rtl_to_reset.safe_push (obj);
3611 SET_DECL_RTL (obj, x);
3614 return NULL_TREE;
3617 /* Determines cost of the computation of EXPR. */
3619 static unsigned
3620 computation_cost (tree expr, bool speed)
3622 rtx_insn *seq;
3623 rtx rslt;
3624 tree type = TREE_TYPE (expr);
3625 unsigned cost;
3626 /* Avoid using hard regs in ways which may be unsupported. */
3627 int regno = LAST_VIRTUAL_REGISTER + 1;
3628 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3629 enum node_frequency real_frequency = node->frequency;
3631 node->frequency = NODE_FREQUENCY_NORMAL;
3632 crtl->maybe_hot_insn_p = speed;
3633 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3634 start_sequence ();
3635 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3636 seq = get_insns ();
3637 end_sequence ();
3638 default_rtl_profile ();
3639 node->frequency = real_frequency;
3641 cost = seq_cost (seq, speed);
3642 if (MEM_P (rslt))
3643 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3644 TYPE_ADDR_SPACE (type), speed);
3645 else if (!REG_P (rslt))
3646 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3648 return cost;
3651 /* Returns variable containing the value of candidate CAND at statement AT. */
3653 static tree
3654 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3656 if (stmt_after_increment (loop, cand, stmt))
3657 return cand->var_after;
3658 else
3659 return cand->var_before;
3662 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3663 same precision that is at least as wide as the precision of TYPE, stores
3664 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3665 type of A and B. */
3667 static tree
3668 determine_common_wider_type (tree *a, tree *b)
3670 tree wider_type = NULL;
3671 tree suba, subb;
3672 tree atype = TREE_TYPE (*a);
3674 if (CONVERT_EXPR_P (*a))
3676 suba = TREE_OPERAND (*a, 0);
3677 wider_type = TREE_TYPE (suba);
3678 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3679 return atype;
3681 else
3682 return atype;
3684 if (CONVERT_EXPR_P (*b))
3686 subb = TREE_OPERAND (*b, 0);
3687 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3688 return atype;
3690 else
3691 return atype;
3693 *a = suba;
3694 *b = subb;
3695 return wider_type;
3698 /* Determines the expression by that USE is expressed from induction variable
3699 CAND at statement AT in LOOP. The expression is stored in a decomposed
3700 form into AFF. Returns false if USE cannot be expressed using CAND. */
3702 static bool
3703 get_computation_aff (struct loop *loop,
3704 struct iv_use *use, struct iv_cand *cand, gimple *at,
3705 struct aff_tree *aff)
3707 tree ubase = use->iv->base;
3708 tree ustep = use->iv->step;
3709 tree cbase = cand->iv->base;
3710 tree cstep = cand->iv->step, cstep_common;
3711 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3712 tree common_type, var;
3713 tree uutype;
3714 aff_tree cbase_aff, var_aff;
3715 widest_int rat;
3717 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3719 /* We do not have a precision to express the values of use. */
3720 return false;
3723 var = var_at_stmt (loop, cand, at);
3724 uutype = unsigned_type_for (utype);
3726 /* If the conversion is not noop, perform it. */
3727 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3729 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3730 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3732 tree inner_base, inner_step, inner_type;
3733 inner_base = TREE_OPERAND (cbase, 0);
3734 if (CONVERT_EXPR_P (cstep))
3735 inner_step = TREE_OPERAND (cstep, 0);
3736 else
3737 inner_step = cstep;
3739 inner_type = TREE_TYPE (inner_base);
3740 /* If candidate is added from a biv whose type is smaller than
3741 ctype, we know both candidate and the biv won't overflow.
3742 In this case, it's safe to skip the convertion in candidate.
3743 As an example, (unsigned short)((unsigned long)A) equals to
3744 (unsigned short)A, if A has a type no larger than short. */
3745 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3747 cbase = inner_base;
3748 cstep = inner_step;
3751 cstep = fold_convert (uutype, cstep);
3752 cbase = fold_convert (uutype, cbase);
3753 var = fold_convert (uutype, var);
3756 /* Ratio is 1 when computing the value of biv cand by itself.
3757 We can't rely on constant_multiple_of in this case because the
3758 use is created after the original biv is selected. The call
3759 could fail because of inconsistent fold behavior. See PR68021
3760 for more information. */
3761 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3763 gcc_assert (is_gimple_assign (use->stmt));
3764 gcc_assert (use->iv->ssa_name == cand->var_after);
3765 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3766 rat = 1;
3768 else if (!constant_multiple_of (ustep, cstep, &rat))
3769 return false;
3771 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3772 type, we achieve better folding by computing their difference in this
3773 wider type, and cast the result to UUTYPE. We do not need to worry about
3774 overflows, as all the arithmetics will in the end be performed in UUTYPE
3775 anyway. */
3776 common_type = determine_common_wider_type (&ubase, &cbase);
3778 /* use = ubase - ratio * cbase + ratio * var. */
3779 tree_to_aff_combination (ubase, common_type, aff);
3780 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3781 tree_to_aff_combination (var, uutype, &var_aff);
3783 /* We need to shift the value if we are after the increment. */
3784 if (stmt_after_increment (loop, cand, at))
3786 aff_tree cstep_aff;
3788 if (common_type != uutype)
3789 cstep_common = fold_convert (common_type, cstep);
3790 else
3791 cstep_common = cstep;
3793 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3794 aff_combination_add (&cbase_aff, &cstep_aff);
3797 aff_combination_scale (&cbase_aff, -rat);
3798 aff_combination_add (aff, &cbase_aff);
3799 if (common_type != uutype)
3800 aff_combination_convert (aff, uutype);
3802 aff_combination_scale (&var_aff, rat);
3803 aff_combination_add (aff, &var_aff);
3805 return true;
3808 /* Return the type of USE. */
3810 static tree
3811 get_use_type (struct iv_use *use)
3813 tree base_type = TREE_TYPE (use->iv->base);
3814 tree type;
3816 if (use->type == USE_ADDRESS)
3818 /* The base_type may be a void pointer. Create a pointer type based on
3819 the mem_ref instead. */
3820 type = build_pointer_type (TREE_TYPE (*use->op_p));
3821 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3822 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3824 else
3825 type = base_type;
3827 return type;
3830 /* Determines the expression by that USE is expressed from induction variable
3831 CAND at statement AT in LOOP. The computation is unshared. */
3833 static tree
3834 get_computation_at (struct loop *loop,
3835 struct iv_use *use, struct iv_cand *cand, gimple *at)
3837 aff_tree aff;
3838 tree type = get_use_type (use);
3840 if (!get_computation_aff (loop, use, cand, at, &aff))
3841 return NULL_TREE;
3842 unshare_aff_combination (&aff);
3843 return fold_convert (type, aff_combination_to_tree (&aff));
3846 /* Determines the expression by that USE is expressed from induction variable
3847 CAND in LOOP. The computation is unshared. */
3849 static tree
3850 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3852 return get_computation_at (loop, use, cand, use->stmt);
3855 /* Adjust the cost COST for being in loop setup rather than loop body.
3856 If we're optimizing for space, the loop setup overhead is constant;
3857 if we're optimizing for speed, amortize it over the per-iteration cost. */
3858 static unsigned
3859 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3861 if (cost == INFTY)
3862 return cost;
3863 else if (optimize_loop_for_speed_p (data->current_loop))
3864 return cost / avg_loop_niter (data->current_loop);
3865 else
3866 return cost;
3869 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3870 validity for a memory reference accessing memory of mode MODE in
3871 address space AS. */
3874 bool
3875 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3876 addr_space_t as)
3878 #define MAX_RATIO 128
3879 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3880 static vec<sbitmap> valid_mult_list;
3881 sbitmap valid_mult;
3883 if (data_index >= valid_mult_list.length ())
3884 valid_mult_list.safe_grow_cleared (data_index + 1);
3886 valid_mult = valid_mult_list[data_index];
3887 if (!valid_mult)
3889 machine_mode address_mode = targetm.addr_space.address_mode (as);
3890 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3891 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3892 rtx addr, scaled;
3893 HOST_WIDE_INT i;
3895 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3896 bitmap_clear (valid_mult);
3897 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3898 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3899 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3901 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3902 if (memory_address_addr_space_p (mode, addr, as)
3903 || memory_address_addr_space_p (mode, scaled, as))
3904 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3907 if (dump_file && (dump_flags & TDF_DETAILS))
3909 fprintf (dump_file, " allowed multipliers:");
3910 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3911 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3912 fprintf (dump_file, " %d", (int) i);
3913 fprintf (dump_file, "\n");
3914 fprintf (dump_file, "\n");
3917 valid_mult_list[data_index] = valid_mult;
3920 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3921 return false;
3923 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3926 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3927 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3928 variable is omitted. Compute the cost for a memory reference that accesses
3929 a memory location of mode MEM_MODE in address space AS.
3931 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3932 size of MEM_MODE / RATIO) is available. To make this determination, we
3933 look at the size of the increment to be made, which is given in CSTEP.
3934 CSTEP may be zero if the step is unknown.
3935 STMT_AFTER_INC is true iff the statement we're looking at is after the
3936 increment of the original biv.
3938 TODO -- there must be some better way. This all is quite crude. */
/* Kinds of auto-increment addressing that an address use may employ.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};
3949 struct address_cost_data
3951 HOST_WIDE_INT min_offset, max_offset;
3952 unsigned costs[2][2][2][2];
3953 unsigned ainc_costs[AINC_NONE];
3957 static comp_cost
3958 get_address_cost (bool symbol_present, bool var_present,
3959 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3960 HOST_WIDE_INT cstep, machine_mode mem_mode,
3961 addr_space_t as, bool speed,
3962 bool stmt_after_inc, bool *may_autoinc)
3964 machine_mode address_mode = targetm.addr_space.address_mode (as);
3965 static vec<address_cost_data *> address_cost_data_list;
3966 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3967 address_cost_data *data;
3968 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3969 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3970 unsigned cost, acost, complexity;
3971 enum ainc_type autoinc_type;
3972 bool offset_p, ratio_p, autoinc;
3973 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3974 unsigned HOST_WIDE_INT mask;
3975 unsigned bits;
3977 if (data_index >= address_cost_data_list.length ())
3978 address_cost_data_list.safe_grow_cleared (data_index + 1);
3980 data = address_cost_data_list[data_index];
3981 if (!data)
3983 HOST_WIDE_INT i;
3984 HOST_WIDE_INT rat, off = 0;
3985 int old_cse_not_expected, width;
3986 unsigned sym_p, var_p, off_p, rat_p, add_c;
3987 rtx_insn *seq;
3988 rtx addr, base;
3989 rtx reg0, reg1;
3991 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3993 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3995 width = GET_MODE_BITSIZE (address_mode) - 1;
3996 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3997 width = HOST_BITS_PER_WIDE_INT - 1;
3998 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
4000 for (i = width; i >= 0; i--)
4002 off = -((unsigned HOST_WIDE_INT) 1 << i);
4003 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4004 if (memory_address_addr_space_p (mem_mode, addr, as))
4005 break;
4007 data->min_offset = (i == -1? 0 : off);
4009 for (i = width; i >= 0; i--)
4011 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
4012 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4013 if (memory_address_addr_space_p (mem_mode, addr, as))
4014 break;
4015 /* For some strict-alignment targets, the offset must be naturally
4016 aligned. Try an aligned offset if mem_mode is not QImode. */
4017 off = mem_mode != QImode
4018 ? ((unsigned HOST_WIDE_INT) 1 << i)
4019 - GET_MODE_SIZE (mem_mode)
4020 : 0;
4021 if (off > 0)
4023 XEXP (addr, 1) = gen_int_mode (off, address_mode);
4024 if (memory_address_addr_space_p (mem_mode, addr, as))
4025 break;
4028 if (i == -1)
4029 off = 0;
4030 data->max_offset = off;
4032 if (dump_file && (dump_flags & TDF_DETAILS))
4034 fprintf (dump_file, "get_address_cost:\n");
4035 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4036 GET_MODE_NAME (mem_mode),
4037 data->min_offset);
4038 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
4039 GET_MODE_NAME (mem_mode),
4040 data->max_offset);
4043 rat = 1;
4044 for (i = 2; i <= MAX_RATIO; i++)
4045 if (multiplier_allowed_in_address_p (i, mem_mode, as))
4047 rat = i;
4048 break;
4051 /* Compute the cost of various addressing modes. */
4052 acost = 0;
4053 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
4054 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
4056 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4057 || USE_STORE_PRE_DECREMENT (mem_mode))
4059 addr = gen_rtx_PRE_DEC (address_mode, reg0);
4060 has_predec[mem_mode]
4061 = memory_address_addr_space_p (mem_mode, addr, as);
4063 if (has_predec[mem_mode])
4064 data->ainc_costs[AINC_PRE_DEC]
4065 = address_cost (addr, mem_mode, as, speed);
4067 if (USE_LOAD_POST_DECREMENT (mem_mode)
4068 || USE_STORE_POST_DECREMENT (mem_mode))
4070 addr = gen_rtx_POST_DEC (address_mode, reg0);
4071 has_postdec[mem_mode]
4072 = memory_address_addr_space_p (mem_mode, addr, as);
4074 if (has_postdec[mem_mode])
4075 data->ainc_costs[AINC_POST_DEC]
4076 = address_cost (addr, mem_mode, as, speed);
4078 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4079 || USE_STORE_PRE_DECREMENT (mem_mode))
4081 addr = gen_rtx_PRE_INC (address_mode, reg0);
4082 has_preinc[mem_mode]
4083 = memory_address_addr_space_p (mem_mode, addr, as);
4085 if (has_preinc[mem_mode])
4086 data->ainc_costs[AINC_PRE_INC]
4087 = address_cost (addr, mem_mode, as, speed);
4089 if (USE_LOAD_POST_INCREMENT (mem_mode)
4090 || USE_STORE_POST_INCREMENT (mem_mode))
4092 addr = gen_rtx_POST_INC (address_mode, reg0);
4093 has_postinc[mem_mode]
4094 = memory_address_addr_space_p (mem_mode, addr, as);
4096 if (has_postinc[mem_mode])
4097 data->ainc_costs[AINC_POST_INC]
4098 = address_cost (addr, mem_mode, as, speed);
4100 for (i = 0; i < 16; i++)
4102 sym_p = i & 1;
4103 var_p = (i >> 1) & 1;
4104 off_p = (i >> 2) & 1;
4105 rat_p = (i >> 3) & 1;
4107 addr = reg0;
4108 if (rat_p)
4109 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4110 gen_int_mode (rat, address_mode));
4112 if (var_p)
4113 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4115 if (sym_p)
4117 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4118 /* ??? We can run into trouble with some backends by presenting
4119 it with symbols which haven't been properly passed through
4120 targetm.encode_section_info. By setting the local bit, we
4121 enhance the probability of things working. */
4122 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4124 if (off_p)
4125 base = gen_rtx_fmt_e (CONST, address_mode,
4126 gen_rtx_fmt_ee
4127 (PLUS, address_mode, base,
4128 gen_int_mode (off, address_mode)));
4130 else if (off_p)
4131 base = gen_int_mode (off, address_mode);
4132 else
4133 base = NULL_RTX;
4135 if (base)
4136 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4138 start_sequence ();
4139 /* To avoid splitting addressing modes, pretend that no cse will
4140 follow. */
4141 old_cse_not_expected = cse_not_expected;
4142 cse_not_expected = true;
4143 addr = memory_address_addr_space (mem_mode, addr, as);
4144 cse_not_expected = old_cse_not_expected;
4145 seq = get_insns ();
4146 end_sequence ();
4148 acost = seq_cost (seq, speed);
4149 acost += address_cost (addr, mem_mode, as, speed);
4151 if (!acost)
4152 acost = 1;
4153 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4156 /* On some targets, it is quite expensive to load symbol to a register,
4157 which makes addresses that contain symbols look much more expensive.
4158 However, the symbol will have to be loaded in any case before the
4159 loop (and quite likely we have it in register already), so it does not
4160 make much sense to penalize them too heavily. So make some final
4161 tweaks for the SYMBOL_PRESENT modes:
4163 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4164 var is cheaper, use this mode with small penalty.
4165 If VAR_PRESENT is true, try whether the mode with
4166 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4167 if this is the case, use it. */
4168 add_c = add_cost (speed, address_mode);
4169 for (i = 0; i < 8; i++)
4171 var_p = i & 1;
4172 off_p = (i >> 1) & 1;
4173 rat_p = (i >> 2) & 1;
4175 acost = data->costs[0][1][off_p][rat_p] + 1;
4176 if (var_p)
4177 acost += add_c;
4179 if (acost < data->costs[1][var_p][off_p][rat_p])
4180 data->costs[1][var_p][off_p][rat_p] = acost;
4183 if (dump_file && (dump_flags & TDF_DETAILS))
4185 fprintf (dump_file, "<Address Costs>:\n");
4187 for (i = 0; i < 16; i++)
4189 sym_p = i & 1;
4190 var_p = (i >> 1) & 1;
4191 off_p = (i >> 2) & 1;
4192 rat_p = (i >> 3) & 1;
4194 fprintf (dump_file, " ");
4195 if (sym_p)
4196 fprintf (dump_file, "sym + ");
4197 if (var_p)
4198 fprintf (dump_file, "var + ");
4199 if (off_p)
4200 fprintf (dump_file, "cst + ");
4201 if (rat_p)
4202 fprintf (dump_file, "rat * ");
4204 acost = data->costs[sym_p][var_p][off_p][rat_p];
4205 fprintf (dump_file, "index costs %d\n", acost);
4207 if (has_predec[mem_mode] || has_postdec[mem_mode]
4208 || has_preinc[mem_mode] || has_postinc[mem_mode])
4209 fprintf (dump_file, " May include autoinc/dec\n");
4210 fprintf (dump_file, "\n");
4213 address_cost_data_list[data_index] = data;
4216 bits = GET_MODE_BITSIZE (address_mode);
4217 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4218 offset &= mask;
4219 if ((offset >> (bits - 1) & 1))
4220 offset |= ~mask;
4221 s_offset = offset;
4223 autoinc = false;
4224 autoinc_type = AINC_NONE;
4225 msize = GET_MODE_SIZE (mem_mode);
4226 autoinc_offset = offset;
4227 if (stmt_after_inc)
4228 autoinc_offset += ratio * cstep;
4229 if (symbol_present || var_present || ratio != 1)
4230 autoinc = false;
4231 else
4233 if (has_postinc[mem_mode] && autoinc_offset == 0
4234 && msize == cstep)
4235 autoinc_type = AINC_POST_INC;
4236 else if (has_postdec[mem_mode] && autoinc_offset == 0
4237 && msize == -cstep)
4238 autoinc_type = AINC_POST_DEC;
4239 else if (has_preinc[mem_mode] && autoinc_offset == msize
4240 && msize == cstep)
4241 autoinc_type = AINC_PRE_INC;
4242 else if (has_predec[mem_mode] && autoinc_offset == -msize
4243 && msize == -cstep)
4244 autoinc_type = AINC_PRE_DEC;
4246 if (autoinc_type != AINC_NONE)
4247 autoinc = true;
4250 cost = 0;
4251 offset_p = (s_offset != 0
4252 && data->min_offset <= s_offset
4253 && s_offset <= data->max_offset);
4254 ratio_p = (ratio != 1
4255 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4257 if (ratio != 1 && !ratio_p)
4258 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4260 if (s_offset && !offset_p && !symbol_present)
4261 cost += add_cost (speed, address_mode);
4263 if (may_autoinc)
4264 *may_autoinc = autoinc;
4265 if (autoinc)
4266 acost = data->ainc_costs[autoinc_type];
4267 else
4268 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4269 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4270 return comp_cost (cost + acost, complexity);
4273 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4274 EXPR operand holding the shift. COST0 and COST1 are the costs for
4275 calculating the operands of EXPR. Returns true if successful, and returns
4276 the cost in COST. */
4278 static bool
4279 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4280 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4282 comp_cost res;
4283 tree op1 = TREE_OPERAND (expr, 1);
4284 tree cst = TREE_OPERAND (mult, 1);
4285 tree multop = TREE_OPERAND (mult, 0);
4286 int m = exact_log2 (int_cst_value (cst));
4287 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4288 int as_cost, sa_cost;
4289 bool mult_in_op1;
4291 if (!(m >= 0 && m < maxm))
4292 return false;
4294 STRIP_NOPS (op1);
4295 mult_in_op1 = operand_equal_p (op1, mult, 0);
4297 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4299 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4300 use that in preference to a shift insn followed by an add insn. */
4301 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4302 ? shiftadd_cost (speed, mode, m)
4303 : (mult_in_op1
4304 ? shiftsub1_cost (speed, mode, m)
4305 : shiftsub0_cost (speed, mode, m)));
4307 res = comp_cost (MIN (as_cost, sa_cost), 0);
4308 res += (mult_in_op1 ? cost0 : cost1);
4310 STRIP_NOPS (multop);
4311 if (!is_gimple_val (multop))
4312 res += force_expr_to_var_cost (multop, speed);
4314 *cost = res;
4315 return true;
4318 /* Estimates cost of forcing expression EXPR into a variable. */
4320 static comp_cost
4321 force_expr_to_var_cost (tree expr, bool speed)
4323 static bool costs_initialized = false;
4324 static unsigned integer_cost [2];
4325 static unsigned symbol_cost [2];
4326 static unsigned address_cost [2];
4327 tree op0, op1;
4328 comp_cost cost0, cost1, cost;
4329 machine_mode mode;
4331 if (!costs_initialized)
4333 tree type = build_pointer_type (integer_type_node);
4334 tree var, addr;
4335 rtx x;
4336 int i;
4338 var = create_tmp_var_raw (integer_type_node, "test_var");
4339 TREE_STATIC (var) = 1;
4340 x = produce_memory_decl_rtl (var, NULL);
4341 SET_DECL_RTL (var, x);
4343 addr = build1 (ADDR_EXPR, type, var);
4346 for (i = 0; i < 2; i++)
4348 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4349 2000), i);
4351 symbol_cost[i] = computation_cost (addr, i) + 1;
4353 address_cost[i]
4354 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4355 if (dump_file && (dump_flags & TDF_DETAILS))
4357 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4358 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4359 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4360 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4361 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4362 fprintf (dump_file, "\n");
4366 costs_initialized = true;
4369 STRIP_NOPS (expr);
4371 if (SSA_VAR_P (expr))
4372 return no_cost;
4374 if (is_gimple_min_invariant (expr))
4376 if (TREE_CODE (expr) == INTEGER_CST)
4377 return comp_cost (integer_cost [speed], 0);
4379 if (TREE_CODE (expr) == ADDR_EXPR)
4381 tree obj = TREE_OPERAND (expr, 0);
4383 if (TREE_CODE (obj) == VAR_DECL
4384 || TREE_CODE (obj) == PARM_DECL
4385 || TREE_CODE (obj) == RESULT_DECL)
4386 return comp_cost (symbol_cost [speed], 0);
4389 return comp_cost (address_cost [speed], 0);
4392 switch (TREE_CODE (expr))
4394 case POINTER_PLUS_EXPR:
4395 case PLUS_EXPR:
4396 case MINUS_EXPR:
4397 case MULT_EXPR:
4398 op0 = TREE_OPERAND (expr, 0);
4399 op1 = TREE_OPERAND (expr, 1);
4400 STRIP_NOPS (op0);
4401 STRIP_NOPS (op1);
4402 break;
4404 CASE_CONVERT:
4405 case NEGATE_EXPR:
4406 op0 = TREE_OPERAND (expr, 0);
4407 STRIP_NOPS (op0);
4408 op1 = NULL_TREE;
4409 break;
4411 default:
4412 /* Just an arbitrary value, FIXME. */
4413 return comp_cost (target_spill_cost[speed], 0);
4416 if (op0 == NULL_TREE
4417 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4418 cost0 = no_cost;
4419 else
4420 cost0 = force_expr_to_var_cost (op0, speed);
4422 if (op1 == NULL_TREE
4423 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4424 cost1 = no_cost;
4425 else
4426 cost1 = force_expr_to_var_cost (op1, speed);
4428 mode = TYPE_MODE (TREE_TYPE (expr));
4429 switch (TREE_CODE (expr))
4431 case POINTER_PLUS_EXPR:
4432 case PLUS_EXPR:
4433 case MINUS_EXPR:
4434 case NEGATE_EXPR:
4435 cost = comp_cost (add_cost (speed, mode), 0);
4436 if (TREE_CODE (expr) != NEGATE_EXPR)
4438 tree mult = NULL_TREE;
4439 comp_cost sa_cost;
4440 if (TREE_CODE (op1) == MULT_EXPR)
4441 mult = op1;
4442 else if (TREE_CODE (op0) == MULT_EXPR)
4443 mult = op0;
4445 if (mult != NULL_TREE
4446 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4447 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4448 speed, &sa_cost))
4449 return sa_cost;
4451 break;
4453 CASE_CONVERT:
4455 tree inner_mode, outer_mode;
4456 outer_mode = TREE_TYPE (expr);
4457 inner_mode = TREE_TYPE (op0);
4458 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4459 TYPE_MODE (inner_mode), speed), 0);
4461 break;
4463 case MULT_EXPR:
4464 if (cst_and_fits_in_hwi (op0))
4465 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4466 mode, speed), 0);
4467 else if (cst_and_fits_in_hwi (op1))
4468 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4469 mode, speed), 0);
4470 else
4471 return comp_cost (target_spill_cost [speed], 0);
4472 break;
4474 default:
4475 gcc_unreachable ();
4478 cost += cost0;
4479 cost += cost1;
4481 /* Bound the cost by target_spill_cost. The parts of complicated
4482 computations often are either loop invariant or at least can
4483 be shared between several iv uses, so letting this grow without
4484 limits would not give reasonable results. */
4485 if (cost.cost > (int) target_spill_cost [speed])
4486 cost.cost = target_spill_cost [speed];
4488 return cost;
4491 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4492 invariants the computation depends on. */
4494 static comp_cost
4495 force_var_cost (struct ivopts_data *data,
4496 tree expr, bitmap *depends_on)
4498 if (depends_on)
4500 fd_ivopts_data = data;
4501 walk_tree (&expr, find_depends, depends_on, NULL);
4504 return force_expr_to_var_cost (expr, data->speed);
4507 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4508 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4509 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4510 invariants the computation depends on. */
4512 static comp_cost
4513 split_address_cost (struct ivopts_data *data,
4514 tree addr, bool *symbol_present, bool *var_present,
4515 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4517 tree core;
4518 HOST_WIDE_INT bitsize;
4519 HOST_WIDE_INT bitpos;
4520 tree toffset;
4521 machine_mode mode;
4522 int unsignedp, reversep, volatilep;
4524 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4525 &unsignedp, &reversep, &volatilep, false);
4527 if (toffset != 0
4528 || bitpos % BITS_PER_UNIT != 0
4529 || reversep
4530 || TREE_CODE (core) != VAR_DECL)
4532 *symbol_present = false;
4533 *var_present = true;
4534 fd_ivopts_data = data;
4535 if (depends_on)
4536 walk_tree (&addr, find_depends, depends_on, NULL);
4538 return comp_cost (target_spill_cost[data->speed], 0);
4541 *offset += bitpos / BITS_PER_UNIT;
4542 if (TREE_STATIC (core)
4543 || DECL_EXTERNAL (core))
4545 *symbol_present = true;
4546 *var_present = false;
4547 return no_cost;
4550 *symbol_present = false;
4551 *var_present = true;
4552 return no_cost;
4555 /* Estimates cost of expressing difference of addresses E1 - E2 as
4556 var + symbol + offset. The value of offset is added to OFFSET,
4557 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4558 part is missing. DEPENDS_ON is a set of the invariants the computation
4559 depends on. */
4561 static comp_cost
4562 ptr_difference_cost (struct ivopts_data *data,
4563 tree e1, tree e2, bool *symbol_present, bool *var_present,
4564 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4566 HOST_WIDE_INT diff = 0;
4567 aff_tree aff_e1, aff_e2;
4568 tree type;
4570 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4572 if (ptr_difference_const (e1, e2, &diff))
4574 *offset += diff;
4575 *symbol_present = false;
4576 *var_present = false;
4577 return no_cost;
4580 if (integer_zerop (e2))
4581 return split_address_cost (data, TREE_OPERAND (e1, 0),
4582 symbol_present, var_present, offset, depends_on);
4584 *symbol_present = false;
4585 *var_present = true;
4587 type = signed_type_for (TREE_TYPE (e1));
4588 tree_to_aff_combination (e1, type, &aff_e1);
4589 tree_to_aff_combination (e2, type, &aff_e2);
4590 aff_combination_scale (&aff_e2, -1);
4591 aff_combination_add (&aff_e1, &aff_e2);
4593 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4596 /* Estimates cost of expressing difference E1 - E2 as
4597 var + symbol + offset. The value of offset is added to OFFSET,
4598 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4599 part is missing. DEPENDS_ON is a set of the invariants the computation
4600 depends on. */
4602 static comp_cost
4603 difference_cost (struct ivopts_data *data,
4604 tree e1, tree e2, bool *symbol_present, bool *var_present,
4605 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4607 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4608 unsigned HOST_WIDE_INT off1, off2;
4609 aff_tree aff_e1, aff_e2;
4610 tree type;
4612 e1 = strip_offset (e1, &off1);
4613 e2 = strip_offset (e2, &off2);
4614 *offset += off1 - off2;
4616 STRIP_NOPS (e1);
4617 STRIP_NOPS (e2);
4619 if (TREE_CODE (e1) == ADDR_EXPR)
4620 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4621 offset, depends_on);
4622 *symbol_present = false;
4624 if (operand_equal_p (e1, e2, 0))
4626 *var_present = false;
4627 return no_cost;
4630 *var_present = true;
4632 if (integer_zerop (e2))
4633 return force_var_cost (data, e1, depends_on);
4635 if (integer_zerop (e1))
4637 comp_cost cost = force_var_cost (data, e2, depends_on);
4638 cost += mult_by_coeff_cost (-1, mode, data->speed);
4639 return cost;
4642 type = signed_type_for (TREE_TYPE (e1));
4643 tree_to_aff_combination (e1, type, &aff_e1);
4644 tree_to_aff_combination (e2, type, &aff_e2);
4645 aff_combination_scale (&aff_e2, -1);
4646 aff_combination_add (&aff_e1, &aff_e2);
4648 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4651 /* Returns true if AFF1 and AFF2 are identical. */
4653 static bool
4654 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4656 unsigned i;
4658 if (aff1->n != aff2->n)
4659 return false;
4661 for (i = 0; i < aff1->n; i++)
4663 if (aff1->elts[i].coef != aff2->elts[i].coef)
4664 return false;
4666 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4667 return false;
4669 return true;
4672 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
4674 static iv_inv_expr_ent *
4675 record_inv_expr (struct ivopts_data *data, tree expr)
4677 struct iv_inv_expr_ent ent;
4678 struct iv_inv_expr_ent **slot;
4680 ent.expr = expr;
4681 ent.hash = iterative_hash_expr (expr, 0);
4682 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4684 if (!*slot)
4686 *slot = XNEW (struct iv_inv_expr_ent);
4687 (*slot)->expr = expr;
4688 (*slot)->hash = ent.hash;
4689 (*slot)->id = data->max_inv_expr_id++;
4692 return *slot;
4695 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4696 requires a new compiler generated temporary. Returns NULL otherwise.
4697 ADDRESS_P is a flag indicating if the expression is for address
4698 computation. */
4700 static iv_inv_expr_ent *
4701 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4702 tree cbase, HOST_WIDE_INT ratio,
4703 bool address_p)
4705 aff_tree ubase_aff, cbase_aff;
4706 tree expr, ub, cb;
4708 STRIP_NOPS (ubase);
4709 STRIP_NOPS (cbase);
4710 ub = ubase;
4711 cb = cbase;
   /* Two constants need no temporary at all.  */
4713 if ((TREE_CODE (ubase) == INTEGER_CST)
4714 && (TREE_CODE (cbase) == INTEGER_CST))
4715 return NULL;
4717 /* Strips the constant part. */
4718 if (TREE_CODE (ubase) == PLUS_EXPR
4719 || TREE_CODE (ubase) == MINUS_EXPR
4720 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4722 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4723 ubase = TREE_OPERAND (ubase, 0);
4726 /* Strips the constant part. */
4727 if (TREE_CODE (cbase) == PLUS_EXPR
4728 || TREE_CODE (cbase) == MINUS_EXPR
4729 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4731 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4732 cbase = TREE_OPERAND (cbase, 0);
   /* For address computation, an SSA name or invariant address paired
      with a constant folds into the addressing mode, so no temporary
      is needed.  */
4735 if (address_p)
4737 if (((TREE_CODE (ubase) == SSA_NAME)
4738 || (TREE_CODE (ubase) == ADDR_EXPR
4739 && is_gimple_min_invariant (ubase)))
4740 && (TREE_CODE (cbase) == INTEGER_CST))
4741 return NULL;
4743 if (((TREE_CODE (cbase) == SSA_NAME)
4744 || (TREE_CODE (cbase) == ADDR_EXPR
4745 && is_gimple_min_invariant (cbase)))
4746 && (TREE_CODE (ubase) == INTEGER_CST))
4747 return NULL;
   /* With RATIO == 1 the bases cancel whenever they are equal (possibly
      modulo a zero-index ARRAY_REF wrapper or affine rearrangement).  */
4750 if (ratio == 1)
4752 if (operand_equal_p (ubase, cbase, 0))
4753 return NULL;
4755 if (TREE_CODE (ubase) == ADDR_EXPR
4756 && TREE_CODE (cbase) == ADDR_EXPR)
4758 tree usym, csym;
4760 usym = TREE_OPERAND (ubase, 0);
4761 csym = TREE_OPERAND (cbase, 0);
4762 if (TREE_CODE (usym) == ARRAY_REF)
4764 tree ind = TREE_OPERAND (usym, 1);
4765 if (TREE_CODE (ind) == INTEGER_CST
4766 && tree_fits_shwi_p (ind)
4767 && tree_to_shwi (ind) == 0)
4768 usym = TREE_OPERAND (usym, 0);
4770 if (TREE_CODE (csym) == ARRAY_REF)
4772 tree ind = TREE_OPERAND (csym, 1);
4773 if (TREE_CODE (ind) == INTEGER_CST
4774 && tree_fits_shwi_p (ind)
4775 && tree_to_shwi (ind) == 0)
4776 csym = TREE_OPERAND (csym, 0);
4778 if (operand_equal_p (usym, csym, 0))
4779 return NULL;
4781 /* Now do more complex comparison */
4782 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4783 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4784 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4785 return NULL;
   /* A temporary is needed; build UB - RATIO * CB (on the unstripped
      bases) and record it as an invariant expression.  */
4788 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4789 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4791 aff_combination_scale (&cbase_aff, -1 * ratio);
4792 aff_combination_add (&ubase_aff, &cbase_aff);
4793 expr = aff_combination_to_tree (&ubase_aff);
4794 return record_inv_expr (data, expr);
4799 /* Determines the cost of the computation by that USE is expressed
4800 from induction variable CAND. If ADDRESS_P is true, we just need
4801 to create an address from it, otherwise we want to get it into
4802 register. A set of invariants we depend on is stored in
4803 DEPENDS_ON. AT is the statement at that the value is computed.
4804 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4805 addressing is likely. */
/* If INV_EXPR is nonnull and the computation has a loop-invariant part,
   that part is recorded there and DEPENDS_ON is cleared.  */
4807 static comp_cost
4808 get_computation_cost_at (struct ivopts_data *data,
4809 struct iv_use *use, struct iv_cand *cand,
4810 bool address_p, bitmap *depends_on, gimple *at,
4811 bool *can_autoinc,
4812 iv_inv_expr_ent **inv_expr)
4814 tree ubase = use->iv->base, ustep = use->iv->step;
4815 tree cbase, cstep;
4816 tree utype = TREE_TYPE (ubase), ctype;
4817 unsigned HOST_WIDE_INT cstepi, offset = 0;
4818 HOST_WIDE_INT ratio, aratio;
4819 bool var_present, symbol_present, stmt_is_after_inc;
4820 comp_cost cost;
4821 widest_int rat;
4822 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4823 machine_mode mem_mode = (address_p
4824 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4825 : VOIDmode);
4827 if (depends_on)
4828 *depends_on = NULL;
4830 /* Only consider real candidates. */
4831 if (!cand->iv)
4832 return infinite_cost;
4834 cbase = cand->iv->base;
4835 cstep = cand->iv->step;
4836 ctype = TREE_TYPE (cbase);
4838 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4840 /* We do not have a precision to express the values of use. */
4841 return infinite_cost;
4844 if (address_p
4845 || (use->iv->base_object
4846 && cand->iv->base_object
4847 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4848 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4850 /* Do not try to express address of an object with computation based
4851 on address of a different object. This may cause problems in rtl
4852 level alias analysis (that does not expect this to be happening,
4853 as this is illegal in C), and would be unlikely to be useful
4854 anyway. */
4855 if (use->iv->base_object
4856 && cand->iv->base_object
4857 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4858 return infinite_cost;
4861 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4863 /* TODO -- add direct handling of this case. */
4864 goto fallback;
4867 /* CSTEPI is removed from the offset in case statement is after the
4868 increment. If the step is not constant, we use zero instead.
4869 This is a bit imprecise (there is the extra addition), but
4870 redundancy elimination is likely to transform the code so that
4871 it uses value of the variable before increment anyway,
4872 so it is not that much unrealistic. */
4873 if (cst_and_fits_in_hwi (cstep))
4874 cstepi = int_cst_value (cstep);
4875 else
4876 cstepi = 0;
   /* RATIO is ustep / cstep; bail out unless it is a host-word constant.  */
4878 if (!constant_multiple_of (ustep, cstep, &rat))
4879 return infinite_cost;
4881 if (wi::fits_shwi_p (rat))
4882 ratio = rat.to_shwi ();
4883 else
4884 return infinite_cost;
4886 STRIP_NOPS (cbase);
4887 ctype = TREE_TYPE (cbase);
4889 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4891 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4892 or ratio == 1, it is better to handle this like
4894 ubase - ratio * cbase + ratio * var
4896 (also holds in the case ratio == -1, TODO. */
4898 if (cst_and_fits_in_hwi (cbase))
4900 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4901 cost = difference_cost (data,
4902 ubase, build_int_cst (utype, 0),
4903 &symbol_present, &var_present, &offset,
4904 depends_on);
4905 cost /= avg_loop_niter (data->current_loop);
4907 else if (ratio == 1)
4909 tree real_cbase = cbase;
4911 /* Check to see if any adjustment is needed. */
4912 if (cstepi == 0 && stmt_is_after_inc)
4914 aff_tree real_cbase_aff;
4915 aff_tree cstep_aff;
4917 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4918 &real_cbase_aff);
4919 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4921 aff_combination_add (&real_cbase_aff, &cstep_aff);
4922 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4925 cost = difference_cost (data,
4926 ubase, real_cbase,
4927 &symbol_present, &var_present, &offset,
4928 depends_on);
4929 cost /= avg_loop_niter (data->current_loop);
4931 else if (address_p
4932 && !POINTER_TYPE_P (ctype)
4933 && multiplier_allowed_in_address_p
4934 (ratio, mem_mode,
4935 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4937 tree real_cbase = cbase;
4939 if (cstepi == 0 && stmt_is_after_inc)
4941 if (POINTER_TYPE_P (ctype))
4942 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4943 else
4944 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4946 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4947 build_int_cst (ctype, ratio));
4948 cost = difference_cost (data,
4949 ubase, real_cbase,
4950 &symbol_present, &var_present, &offset,
4951 depends_on);
4952 cost /= avg_loop_niter (data->current_loop);
4954 else
4956 cost = force_var_cost (data, cbase, depends_on);
4957 cost += difference_cost (data, ubase, build_int_cst (utype, 0),
4958 &symbol_present, &var_present, &offset,
4959 depends_on);
4960 cost /= avg_loop_niter (data->current_loop);
4961 cost += add_cost (data->speed, TYPE_MODE (ctype));
4964 /* Record setup cost in scratch field. */
4965 cost.scratch = cost.cost;
4967 if (inv_expr && depends_on && *depends_on)
4969 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4970 address_p);
4971 /* Clear depends on. */
4972 if (*inv_expr != NULL)
4973 bitmap_clear (*depends_on);
4976 /* If we are after the increment, the value of the candidate is higher by
4977 one iteration. */
4978 if (stmt_is_after_inc)
4979 offset -= ratio * cstepi;
4981 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4982 (symbol/var1/const parts may be omitted). If we are looking for an
4983 address, find the cost of addressing this. */
4984 if (address_p)
4985 return cost + get_address_cost (symbol_present, var_present,
4986 offset, ratio, cstepi,
4987 mem_mode,
4988 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4989 speed, stmt_is_after_inc, can_autoinc);
4991 /* Otherwise estimate the costs for computing the expression. */
4992 if (!symbol_present && !var_present && !offset)
4994 if (ratio != 1)
4995 cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4996 return cost;
4999 /* Symbol + offset should be compile-time computable so consider that they
5000 are added once to the variable, if present. */
5001 if (var_present && (symbol_present || offset))
5002 cost += adjust_setup_cost (data,
5003 add_cost (speed, TYPE_MODE (ctype)));
5005 /* Having offset does not affect runtime cost in case it is added to
5006 symbol, but it increases complexity. */
5007 if (offset)
5008 cost.complexity++;
5010 cost += add_cost (speed, TYPE_MODE (ctype));
5012 aratio = ratio > 0 ? ratio : -ratio;
5013 if (aratio != 1)
5014 cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5015 return cost;
5017 fallback:
5018 if (can_autoinc)
5019 *can_autoinc = false;
5022 /* Just get the expression, expand it and measure the cost. */
5023 tree comp = get_computation_at (data->current_loop, use, cand, at);
5025 if (!comp)
5026 return infinite_cost;
5028 if (address_p)
5029 comp = build_simple_mem_ref (comp);
5031 return comp_cost (computation_cost (comp, speed), 0);
5035 /* Determines the cost of the computation by that USE is expressed
5036 from induction variable CAND. If ADDRESS_P is true, we just need
5037 to create an address from it, otherwise we want to get it into
5038 register. A set of invariants we depend on is stored in
5039 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5040 autoinc addressing is likely. */
5042 static comp_cost
5043 get_computation_cost (struct ivopts_data *data,
5044 struct iv_use *use, struct iv_cand *cand,
5045 bool address_p, bitmap *depends_on,
5046 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
5048 return get_computation_cost_at (data,
5049 use, cand, address_p, depends_on, use->stmt,
5050 can_autoinc, inv_expr);
5053 /* Determines cost of computing the use in GROUP with CAND in a generic
5054 expression. */
5056 static bool
5057 determine_group_iv_cost_generic (struct ivopts_data *data,
5058 struct iv_group *group, struct iv_cand *cand)
5060 comp_cost cost;
5061 iv_inv_expr_ent *inv_expr = NULL;
5062 bitmap depends_on = NULL;
5063 struct iv_use *use = group->vuses[0];
5065 /* The simple case first -- if we need to express value of the preserved
5066 original biv, the cost is 0. This also prevents us from counting the
5067 cost of increment twice -- once at this use and once in the cost of
5068 the candidate. */
5069 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
5070 cost = no_cost;
5071 else
5072 cost = get_computation_cost (data, use, cand, false,
5073 &depends_on, NULL, &inv_expr);
5075 set_group_iv_cost (data, group, cand, cost, depends_on,
5076 NULL_TREE, ERROR_MARK, inv_expr);
5077 return !cost.infinite_cost_p ();
5080 /* Determines cost of computing uses in GROUP with CAND in addresses. */
/* Records the summed cost via set_group_iv_cost and returns false iff
   GROUP cannot be expressed by CAND.  */
5082 static bool
5083 determine_group_iv_cost_address (struct ivopts_data *data,
5084 struct iv_group *group, struct iv_cand *cand)
5086 unsigned i;
5087 bitmap depends_on;
5088 bool can_autoinc, first = true;
5089 iv_inv_expr_ent *inv_expr = NULL;
5090 struct iv_use *use = group->vuses[0];
5091 comp_cost sum_cost = no_cost, cost;
   /* Cost of the representative (first) use of the group.  */
5093 cost = get_computation_cost (data, use, cand, true,
5094 &depends_on, &can_autoinc, &inv_expr);
5096 sum_cost = cost;
5097 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
5099 if (can_autoinc)
5100 sum_cost -= cand->cost_step;
5101 /* If we generated the candidate solely for exploiting autoincrement
5102 opportunities, and it turns out it can't be used, set the cost to
5103 infinity to make sure we ignore it. */
5104 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5105 sum_cost = infinite_cost;
5108 /* Uses in a group can share setup code, so only add setup cost once. */
5109 cost -= cost.scratch;
5110 /* Compute and add costs for rest uses of this group. */
5111 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
5113 struct iv_use *next = group->vuses[i];
5115 /* Compute cost for the first use with different offset to the main
5116 use and add it afterwards. Costs for these uses could be quite
5117 different. Given below uses in a group:
5118 use 0 : {base + A + offset_0, step}
5119 use 0.1: {base + A + offset_0, step}
5120 use 0.2: {base + A + offset_1, step}
5121 use 0.3: {base + A + offset_2, step}
5122 when we need to compute costs with candidate:
5123 cand 1 : {base + B + offset_0, step}
5125 The first use with different offset is use 0.2, its cost is larger
5126 than cost of use 0/0.1 because we need to compute:
5127 A - B + offset_1 - offset_0
5128 rather than:
5129 A - B. */
5130 if (first && next->addr_offset != use->addr_offset)
5132 first = false;
5133 cost = get_computation_cost (data, next, cand, true,
5134 NULL, &can_autoinc, NULL);
5135 /* Remove setup cost. */
5136 if (!cost.infinite_cost_p ())
5137 cost -= cost.scratch;
5139 sum_cost += cost;
5141 set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5142 NULL_TREE, ERROR_MARK, inv_expr);
5144 return !sum_cost.infinite_cost_p ();
5147 /* Computes value of candidate CAND at position AT in iteration NITER, and
5148 stores it to VAL. */
5150 static void
5151 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5152 aff_tree *val)
5154 aff_tree step, delta, nit;
5155 struct iv *iv = cand->iv;
5156 tree type = TREE_TYPE (iv->base);
5157 tree steptype = type;
5158 if (POINTER_TYPE_P (type))
5159 steptype = sizetype;
5160 steptype = unsigned_type_for (type);
5162 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5163 aff_combination_convert (&step, steptype);
5164 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5165 aff_combination_convert (&nit, steptype);
5166 aff_combination_mult (&nit, &step, &delta);
5167 if (stmt_after_increment (loop, cand, at))
5168 aff_combination_add (&delta, &step);
5170 tree_to_aff_combination (iv->base, type, val);
5171 if (!POINTER_TYPE_P (type))
5172 aff_combination_convert (val, steptype);
5173 aff_combination_add (val, &delta);
5176 /* Returns period of induction variable iv. */
5178 static tree
5179 iv_period (struct iv *iv)
5181 tree step = iv->step, period, type;
5182 tree pow2div;
5184 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5186 type = unsigned_type_for (TREE_TYPE (step));
5187 /* Period of the iv is lcm (step, type_range)/step -1,
5188 i.e., N*type_range/step - 1. Since type range is power
5189 of two, N == (step >> num_of_ending_zeros_binary (step),
5190 so the final result is
5192 (type_range >> num_of_ending_zeros_binary (step)) - 1
5195 pow2div = num_ending_zeros (step);
5197 period = build_low_bits_mask (type,
5198 (TYPE_PRECISION (type)
5199 - tree_to_uhwi (pow2div)));
5201 return period;
5204 /* Returns the comparison operator used when eliminating the iv USE. */
5206 static enum tree_code
5207 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5209 struct loop *loop = data->current_loop;
5210 basic_block ex_bb;
5211 edge exit;
5213 ex_bb = gimple_bb (use->stmt);
5214 exit = EDGE_SUCC (ex_bb, 0);
5215 if (flow_bb_inside_loop_p (loop, exit->dest))
5216 exit = EDGE_SUCC (ex_bb, 1);
5218 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5221 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5222 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5223 calculation is performed in non-wrapping type.
5225 TODO: More generally, we could test for the situation that
5226 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5227 This would require knowing the sign of OFFSET. */
5229 static bool
5230 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5232 enum tree_code code;
5233 tree e1, e2;
5234 aff_tree aff_e1, aff_e2, aff_offset;
5236 if (!nowrap_type_p (TREE_TYPE (base)))
5237 return false;
5239 base = expand_simple_operations (base);
   /* Decompose BASE into E1 CODE E2, either from its defining GIMPLE
      assignment or directly from the expression tree.  */
5241 if (TREE_CODE (base) == SSA_NAME)
5243 gimple *stmt = SSA_NAME_DEF_STMT (base);
5245 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5246 return false;
5248 code = gimple_assign_rhs_code (stmt);
5249 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5250 return false;
5252 e1 = gimple_assign_rhs1 (stmt);
5253 e2 = gimple_assign_rhs2 (stmt);
5255 else
5257 code = TREE_CODE (base);
5258 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5259 return false;
5260 e1 = TREE_OPERAND (base, 0);
5261 e2 = TREE_OPERAND (base, 1);
5264 /* Use affine expansion as deeper inspection to prove the equality. */
5265 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5266 &aff_e2, &data->name_expansion_cache);
5267 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5268 &aff_offset, &data->name_expansion_cache);
5269 aff_combination_scale (&aff_offset, -1);
   /* BASE is E1 CODE E2; BASE - OFFSET cannot overflow when OFFSET
      cancels one of the addends exactly.  */
5270 switch (code)
5272 case PLUS_EXPR:
5273 aff_combination_add (&aff_e2, &aff_offset);
5274 if (aff_combination_zero_p (&aff_e2))
5275 return true;
5277 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5278 &aff_e1, &data->name_expansion_cache);
5279 aff_combination_add (&aff_e1, &aff_offset);
5280 return aff_combination_zero_p (&aff_e1);
5282 case POINTER_PLUS_EXPR:
5283 aff_combination_add (&aff_e2, &aff_offset);
5284 return aff_combination_zero_p (&aff_e2);
5286 default:
5287 return false;
5291 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5292 comparison with CAND. NITER describes the number of iterations of
5293 the loops. If successful, the comparison in COMP_P is altered accordingly.
5295 We aim to handle the following situation:
5297 sometype *base, *p;
5298 int a, b, i;
5300 i = a;
5301 p = p_0 = base + a;
5305 bla (*p);
5306 p++;
5307 i++;
5309 while (i < b);
5311 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5312 We aim to optimize this to
5314 p = p_0 = base + a;
5317 bla (*p);
5318 p++;
5320 while (p < p_0 - a + b);
5322 This preserves the correctness, since the pointer arithmetics does not
5323 overflow. More precisely:
5325 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5326 overflow in computing it or the values of p.
5327 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5328 overflow. To prove this, we use the fact that p_0 = base + a. */
5330 static bool
5331 iv_elimination_compare_lt (struct ivopts_data *data,
5332 struct iv_cand *cand, enum tree_code *comp_p,
5333 struct tree_niter_desc *niter)
5335 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5336 struct aff_tree nit, tmpa, tmpb;
5337 enum tree_code comp;
5338 HOST_WIDE_INT step;
5340 /* We need to know that the candidate induction variable does not overflow.
5341 While more complex analysis may be used to prove this, for now just
5342 check that the variable appears in the original program and that it
5343 is computed in a type that guarantees no overflows. */
5344 cand_type = TREE_TYPE (cand->iv->base);
5345 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5346 return false;
5348 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5349 the calculation of the BOUND could overflow, making the comparison
5350 invalid. */
5351 if (!data->loop_single_exit_p)
5352 return false;
5354 /* We need to be able to decide whether candidate is increasing or decreasing
5355 in order to choose the right comparison operator. */
5356 if (!cst_and_fits_in_hwi (cand->iv->step))
5357 return false;
5358 step = int_cst_value (cand->iv->step);
5360 /* Check that the number of iterations matches the expected pattern:
5361 a + 1 > b ? 0 : b - a - 1. */
5362 mbz = niter->may_be_zero;
5363 if (TREE_CODE (mbz) == GT_EXPR)
5365 /* Handle a + 1 > b. */
5366 tree op0 = TREE_OPERAND (mbz, 0);
5367 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5369 a = TREE_OPERAND (op0, 0);
5370 b = TREE_OPERAND (mbz, 1);
5372 else
5373 return false;
5375 else if (TREE_CODE (mbz) == LT_EXPR)
5377 tree op1 = TREE_OPERAND (mbz, 1);
5379 /* Handle b < a + 1. */
5380 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5382 a = TREE_OPERAND (op1, 0);
5383 b = TREE_OPERAND (mbz, 0);
5385 else
5386 return false;
5388 else
5389 return false;
5391 /* Expected number of iterations is B - A - 1. Check that it matches
5392 the actual number, i.e., that B - A - NITER = 1. */
5393 tree_to_aff_combination (niter->niter, nit_type, &nit);
5394 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5395 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5396 aff_combination_scale (&nit, -1);
5397 aff_combination_scale (&tmpa, -1);
5398 aff_combination_add (&tmpb, &tmpa);
5399 aff_combination_add (&tmpb, &nit);
5400 if (tmpb.n != 0 || tmpb.offset != 1)
5401 return false;
5403 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5404 overflow. */
5405 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5406 cand->iv->step,
5407 fold_convert (TREE_TYPE (cand->iv->step), a));
5408 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5409 return false;
5411 /* Determine the new comparison operator. */
5412 comp = step < 0 ? GT_EXPR : LT_EXPR;
5413 if (*comp_p == NE_EXPR)
5414 *comp_p = comp;
5415 else if (*comp_p == EQ_EXPR)
5416 *comp_p = invert_tree_comparison (comp, false);
5417 else
5418 gcc_unreachable ();
5420 return true;
5423 /* Check whether it is possible to express the condition in USE by comparison
5424 of candidate CAND. If so, store the value compared with to BOUND, and the
5425 comparison operator to COMP. */
5427 static bool
5428 may_eliminate_iv (struct ivopts_data *data,
5429 struct iv_use *use, struct iv_cand *cand, tree *bound,
5430 enum tree_code *comp)
5432 basic_block ex_bb;
5433 edge exit;
5434 tree period;
5435 struct loop *loop = data->current_loop;
5436 aff_tree bnd;
5437 struct tree_niter_desc *desc = NULL;
5439 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5440 return false;
5442 /* For now works only for exits that dominate the loop latch.
5443 TODO: extend to other conditions inside loop body. */
5444 ex_bb = gimple_bb (use->stmt);
5445 if (use->stmt != last_stmt (ex_bb)
5446 || gimple_code (use->stmt) != GIMPLE_COND
5447 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5448 return false;
   /* Locate the edge leaving the loop; give up if both successors stay
      inside.  */
5450 exit = EDGE_SUCC (ex_bb, 0);
5451 if (flow_bb_inside_loop_p (loop, exit->dest))
5452 exit = EDGE_SUCC (ex_bb, 1);
5453 if (flow_bb_inside_loop_p (loop, exit->dest))
5454 return false;
5456 desc = niter_for_exit (data, exit);
5457 if (!desc)
5458 return false;
5460 /* Determine whether we can use the variable to test the exit condition.
5461 This is the case iff the period of the induction variable is greater
5462 than the number of iterations for which the exit condition is true. */
5463 period = iv_period (cand->iv);
5465 /* If the number of iterations is constant, compare against it directly. */
5466 if (TREE_CODE (desc->niter) == INTEGER_CST)
5468 /* See cand_value_at. */
5469 if (stmt_after_increment (loop, cand, use->stmt))
5471 if (!tree_int_cst_lt (desc->niter, period))
5472 return false;
5474 else
5476 if (tree_int_cst_lt (period, desc->niter))
5477 return false;
5481 /* If not, and if this is the only possible exit of the loop, see whether
5482 we can get a conservative estimate on the number of iterations of the
5483 entire loop and compare against that instead. */
5484 else
5486 widest_int period_value, max_niter;
5488 max_niter = desc->max;
5489 if (stmt_after_increment (loop, cand, use->stmt))
5490 max_niter += 1;
5491 period_value = wi::to_widest (period);
5492 if (wi::gtu_p (max_niter, period_value))
5494 /* See if we can take advantage of inferred loop bound
5495 information. */
5496 if (data->loop_single_exit_p)
5498 if (!max_loop_iterations (loop, &max_niter))
5499 return false;
5500 /* The loop bound is already adjusted by adding 1. */
5501 if (wi::gtu_p (max_niter, period_value))
5502 return false;
5504 else
5505 return false;
   /* Compute the value CAND has after NITER iterations -- that is the
      bound to compare against.  */
5509 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5511 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5512 aff_combination_to_tree (&bnd));
5513 *comp = iv_elimination_compare (data, use);
5515 /* It is unlikely that computing the number of iterations using division
5516 would be more profitable than keeping the original induction variable. */
5517 if (expression_expensive_p (*bound))
5518 return false;
5520 /* Sometimes, it is possible to handle the situation that the number of
5521 iterations may be zero unless additional assumtions by using <
5522 instead of != in the exit condition.
5524 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5525 base the exit condition on it. However, that is often too
5526 expensive. */
5527 if (!integer_zerop (desc->may_be_zero))
5528 return iv_elimination_compare_lt (data, cand, comp, desc);
5530 return true;
5533 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5534 be copied, if it is used in the loop body and DATA->body_includes_call. */
5536 static int
5537 parm_decl_cost (struct ivopts_data *data, tree bound)
5539 tree sbound = bound;
5540 STRIP_NOPS (sbound);
5542 if (TREE_CODE (sbound) == SSA_NAME
5543 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5544 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5545 && data->body_includes_call)
5546 return COSTS_N_INSNS (1);
5548 return 0;
5551 /* Determines cost of computing the use in GROUP with CAND in a condition. */
/* Compares the cost of eliminating the iv (replacing the exit test by a
   comparison of CAND against a bound) with the cost of expressing the
   original condition from CAND, records the cheaper alternative via
   set_group_iv_cost, and returns false iff the cost is infinite.  */
5553 static bool
5554 determine_group_iv_cost_cond (struct ivopts_data *data,
5555 struct iv_group *group, struct iv_cand *cand)
5557 tree bound = NULL_TREE;
5558 struct iv *cmp_iv;
5559 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5560 comp_cost elim_cost, express_cost, cost, bound_cost;
5561 bool ok;
5562 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5563 tree *control_var, *bound_cst;
5564 enum tree_code comp = ERROR_MARK;
5565 struct iv_use *use = group->vuses[0];
5567 gcc_assert (cand->iv);
5569 /* Try iv elimination. */
5570 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5572 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5573 if (elim_cost.cost == 0)
5574 elim_cost.cost = parm_decl_cost (data, bound);
5575 else if (TREE_CODE (bound) == INTEGER_CST)
5576 elim_cost.cost = 0;
5577 /* If we replace a loop condition 'i < n' with 'p < base + n',
5578 depends_on_elim will have 'base' and 'n' set, which implies
5579 that both 'base' and 'n' will be live during the loop. More likely,
5580 'base + n' will be loop invariant, resulting in only one live value
5581 during the loop. So in that case we clear depends_on_elim and set
5582 elim_inv_expr_id instead. */
5583 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5585 elim_inv_expr = record_inv_expr (data, bound)
5586 bitmap_clear (depends_on_elim);
5588 /* The bound is a loop invariant, so it will be only computed
5589 once. */
5590 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5592 else
5593 elim_cost = infinite_cost;
5595 /* Try expressing the original giv. If it is compared with an invariant,
5596 note that we cannot get rid of it. */
5597 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5598 NULL, &cmp_iv);
5599 gcc_assert (ok);
5601 /* When the condition is a comparison of the candidate IV against
5602 zero, prefer this IV.
5604 TODO: The constant that we're subtracting from the cost should
5605 be target-dependent. This information should be added to the
5606 target costs for each backend. */
5607 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5608 && integer_zerop (*bound_cst)
5609 && (operand_equal_p (*control_var, cand->var_after, 0)
5610 || operand_equal_p (*control_var, cand->var_before, 0)))
5611 elim_cost -= 1;
5613 express_cost = get_computation_cost (data, use, cand, false,
5614 &depends_on_express, NULL,
5615 &express_inv_expr);
5616 fd_ivopts_data = data;
5617 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5619 /* Count the cost of the original bound as well. */
5620 bound_cost = force_var_cost (data, *bound_cst, NULL);
5621 if (bound_cost.cost == 0)
5622 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5623 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5624 bound_cost.cost = 0;
5625 express_cost += bound_cost;
5627 /* Choose the better approach, preferring the eliminated IV. */
5628 if (elim_cost <= express_cost)
5630 cost = elim_cost;
5631 depends_on = depends_on_elim;
5632 depends_on_elim = NULL;
5633 inv_expr = elim_inv_expr;
5635 else
5637 cost = express_cost;
5638 depends_on = depends_on_express;
5639 depends_on_express = NULL;
5640 bound = NULL_TREE;
5641 comp = ERROR_MARK;
5642 inv_expr = express_inv_expr;
5645 set_group_iv_cost (data, group, cand, cost,
5646 depends_on, bound, comp, inv_expr);
5648 if (depends_on_elim)
5649 BITMAP_FREE (depends_on_elim);
5650 if (depends_on_express)
5651 BITMAP_FREE (depends_on_express);
5653 return !cost.infinite_cost_p ();
5656 /* Determines cost of computing uses in GROUP with CAND. Returns false
5657 if USE cannot be represented with CAND. */
5659 static bool
5660 determine_group_iv_cost (struct ivopts_data *data,
5661 struct iv_group *group, struct iv_cand *cand)
5663 switch (group->type)
5665 case USE_NONLINEAR_EXPR:
5666 return determine_group_iv_cost_generic (data, group, cand);
5668 case USE_ADDRESS:
5669 return determine_group_iv_cost_address (data, group, cand);
5671 case USE_COMPARE:
5672 return determine_group_iv_cost_cond (data, group, cand);
5674 default:
5675 gcc_unreachable ();
5679 /* Return true if get_computation_cost indicates that autoincrement is
5680 a possibility for the pair of USE and CAND, false otherwise. */
5682 static bool
5683 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5684 struct iv_cand *cand)
5686 bitmap depends_on;
5687 bool can_autoinc;
5688 comp_cost cost;
5690 if (use->type != USE_ADDRESS)
5691 return false;
5693 cost = get_computation_cost (data, use, cand, true, &depends_on,
5694 &can_autoinc, NULL);
5696 BITMAP_FREE (depends_on);
5698 return !cost.infinite_cost_p () && can_autoinc;
5701 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5702 use that allows autoincrement, and set their AINC_USE if possible. */
5704 static void
5705 set_autoinc_for_original_candidates (struct ivopts_data *data)
5707 unsigned i, j;
5709 for (i = 0; i < data->vcands.length (); i++)
5711 struct iv_cand *cand = data->vcands[i];
5712 struct iv_use *closest_before = NULL;
5713 struct iv_use *closest_after = NULL;
5714 if (cand->pos != IP_ORIGINAL)
5715 continue;
5717 for (j = 0; j < data->vgroups.length (); j++)
5719 struct iv_group *group = data->vgroups[j];
5720 struct iv_use *use = group->vuses[0];
5721 unsigned uid = gimple_uid (use->stmt);
5723 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5724 continue;
5726 if (uid < gimple_uid (cand->incremented_at)
5727 && (closest_before == NULL
5728 || uid > gimple_uid (closest_before->stmt)))
5729 closest_before = use;
5731 if (uid > gimple_uid (cand->incremented_at)
5732 && (closest_after == NULL
5733 || uid < gimple_uid (closest_after->stmt)))
5734 closest_after = use;
5737 if (closest_before != NULL
5738 && autoinc_possible_for_pair (data, closest_before, cand))
5739 cand->ainc_use = closest_before;
5740 else if (closest_after != NULL
5741 && autoinc_possible_for_pair (data, closest_after, cand))
5742 cand->ainc_use = closest_after;
5746 /* Finds the candidates for the induction variables. */
5748 static void
5749 find_iv_candidates (struct ivopts_data *data)
5751 /* Add commonly used ivs. */
5752 add_standard_iv_candidates (data);
5754 /* Add old induction variables. */
5755 add_iv_candidate_for_bivs (data);
5757 /* Add induction variables derived from uses. */
5758 add_iv_candidate_for_groups (data);
5760 set_autoinc_for_original_candidates (data);
5762 /* Record the important candidates. */
5763 record_important_candidates (data);
5765 if (dump_file && (dump_flags & TDF_DETAILS))
5767 unsigned i;
5769 fprintf (dump_file, "\n<Important Candidates>:\t");
5770 for (i = 0; i < data->vcands.length (); i++)
5771 if (data->vcands[i]->important)
5772 fprintf (dump_file, " %d,", data->vcands[i]->id);
5773 fprintf (dump_file, "\n");
5775 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5776 for (i = 0; i < data->vgroups.length (); i++)
5778 struct iv_group *group = data->vgroups[i];
5780 if (group->related_cands)
5782 fprintf (dump_file, " Group %d:\t", group->id);
5783 dump_bitmap (dump_file, group->related_cands);
5786 fprintf (dump_file, "\n");
5790 /* Determines costs of computing use of iv with an iv candidate. */
5792 static void
5793 determine_group_iv_costs (struct ivopts_data *data)
5795 unsigned i, j;
5796 struct iv_cand *cand;
5797 struct iv_group *group;
5798 bitmap to_clear = BITMAP_ALLOC (NULL);
5800 alloc_use_cost_map (data);
5802 for (i = 0; i < data->vgroups.length (); i++)
5804 group = data->vgroups[i];
5806 if (data->consider_all_candidates)
5808 for (j = 0; j < data->vcands.length (); j++)
5810 cand = data->vcands[j];
5811 determine_group_iv_cost (data, group, cand);
5814 else
5816 bitmap_iterator bi;
5818 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5820 cand = data->vcands[j];
5821 if (!determine_group_iv_cost (data, group, cand))
5822 bitmap_set_bit (to_clear, j);
5825 /* Remove the candidates for that the cost is infinite from
5826 the list of related candidates. */
5827 bitmap_and_compl_into (group->related_cands, to_clear);
5828 bitmap_clear (to_clear);
5832 BITMAP_FREE (to_clear);
5834 if (dump_file && (dump_flags & TDF_DETAILS))
5836 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5837 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5839 for (hash_table<iv_inv_expr_hasher>::iterator it
5840 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5841 ++it)
5842 list.safe_push (*it);
5844 list.qsort (sort_iv_inv_expr_ent);
5846 for (i = 0; i < list.length (); ++i)
5848 fprintf (dump_file, "inv_expr %d: \t", i);
5849 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5850 fprintf (dump_file, "\n");
5853 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5855 for (i = 0; i < data->vgroups.length (); i++)
5857 group = data->vgroups[i];
5859 fprintf (dump_file, "Group %d:\n", i);
5860 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5861 for (j = 0; j < group->n_map_members; j++)
5863 if (!group->cost_map[j].cand
5864 || group->cost_map[j].cost.infinite_cost_p ())
5865 continue;
5867 fprintf (dump_file, " %d\t%d\t%d\t",
5868 group->cost_map[j].cand->id,
5869 group->cost_map[j].cost.cost,
5870 group->cost_map[j].cost.complexity);
5871 if (group->cost_map[j].inv_expr != NULL)
5872 fprintf (dump_file, "%d\t",
5873 group->cost_map[j].inv_expr->id);
5874 else
5875 fprintf (dump_file, "\t");
5876 if (group->cost_map[j].depends_on)
5877 bitmap_print (dump_file,
5878 group->cost_map[j].depends_on, "","");
5879 fprintf (dump_file, "\n");
5882 fprintf (dump_file, "\n");
5884 fprintf (dump_file, "\n");
5888 /* Determines cost of the candidate CAND. */
5890 static void
5891 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5893 comp_cost cost_base;
5894 unsigned cost, cost_step;
5895 tree base;
5897 if (!cand->iv)
5899 cand->cost = 0;
5900 return;
5903 /* There are two costs associated with the candidate -- its increment
5904 and its initialization. The second is almost negligible for any loop
5905 that rolls enough, so we take it just very little into account. */
5907 base = cand->iv->base;
5908 cost_base = force_var_cost (data, base, NULL);
5909 /* It will be exceptional that the iv register happens to be initialized with
5910 the proper value at no cost. In general, there will at least be a regcopy
5911 or a const set. */
5912 if (cost_base.cost == 0)
5913 cost_base.cost = COSTS_N_INSNS (1);
5914 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5916 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5918 /* Prefer the original ivs unless we may gain something by replacing it.
5919 The reason is to make debugging simpler; so this is not relevant for
5920 artificial ivs created by other optimization passes. */
5921 if (cand->pos != IP_ORIGINAL
5922 || !SSA_NAME_VAR (cand->var_before)
5923 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5924 cost++;
5926 /* Prefer not to insert statements into latch unless there are some
5927 already (so that we do not create unnecessary jumps). */
5928 if (cand->pos == IP_END
5929 && empty_block_p (ip_end_pos (data->current_loop)))
5930 cost++;
5932 cand->cost = cost;
5933 cand->cost_step = cost_step;
5936 /* Determines costs of computation of the candidates. */
5938 static void
5939 determine_iv_costs (struct ivopts_data *data)
5941 unsigned i;
5943 if (dump_file && (dump_flags & TDF_DETAILS))
5945 fprintf (dump_file, "<Candidate Costs>:\n");
5946 fprintf (dump_file, " cand\tcost\n");
5949 for (i = 0; i < data->vcands.length (); i++)
5951 struct iv_cand *cand = data->vcands[i];
5953 determine_iv_cost (data, cand);
5955 if (dump_file && (dump_flags & TDF_DETAILS))
5956 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5959 if (dump_file && (dump_flags & TDF_DETAILS))
5960 fprintf (dump_file, "\n");
5963 /* Calculates cost for having SIZE induction variables. */
5965 static unsigned
5966 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5968 /* We add size to the cost, so that we prefer eliminating ivs
5969 if possible. */
5970 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5971 data->body_includes_call);
5974 /* For each size of the induction variable set determine the penalty. */
5976 static void
5977 determine_set_costs (struct ivopts_data *data)
5979 unsigned j, n;
5980 gphi *phi;
5981 gphi_iterator psi;
5982 tree op;
5983 struct loop *loop = data->current_loop;
5984 bitmap_iterator bi;
5986 if (dump_file && (dump_flags & TDF_DETAILS))
5988 fprintf (dump_file, "<Global Costs>:\n");
5989 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5990 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5991 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5992 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5995 n = 0;
5996 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5998 phi = psi.phi ();
5999 op = PHI_RESULT (phi);
6001 if (virtual_operand_p (op))
6002 continue;
6004 if (get_iv (data, op))
6005 continue;
6007 n++;
6010 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6012 struct version_info *info = ver_info (data, j);
6014 if (info->inv_id && info->has_nonlin_use)
6015 n++;
6018 data->regs_used = n;
6019 if (dump_file && (dump_flags & TDF_DETAILS))
6020 fprintf (dump_file, " regs_used %d\n", n);
6022 if (dump_file && (dump_flags & TDF_DETAILS))
6024 fprintf (dump_file, " cost for size:\n");
6025 fprintf (dump_file, " ivs\tcost\n");
6026 for (j = 0; j <= 2 * target_avail_regs; j++)
6027 fprintf (dump_file, " %d\t%d\n", j,
6028 ivopts_global_cost_for_size (data, j));
6029 fprintf (dump_file, "\n");
6033 /* Returns true if A is a cheaper cost pair than B. */
6035 static bool
6036 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
6038 if (!a)
6039 return false;
6041 if (!b)
6042 return true;
6044 if (a->cost < b->cost)
6045 return true;
6047 if (b->cost < a->cost)
6048 return false;
6050 /* In case the costs are the same, prefer the cheaper candidate. */
6051 if (a->cand->cost < b->cand->cost)
6052 return true;
6054 return false;
6058 /* Returns candidate by that USE is expressed in IVS. */
6060 static struct cost_pair *
6061 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
6063 return ivs->cand_for_group[group->id];
6066 /* Computes the cost field of IVS structure. */
6068 static void
6069 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
6071 comp_cost cost = ivs->cand_use_cost;
6073 cost += ivs->cand_cost;
6075 cost += ivopts_global_cost_for_size (data,
6076 ivs->n_regs
6077 + ivs->used_inv_exprs->elements ());
6079 ivs->cost = cost;
6082 /* Remove invariants in set INVS to set IVS. */
6084 static void
6085 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
6087 bitmap_iterator bi;
6088 unsigned iid;
6090 if (!invs)
6091 return;
6093 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6095 ivs->n_invariant_uses[iid]--;
6096 if (ivs->n_invariant_uses[iid] == 0)
6097 ivs->n_regs--;
6101 /* Set USE not to be expressed by any candidate in IVS. */
6103 static void
6104 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6105 struct iv_group *group)
6107 unsigned gid = group->id, cid;
6108 struct cost_pair *cp;
6110 cp = ivs->cand_for_group[gid];
6111 if (!cp)
6112 return;
6113 cid = cp->cand->id;
6115 ivs->bad_groups++;
6116 ivs->cand_for_group[gid] = NULL;
6117 ivs->n_cand_uses[cid]--;
6119 if (ivs->n_cand_uses[cid] == 0)
6121 bitmap_clear_bit (ivs->cands, cid);
6122 /* Do not count the pseudocandidates. */
6123 if (cp->cand->iv)
6124 ivs->n_regs--;
6125 ivs->n_cands--;
6126 ivs->cand_cost -= cp->cand->cost;
6128 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6131 ivs->cand_use_cost -= cp->cost;
6133 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6135 if (cp->inv_expr != NULL)
6137 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6138 --(*slot);
6139 if (*slot == 0)
6140 ivs->used_inv_exprs->remove (cp->inv_expr);
6142 iv_ca_recount_cost (data, ivs);
6145 /* Add invariants in set INVS to set IVS. */
6147 static void
6148 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6150 bitmap_iterator bi;
6151 unsigned iid;
6153 if (!invs)
6154 return;
6156 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6158 ivs->n_invariant_uses[iid]++;
6159 if (ivs->n_invariant_uses[iid] == 1)
6160 ivs->n_regs++;
6164 /* Set cost pair for GROUP in set IVS to CP. */
6166 static void
6167 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6168 struct iv_group *group, struct cost_pair *cp)
6170 unsigned gid = group->id, cid;
6172 if (ivs->cand_for_group[gid] == cp)
6173 return;
6175 if (ivs->cand_for_group[gid])
6176 iv_ca_set_no_cp (data, ivs, group);
6178 if (cp)
6180 cid = cp->cand->id;
6182 ivs->bad_groups--;
6183 ivs->cand_for_group[gid] = cp;
6184 ivs->n_cand_uses[cid]++;
6185 if (ivs->n_cand_uses[cid] == 1)
6187 bitmap_set_bit (ivs->cands, cid);
6188 /* Do not count the pseudocandidates. */
6189 if (cp->cand->iv)
6190 ivs->n_regs++;
6191 ivs->n_cands++;
6192 ivs->cand_cost += cp->cand->cost;
6194 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6197 ivs->cand_use_cost += cp->cost;
6198 iv_ca_set_add_invariants (ivs, cp->depends_on);
6200 if (cp->inv_expr != NULL)
6202 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6203 ++(*slot);
6205 iv_ca_recount_cost (data, ivs);
6209 /* Extend set IVS by expressing USE by some of the candidates in it
6210 if possible. Consider all important candidates if candidates in
6211 set IVS don't give any result. */
6213 static void
6214 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6215 struct iv_group *group)
6217 struct cost_pair *best_cp = NULL, *cp;
6218 bitmap_iterator bi;
6219 unsigned i;
6220 struct iv_cand *cand;
6222 gcc_assert (ivs->upto >= group->id);
6223 ivs->upto++;
6224 ivs->bad_groups++;
6226 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6228 cand = data->vcands[i];
6229 cp = get_group_iv_cost (data, group, cand);
6230 if (cheaper_cost_pair (cp, best_cp))
6231 best_cp = cp;
6234 if (best_cp == NULL)
6236 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6238 cand = data->vcands[i];
6239 cp = get_group_iv_cost (data, group, cand);
6240 if (cheaper_cost_pair (cp, best_cp))
6241 best_cp = cp;
6245 iv_ca_set_cp (data, ivs, group, best_cp);
6248 /* Get cost for assignment IVS. */
6250 static comp_cost
6251 iv_ca_cost (struct iv_ca *ivs)
6253 /* This was a conditional expression but it triggered a bug in
6254 Sun C 5.5. */
6255 if (ivs->bad_groups)
6256 return infinite_cost;
6257 else
6258 return ivs->cost;
6261 /* Returns true if all dependences of CP are among invariants in IVS. */
6263 static bool
6264 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6266 unsigned i;
6267 bitmap_iterator bi;
6269 if (!cp->depends_on)
6270 return true;
6272 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6274 if (ivs->n_invariant_uses[i] == 0)
6275 return false;
6278 return true;
6281 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6282 it before NEXT. */
6284 static struct iv_ca_delta *
6285 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6286 struct cost_pair *new_cp, struct iv_ca_delta *next)
6288 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6290 change->group = group;
6291 change->old_cp = old_cp;
6292 change->new_cp = new_cp;
6293 change->next = next;
6295 return change;
6298 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6299 are rewritten. */
6301 static struct iv_ca_delta *
6302 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6304 struct iv_ca_delta *last;
6306 if (!l2)
6307 return l1;
6309 if (!l1)
6310 return l2;
6312 for (last = l1; last->next; last = last->next)
6313 continue;
6314 last->next = l2;
6316 return l1;
6319 /* Reverse the list of changes DELTA, forming the inverse to it. */
6321 static struct iv_ca_delta *
6322 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6324 struct iv_ca_delta *act, *next, *prev = NULL;
6326 for (act = delta; act; act = next)
6328 next = act->next;
6329 act->next = prev;
6330 prev = act;
6332 std::swap (act->old_cp, act->new_cp);
6335 return prev;
6338 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6339 reverted instead. */
6341 static void
6342 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6343 struct iv_ca_delta *delta, bool forward)
6345 struct cost_pair *from, *to;
6346 struct iv_ca_delta *act;
6348 if (!forward)
6349 delta = iv_ca_delta_reverse (delta);
6351 for (act = delta; act; act = act->next)
6353 from = act->old_cp;
6354 to = act->new_cp;
6355 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6356 iv_ca_set_cp (data, ivs, act->group, to);
6359 if (!forward)
6360 iv_ca_delta_reverse (delta);
6363 /* Returns true if CAND is used in IVS. */
6365 static bool
6366 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6368 return ivs->n_cand_uses[cand->id] > 0;
6371 /* Returns number of induction variable candidates in the set IVS. */
6373 static unsigned
6374 iv_ca_n_cands (struct iv_ca *ivs)
6376 return ivs->n_cands;
6379 /* Free the list of changes DELTA. */
6381 static void
6382 iv_ca_delta_free (struct iv_ca_delta **delta)
6384 struct iv_ca_delta *act, *next;
6386 for (act = *delta; act; act = next)
6388 next = act->next;
6389 free (act);
6392 *delta = NULL;
6395 /* Allocates new iv candidates assignment. */
6397 static struct iv_ca *
6398 iv_ca_new (struct ivopts_data *data)
6400 struct iv_ca *nw = XNEW (struct iv_ca);
6402 nw->upto = 0;
6403 nw->bad_groups = 0;
6404 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6405 data->vgroups.length ());
6406 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6407 nw->cands = BITMAP_ALLOC (NULL);
6408 nw->n_cands = 0;
6409 nw->n_regs = 0;
6410 nw->cand_use_cost = no_cost;
6411 nw->cand_cost = 0;
6412 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6413 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6414 nw->cost = no_cost;
6416 return nw;
6419 /* Free memory occupied by the set IVS. */
6421 static void
6422 iv_ca_free (struct iv_ca **ivs)
6424 free ((*ivs)->cand_for_group);
6425 free ((*ivs)->n_cand_uses);
6426 BITMAP_FREE ((*ivs)->cands);
6427 free ((*ivs)->n_invariant_uses);
6428 delete ((*ivs)->used_inv_exprs);
6429 free (*ivs);
6430 *ivs = NULL;
6433 /* Dumps IVS to FILE. */
6435 static void
6436 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6438 unsigned i;
6439 comp_cost cost = iv_ca_cost (ivs);
6441 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6442 cost.complexity);
6443 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6444 ivs->cand_cost, ivs->cand_use_cost.cost,
6445 ivs->cand_use_cost.complexity);
6446 bitmap_print (file, ivs->cands, " candidates: ","\n");
6448 for (i = 0; i < ivs->upto; i++)
6450 struct iv_group *group = data->vgroups[i];
6451 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6452 if (cp)
6453 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6454 group->id, cp->cand->id, cp->cost.cost,
6455 cp->cost.complexity);
6456 else
6457 fprintf (file, " group:%d --> ??\n", group->id);
6460 const char *pref = "";
6461 fprintf (file, " invariant variables: ");
6462 for (i = 1; i <= data->max_inv_id; i++)
6463 if (ivs->n_invariant_uses[i])
6465 fprintf (file, "%s%d", pref, i);
6466 pref = ", ";
6469 pref = "";
6470 fprintf (file, "\n invariant expressions: ");
6471 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6472 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6474 fprintf (file, "%s%d", pref, (*it).first->id);
6475 pref = ", ";
6478 fprintf (file, "\n\n");
6481 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6482 new set, and store differences in DELTA. Number of induction variables
6483 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6484 the function will try to find a solution with mimimal iv candidates. */
6486 static comp_cost
6487 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6488 struct iv_cand *cand, struct iv_ca_delta **delta,
6489 unsigned *n_ivs, bool min_ncand)
6491 unsigned i;
6492 comp_cost cost;
6493 struct iv_group *group;
6494 struct cost_pair *old_cp, *new_cp;
6496 *delta = NULL;
6497 for (i = 0; i < ivs->upto; i++)
6499 group = data->vgroups[i];
6500 old_cp = iv_ca_cand_for_group (ivs, group);
6502 if (old_cp
6503 && old_cp->cand == cand)
6504 continue;
6506 new_cp = get_group_iv_cost (data, group, cand);
6507 if (!new_cp)
6508 continue;
6510 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6511 continue;
6513 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6514 continue;
6516 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6519 iv_ca_delta_commit (data, ivs, *delta, true);
6520 cost = iv_ca_cost (ivs);
6521 if (n_ivs)
6522 *n_ivs = iv_ca_n_cands (ivs);
6523 iv_ca_delta_commit (data, ivs, *delta, false);
6525 return cost;
6528 /* Try narrowing set IVS by removing CAND. Return the cost of
6529 the new set and store the differences in DELTA. START is
6530 the candidate with which we start narrowing. */
6532 static comp_cost
6533 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6534 struct iv_cand *cand, struct iv_cand *start,
6535 struct iv_ca_delta **delta)
6537 unsigned i, ci;
6538 struct iv_group *group;
6539 struct cost_pair *old_cp, *new_cp, *cp;
6540 bitmap_iterator bi;
6541 struct iv_cand *cnd;
6542 comp_cost cost, best_cost, acost;
6544 *delta = NULL;
6545 for (i = 0; i < data->vgroups.length (); i++)
6547 group = data->vgroups[i];
6549 old_cp = iv_ca_cand_for_group (ivs, group);
6550 if (old_cp->cand != cand)
6551 continue;
6553 best_cost = iv_ca_cost (ivs);
6554 /* Start narrowing with START. */
6555 new_cp = get_group_iv_cost (data, group, start);
6557 if (data->consider_all_candidates)
6559 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6561 if (ci == cand->id || (start && ci == start->id))
6562 continue;
6564 cnd = data->vcands[ci];
6566 cp = get_group_iv_cost (data, group, cnd);
6567 if (!cp)
6568 continue;
6570 iv_ca_set_cp (data, ivs, group, cp);
6571 acost = iv_ca_cost (ivs);
6573 if (acost < best_cost)
6575 best_cost = acost;
6576 new_cp = cp;
6580 else
6582 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6584 if (ci == cand->id || (start && ci == start->id))
6585 continue;
6587 cnd = data->vcands[ci];
6589 cp = get_group_iv_cost (data, group, cnd);
6590 if (!cp)
6591 continue;
6593 iv_ca_set_cp (data, ivs, group, cp);
6594 acost = iv_ca_cost (ivs);
6596 if (acost < best_cost)
6598 best_cost = acost;
6599 new_cp = cp;
6603 /* Restore to old cp for use. */
6604 iv_ca_set_cp (data, ivs, group, old_cp);
6606 if (!new_cp)
6608 iv_ca_delta_free (delta);
6609 return infinite_cost;
6612 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6615 iv_ca_delta_commit (data, ivs, *delta, true);
6616 cost = iv_ca_cost (ivs);
6617 iv_ca_delta_commit (data, ivs, *delta, false);
6619 return cost;
6622 /* Try optimizing the set of candidates IVS by removing candidates different
6623 from to EXCEPT_CAND from it. Return cost of the new set, and store
6624 differences in DELTA. */
6626 static comp_cost
6627 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6628 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6630 bitmap_iterator bi;
6631 struct iv_ca_delta *act_delta, *best_delta;
6632 unsigned i;
6633 comp_cost best_cost, acost;
6634 struct iv_cand *cand;
6636 best_delta = NULL;
6637 best_cost = iv_ca_cost (ivs);
6639 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6641 cand = data->vcands[i];
6643 if (cand == except_cand)
6644 continue;
6646 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6648 if (acost < best_cost)
6650 best_cost = acost;
6651 iv_ca_delta_free (&best_delta);
6652 best_delta = act_delta;
6654 else
6655 iv_ca_delta_free (&act_delta);
6658 if (!best_delta)
6660 *delta = NULL;
6661 return best_cost;
6664 /* Recurse to possibly remove other unnecessary ivs. */
6665 iv_ca_delta_commit (data, ivs, best_delta, true);
6666 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6667 iv_ca_delta_commit (data, ivs, best_delta, false);
6668 *delta = iv_ca_delta_join (best_delta, *delta);
6669 return best_cost;
6672 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6673 cheaper local cost for GROUP than BEST_CP. Return pointer to
6674 the corresponding cost_pair, otherwise just return BEST_CP. */
6676 static struct cost_pair*
6677 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6678 unsigned int cand_idx, struct iv_cand *old_cand,
6679 struct cost_pair *best_cp)
6681 struct iv_cand *cand;
6682 struct cost_pair *cp;
6684 gcc_assert (old_cand != NULL && best_cp != NULL);
6685 if (cand_idx == old_cand->id)
6686 return best_cp;
6688 cand = data->vcands[cand_idx];
6689 cp = get_group_iv_cost (data, group, cand);
6690 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6691 return cp;
6693 return best_cp;
6696 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6697 which are used by more than one iv uses. For each of those candidates,
6698 this function tries to represent iv uses under that candidate using
6699 other ones with lower local cost, then tries to prune the new set.
6700 If the new set has lower cost, It returns the new cost after recording
6701 candidate replacement in list DELTA. */
6703 static comp_cost
6704 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6705 struct iv_ca_delta **delta)
6707 bitmap_iterator bi, bj;
6708 unsigned int i, j, k;
6709 struct iv_cand *cand;
6710 comp_cost orig_cost, acost;
6711 struct iv_ca_delta *act_delta, *tmp_delta;
6712 struct cost_pair *old_cp, *best_cp = NULL;
6714 *delta = NULL;
6715 orig_cost = iv_ca_cost (ivs);
6717 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6719 if (ivs->n_cand_uses[i] == 1
6720 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6721 continue;
6723 cand = data->vcands[i];
6725 act_delta = NULL;
6726 /* Represent uses under current candidate using other ones with
6727 lower local cost. */
6728 for (j = 0; j < ivs->upto; j++)
6730 struct iv_group *group = data->vgroups[j];
6731 old_cp = iv_ca_cand_for_group (ivs, group);
6733 if (old_cp->cand != cand)
6734 continue;
6736 best_cp = old_cp;
6737 if (data->consider_all_candidates)
6738 for (k = 0; k < data->vcands.length (); k++)
6739 best_cp = cheaper_cost_with_cand (data, group, k,
6740 old_cp->cand, best_cp);
6741 else
6742 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6743 best_cp = cheaper_cost_with_cand (data, group, k,
6744 old_cp->cand, best_cp);
6746 if (best_cp == old_cp)
6747 continue;
6749 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6751 /* No need for further prune. */
6752 if (!act_delta)
6753 continue;
6755 /* Prune the new candidate set. */
6756 iv_ca_delta_commit (data, ivs, act_delta, true);
6757 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6758 iv_ca_delta_commit (data, ivs, act_delta, false);
6759 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6761 if (acost < orig_cost)
6763 *delta = act_delta;
6764 return acost;
6766 else
6767 iv_ca_delta_free (&act_delta);
6770 return orig_cost;
6773 /* Tries to extend the sets IVS in the best possible way in order to
6774 express the GROUP. If ORIGINALP is true, prefer candidates from
6775 the original set of IVs, otherwise favor important candidates not
6776 based on any memory object. */
6778 static bool
6779 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6780 struct iv_group *group, bool originalp)
6782 comp_cost best_cost, act_cost;
6783 unsigned i;
6784 bitmap_iterator bi;
6785 struct iv_cand *cand;
6786 struct iv_ca_delta *best_delta = NULL, *act_delta;
6787 struct cost_pair *cp;
6789 iv_ca_add_group (data, ivs, group);
6790 best_cost = iv_ca_cost (ivs);
6791 cp = iv_ca_cand_for_group (ivs, group);
6792 if (cp)
6794 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6795 iv_ca_set_no_cp (data, ivs, group);
6798 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6799 first try important candidates not based on any memory object. Only if
6800 this fails, try the specific ones. Rationale -- in loops with many
6801 variables the best choice often is to use just one generic biv. If we
6802 added here many ivs specific to the uses, the optimization algorithm later
6803 would be likely to get stuck in a local minimum, thus causing us to create
6804 too many ivs. The approach from few ivs to more seems more likely to be
6805 successful -- starting from few ivs, replacing an expensive use by a
6806 specific iv should always be a win. */
6807 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6809 cand = data->vcands[i];
6811 if (originalp && cand->pos !=IP_ORIGINAL)
6812 continue;
6814 if (!originalp && cand->iv->base_object != NULL_TREE)
6815 continue;
6817 if (iv_ca_cand_used_p (ivs, cand))
6818 continue;
6820 cp = get_group_iv_cost (data, group, cand);
6821 if (!cp)
6822 continue;
6824 iv_ca_set_cp (data, ivs, group, cp);
6825 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6826 true);
6827 iv_ca_set_no_cp (data, ivs, group);
6828 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6830 if (act_cost < best_cost)
6832 best_cost = act_cost;
6834 iv_ca_delta_free (&best_delta);
6835 best_delta = act_delta;
6837 else
6838 iv_ca_delta_free (&act_delta);
6841 if (best_cost.infinite_cost_p ())
6843 for (i = 0; i < group->n_map_members; i++)
6845 cp = group->cost_map + i;
6846 cand = cp->cand;
6847 if (!cand)
6848 continue;
6850 /* Already tried this. */
6851 if (cand->important)
6853 if (originalp && cand->pos == IP_ORIGINAL)
6854 continue;
6855 if (!originalp && cand->iv->base_object == NULL_TREE)
6856 continue;
6859 if (iv_ca_cand_used_p (ivs, cand))
6860 continue;
6862 act_delta = NULL;
6863 iv_ca_set_cp (data, ivs, group, cp);
6864 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6865 iv_ca_set_no_cp (data, ivs, group);
6866 act_delta = iv_ca_delta_add (group,
6867 iv_ca_cand_for_group (ivs, group),
6868 cp, act_delta);
6870 if (act_cost < best_cost)
6872 best_cost = act_cost;
6874 if (best_delta)
6875 iv_ca_delta_free (&best_delta);
6876 best_delta = act_delta;
6878 else
6879 iv_ca_delta_free (&act_delta);
6883 iv_ca_delta_commit (data, ivs, best_delta, true);
6884 iv_ca_delta_free (&best_delta);
6886 return !best_cost.infinite_cost_p ();
6889 /* Finds an initial assignment of candidates to uses. */
6891 static struct iv_ca *
6892 get_initial_solution (struct ivopts_data *data, bool originalp)
6894 unsigned i;
6895 struct iv_ca *ivs = iv_ca_new (data);
6897 for (i = 0; i < data->vgroups.length (); i++)
6898 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6900 iv_ca_free (&ivs);
6901 return NULL;
6904 return ivs;
6907 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6908 points to a bool variable, this function tries to break local
6909 optimal fixed-point by replacing candidates in IVS if it's true. */
6911 static bool
6912 try_improve_iv_set (struct ivopts_data *data,
6913 struct iv_ca *ivs, bool *try_replace_p)
6915 unsigned i, n_ivs;
6916 comp_cost acost, best_cost = iv_ca_cost (ivs);
6917 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6918 struct iv_cand *cand;
6920 /* Try extending the set of induction variables by one. */
6921 for (i = 0; i < data->vcands.length (); i++)
6923 cand = data->vcands[i];
6925 if (iv_ca_cand_used_p (ivs, cand))
6926 continue;
6928 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6929 if (!act_delta)
6930 continue;
6932 /* If we successfully added the candidate and the set is small enough,
6933 try optimizing it by removing other candidates. */
6934 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6936 iv_ca_delta_commit (data, ivs, act_delta, true);
6937 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6938 iv_ca_delta_commit (data, ivs, act_delta, false);
6939 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6942 if (acost < best_cost)
6944 best_cost = acost;
6945 iv_ca_delta_free (&best_delta);
6946 best_delta = act_delta;
6948 else
6949 iv_ca_delta_free (&act_delta);
6952 if (!best_delta)
6954 /* Try removing the candidates from the set instead. */
6955 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6957 if (!best_delta && *try_replace_p)
6959 *try_replace_p = false;
6960 /* So far candidate selecting algorithm tends to choose fewer IVs
6961 so that it can handle cases in which loops have many variables
6962 but the best choice is often to use only one general biv. One
6963 weakness is it can't handle opposite cases, in which different
6964 candidates should be chosen with respect to each use. To solve
6965 the problem, we replace candidates in a manner described by the
6966 comments of iv_ca_replace, thus give general algorithm a chance
6967 to break local optimal fixed-point in these cases. */
6968 best_cost = iv_ca_replace (data, ivs, &best_delta);
6971 if (!best_delta)
6972 return false;
6975 iv_ca_delta_commit (data, ivs, best_delta, true);
6976 gcc_assert (best_cost == iv_ca_cost (ivs));
6977 iv_ca_delta_free (&best_delta);
6978 return true;
6981 /* Attempts to find the optimal set of induction variables. We do simple
6982 greedy heuristic -- we try to replace at most one candidate in the selected
6983 solution and remove the unused ivs while this improves the cost. */
6985 static struct iv_ca *
6986 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6988 struct iv_ca *set;
6989 bool try_replace_p = true;
6991 /* Get the initial solution. */
6992 set = get_initial_solution (data, originalp);
6993 if (!set)
6995 if (dump_file && (dump_flags & TDF_DETAILS))
6996 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6997 return NULL;
7000 if (dump_file && (dump_flags & TDF_DETAILS))
7002 fprintf (dump_file, "Initial set of candidates:\n");
7003 iv_ca_dump (data, dump_file, set);
7006 while (try_improve_iv_set (data, set, &try_replace_p))
7008 if (dump_file && (dump_flags & TDF_DETAILS))
7010 fprintf (dump_file, "Improved to:\n");
7011 iv_ca_dump (data, dump_file, set);
7015 return set;
7018 static struct iv_ca *
7019 find_optimal_iv_set (struct ivopts_data *data)
7021 unsigned i;
7022 comp_cost cost, origcost;
7023 struct iv_ca *set, *origset;
7025 /* Determine the cost based on a strategy that starts with original IVs,
7026 and try again using a strategy that prefers candidates not based
7027 on any IVs. */
7028 origset = find_optimal_iv_set_1 (data, true);
7029 set = find_optimal_iv_set_1 (data, false);
7031 if (!origset && !set)
7032 return NULL;
7034 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
7035 cost = set ? iv_ca_cost (set) : infinite_cost;
7037 if (dump_file && (dump_flags & TDF_DETAILS))
7039 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
7040 origcost.cost, origcost.complexity);
7041 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
7042 cost.cost, cost.complexity);
7045 /* Choose the one with the best cost. */
7046 if (origcost <= cost)
7048 if (set)
7049 iv_ca_free (&set);
7050 set = origset;
7052 else if (origset)
7053 iv_ca_free (&origset);
7055 for (i = 0; i < data->vgroups.length (); i++)
7057 struct iv_group *group = data->vgroups[i];
7058 group->selected = iv_ca_cand_for_group (set, group)->cand;
7061 return set;
7064 /* Creates a new induction variable corresponding to CAND. */
7066 static void
7067 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
7069 gimple_stmt_iterator incr_pos;
7070 tree base;
7071 struct iv_use *use;
7072 struct iv_group *group;
7073 bool after = false;
7075 if (!cand->iv)
7076 return;
7078 switch (cand->pos)
7080 case IP_NORMAL:
7081 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
7082 break;
7084 case IP_END:
7085 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
7086 after = true;
7087 break;
7089 case IP_AFTER_USE:
7090 after = true;
7091 /* fall through */
7092 case IP_BEFORE_USE:
7093 incr_pos = gsi_for_stmt (cand->incremented_at);
7094 break;
7096 case IP_ORIGINAL:
7097 /* Mark that the iv is preserved. */
7098 name_info (data, cand->var_before)->preserve_biv = true;
7099 name_info (data, cand->var_after)->preserve_biv = true;
7101 /* Rewrite the increment so that it uses var_before directly. */
7102 use = find_interesting_uses_op (data, cand->var_after);
7103 group = data->vgroups[use->group_id];
7104 group->selected = cand;
7105 return;
7108 gimple_add_tmp_var (cand->var_before);
7110 base = unshare_expr (cand->iv->base);
7112 create_iv (base, unshare_expr (cand->iv->step),
7113 cand->var_before, data->current_loop,
7114 &incr_pos, after, &cand->var_before, &cand->var_after);
7117 /* Creates new induction variables described in SET. */
7119 static void
7120 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7122 unsigned i;
7123 struct iv_cand *cand;
7124 bitmap_iterator bi;
7126 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7128 cand = data->vcands[i];
7129 create_new_iv (data, cand);
7132 if (dump_file && (dump_flags & TDF_DETAILS))
7134 fprintf (dump_file, "Selected IV set for loop %d",
7135 data->current_loop->num);
7136 if (data->loop_loc != UNKNOWN_LOCATION)
7137 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7138 LOCATION_LINE (data->loop_loc));
7139 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7140 avg_loop_niter (data->current_loop));
7141 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7142 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7143 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7144 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7146 cand = data->vcands[i];
7147 dump_cand (dump_file, cand);
7149 fprintf (dump_file, "\n");
7153 /* Rewrites USE (definition of iv used in a nonlinear expression)
7154 using candidate CAND. */
7156 static void
7157 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7158 struct iv_use *use, struct iv_cand *cand)
7160 tree comp;
7161 tree op, tgt;
7162 gassign *ass;
7163 gimple_stmt_iterator bsi;
7165 /* An important special case -- if we are asked to express value of
7166 the original iv by itself, just exit; there is no need to
7167 introduce a new computation (that might also need casting the
7168 variable to unsigned and back). */
7169 if (cand->pos == IP_ORIGINAL
7170 && cand->incremented_at == use->stmt)
7172 enum tree_code stmt_code;
7174 gcc_assert (is_gimple_assign (use->stmt));
7175 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7177 /* Check whether we may leave the computation unchanged.
7178 This is the case only if it does not rely on other
7179 computations in the loop -- otherwise, the computation
7180 we rely upon may be removed in remove_unused_ivs,
7181 thus leading to ICE. */
7182 stmt_code = gimple_assign_rhs_code (use->stmt);
7183 if (stmt_code == PLUS_EXPR
7184 || stmt_code == MINUS_EXPR
7185 || stmt_code == POINTER_PLUS_EXPR)
7187 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7188 op = gimple_assign_rhs2 (use->stmt);
7189 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7190 op = gimple_assign_rhs1 (use->stmt);
7191 else
7192 op = NULL_TREE;
7194 else
7195 op = NULL_TREE;
7197 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7198 return;
7201 comp = get_computation (data->current_loop, use, cand);
7202 gcc_assert (comp != NULL_TREE);
7204 switch (gimple_code (use->stmt))
7206 case GIMPLE_PHI:
7207 tgt = PHI_RESULT (use->stmt);
7209 /* If we should keep the biv, do not replace it. */
7210 if (name_info (data, tgt)->preserve_biv)
7211 return;
7213 bsi = gsi_after_labels (gimple_bb (use->stmt));
7214 break;
7216 case GIMPLE_ASSIGN:
7217 tgt = gimple_assign_lhs (use->stmt);
7218 bsi = gsi_for_stmt (use->stmt);
7219 break;
7221 default:
7222 gcc_unreachable ();
7225 if (!valid_gimple_rhs_p (comp)
7226 || (gimple_code (use->stmt) != GIMPLE_PHI
7227 /* We can't allow re-allocating the stmt as it might be pointed
7228 to still. */
7229 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7230 >= gimple_num_ops (gsi_stmt (bsi)))))
7232 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7233 true, GSI_SAME_STMT);
7234 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7236 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7237 /* As this isn't a plain copy we have to reset alignment
7238 information. */
7239 if (SSA_NAME_PTR_INFO (comp))
7240 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7244 if (gimple_code (use->stmt) == GIMPLE_PHI)
7246 ass = gimple_build_assign (tgt, comp);
7247 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7249 bsi = gsi_for_stmt (use->stmt);
7250 remove_phi_node (&bsi, false);
7252 else
7254 gimple_assign_set_rhs_from_tree (&bsi, comp);
7255 use->stmt = gsi_stmt (bsi);
7259 /* Performs a peephole optimization to reorder the iv update statement with
7260 a mem ref to enable instruction combining in later phases. The mem ref uses
7261 the iv value before the update, so the reordering transformation requires
7262 adjustment of the offset. CAND is the selected IV_CAND.
7264 Example:
7266 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7267 iv2 = iv1 + 1;
7269 if (t < val) (1)
7270 goto L;
7271 goto Head;
7274 directly propagating t over to (1) will introduce overlapping live range
7275 thus increase register pressure. This peephole transform it into:
7278 iv2 = iv1 + 1;
7279 t = MEM_REF (base, iv2, 8, 8);
7280 if (t < val)
7281 goto L;
7282 goto Head;
7285 static void
7286 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7288 tree var_after;
7289 gimple *iv_update, *stmt;
7290 basic_block bb;
7291 gimple_stmt_iterator gsi, gsi_iv;
7293 if (cand->pos != IP_NORMAL)
7294 return;
7296 var_after = cand->var_after;
7297 iv_update = SSA_NAME_DEF_STMT (var_after);
7299 bb = gimple_bb (iv_update);
7300 gsi = gsi_last_nondebug_bb (bb);
7301 stmt = gsi_stmt (gsi);
7303 /* Only handle conditional statement for now. */
7304 if (gimple_code (stmt) != GIMPLE_COND)
7305 return;
7307 gsi_prev_nondebug (&gsi);
7308 stmt = gsi_stmt (gsi);
7309 if (stmt != iv_update)
7310 return;
7312 gsi_prev_nondebug (&gsi);
7313 if (gsi_end_p (gsi))
7314 return;
7316 stmt = gsi_stmt (gsi);
7317 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7318 return;
7320 if (stmt != use->stmt)
7321 return;
7323 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7324 return;
7326 if (dump_file && (dump_flags & TDF_DETAILS))
7328 fprintf (dump_file, "Reordering \n");
7329 print_gimple_stmt (dump_file, iv_update, 0, 0);
7330 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7331 fprintf (dump_file, "\n");
7334 gsi = gsi_for_stmt (use->stmt);
7335 gsi_iv = gsi_for_stmt (iv_update);
7336 gsi_move_before (&gsi_iv, &gsi);
7338 cand->pos = IP_BEFORE_USE;
7339 cand->incremented_at = use->stmt;
7342 /* Rewrites USE (address that is an iv) using candidate CAND. */
7344 static void
7345 rewrite_use_address (struct ivopts_data *data,
7346 struct iv_use *use, struct iv_cand *cand)
7348 aff_tree aff;
7349 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7350 tree base_hint = NULL_TREE;
7351 tree ref, iv;
7352 bool ok;
7354 adjust_iv_update_pos (cand, use);
7355 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7356 gcc_assert (ok);
7357 unshare_aff_combination (&aff);
7359 /* To avoid undefined overflow problems, all IV candidates use unsigned
7360 integer types. The drawback is that this makes it impossible for
7361 create_mem_ref to distinguish an IV that is based on a memory object
7362 from one that represents simply an offset.
7364 To work around this problem, we pass a hint to create_mem_ref that
7365 indicates which variable (if any) in aff is an IV based on a memory
7366 object. Note that we only consider the candidate. If this is not
7367 based on an object, the base of the reference is in some subexpression
7368 of the use -- but these will use pointer types, so they are recognized
7369 by the create_mem_ref heuristics anyway. */
7370 if (cand->iv->base_object)
7371 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7373 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7374 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7375 reference_alias_ptr_type (*use->op_p),
7376 iv, base_hint, data->speed);
7377 copy_ref_info (ref, *use->op_p);
7378 *use->op_p = ref;
7381 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7382 candidate CAND. */
7384 static void
7385 rewrite_use_compare (struct ivopts_data *data,
7386 struct iv_use *use, struct iv_cand *cand)
7388 tree comp, *var_p, op, bound;
7389 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7390 enum tree_code compare;
7391 struct iv_group *group = data->vgroups[use->group_id];
7392 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7393 bool ok;
7395 bound = cp->value;
7396 if (bound)
7398 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7399 tree var_type = TREE_TYPE (var);
7400 gimple_seq stmts;
7402 if (dump_file && (dump_flags & TDF_DETAILS))
7404 fprintf (dump_file, "Replacing exit test: ");
7405 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7407 compare = cp->comp;
7408 bound = unshare_expr (fold_convert (var_type, bound));
7409 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7410 if (stmts)
7411 gsi_insert_seq_on_edge_immediate (
7412 loop_preheader_edge (data->current_loop),
7413 stmts);
7415 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7416 gimple_cond_set_lhs (cond_stmt, var);
7417 gimple_cond_set_code (cond_stmt, compare);
7418 gimple_cond_set_rhs (cond_stmt, op);
7419 return;
7422 /* The induction variable elimination failed; just express the original
7423 giv. */
7424 comp = get_computation (data->current_loop, use, cand);
7425 gcc_assert (comp != NULL_TREE);
7427 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7428 gcc_assert (ok);
7430 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7431 true, GSI_SAME_STMT);
7434 /* Rewrite the groups using the selected induction variables. */
7436 static void
7437 rewrite_groups (struct ivopts_data *data)
7439 unsigned i, j;
7441 for (i = 0; i < data->vgroups.length (); i++)
7443 struct iv_group *group = data->vgroups[i];
7444 struct iv_cand *cand = group->selected;
7446 gcc_assert (cand);
7448 if (group->type == USE_NONLINEAR_EXPR)
7450 for (j = 0; j < group->vuses.length (); j++)
7452 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7453 update_stmt (group->vuses[j]->stmt);
7456 else if (group->type == USE_ADDRESS)
7458 for (j = 0; j < group->vuses.length (); j++)
7460 rewrite_use_address (data, group->vuses[j], cand);
7461 update_stmt (group->vuses[j]->stmt);
7464 else
7466 gcc_assert (group->type == USE_COMPARE);
7468 for (j = 0; j < group->vuses.length (); j++)
7470 rewrite_use_compare (data, group->vuses[j], cand);
7471 update_stmt (group->vuses[j]->stmt);
7477 /* Removes the ivs that are not used after rewriting. */
7479 static void
7480 remove_unused_ivs (struct ivopts_data *data)
7482 unsigned j;
7483 bitmap_iterator bi;
7484 bitmap toremove = BITMAP_ALLOC (NULL);
7486 /* Figure out an order in which to release SSA DEFs so that we don't
7487 release something that we'd have to propagate into a debug stmt
7488 afterwards. */
7489 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7491 struct version_info *info;
7493 info = ver_info (data, j);
7494 if (info->iv
7495 && !integer_zerop (info->iv->step)
7496 && !info->inv_id
7497 && !info->iv->nonlin_use
7498 && !info->preserve_biv)
7500 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7502 tree def = info->iv->ssa_name;
7504 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7506 imm_use_iterator imm_iter;
7507 use_operand_p use_p;
7508 gimple *stmt;
7509 int count = 0;
7511 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7513 if (!gimple_debug_bind_p (stmt))
7514 continue;
7516 /* We just want to determine whether to do nothing
7517 (count == 0), to substitute the computed
7518 expression into a single use of the SSA DEF by
7519 itself (count == 1), or to use a debug temp
7520 because the SSA DEF is used multiple times or as
7521 part of a larger expression (count > 1). */
7522 count++;
7523 if (gimple_debug_bind_get_value (stmt) != def)
7524 count++;
7526 if (count > 1)
7527 BREAK_FROM_IMM_USE_STMT (imm_iter);
7530 if (!count)
7531 continue;
7533 struct iv_use dummy_use;
7534 struct iv_cand *best_cand = NULL, *cand;
7535 unsigned i, best_pref = 0, cand_pref;
7537 memset (&dummy_use, 0, sizeof (dummy_use));
7538 dummy_use.iv = info->iv;
7539 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7541 cand = data->vgroups[i]->selected;
7542 if (cand == best_cand)
7543 continue;
7544 cand_pref = operand_equal_p (cand->iv->step,
7545 info->iv->step, 0)
7546 ? 4 : 0;
7547 cand_pref
7548 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7549 == TYPE_MODE (TREE_TYPE (info->iv->base))
7550 ? 2 : 0;
7551 cand_pref
7552 += TREE_CODE (cand->iv->base) == INTEGER_CST
7553 ? 1 : 0;
7554 if (best_cand == NULL || best_pref < cand_pref)
7556 best_cand = cand;
7557 best_pref = cand_pref;
7561 if (!best_cand)
7562 continue;
7564 tree comp = get_computation_at (data->current_loop,
7565 &dummy_use, best_cand,
7566 SSA_NAME_DEF_STMT (def));
7567 if (!comp)
7568 continue;
7570 if (count > 1)
7572 tree vexpr = make_node (DEBUG_EXPR_DECL);
7573 DECL_ARTIFICIAL (vexpr) = 1;
7574 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7575 if (SSA_NAME_VAR (def))
7576 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7577 else
7578 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7579 gdebug *def_temp
7580 = gimple_build_debug_bind (vexpr, comp, NULL);
7581 gimple_stmt_iterator gsi;
7583 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7584 gsi = gsi_after_labels (gimple_bb
7585 (SSA_NAME_DEF_STMT (def)));
7586 else
7587 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7589 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7590 comp = vexpr;
7593 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7595 if (!gimple_debug_bind_p (stmt))
7596 continue;
7598 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7599 SET_USE (use_p, comp);
7601 update_stmt (stmt);
7607 release_defs_bitset (toremove);
7609 BITMAP_FREE (toremove);
7612 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7613 for hash_map::traverse. */
7615 bool
7616 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7618 free (value);
7619 return true;
7622 /* Frees data allocated by the optimization of a single loop. */
7624 static void
7625 free_loop_data (struct ivopts_data *data)
7627 unsigned i, j;
7628 bitmap_iterator bi;
7629 tree obj;
7631 if (data->niters)
7633 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7634 delete data->niters;
7635 data->niters = NULL;
7638 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7640 struct version_info *info;
7642 info = ver_info (data, i);
7643 info->iv = NULL;
7644 info->has_nonlin_use = false;
7645 info->preserve_biv = false;
7646 info->inv_id = 0;
7648 bitmap_clear (data->relevant);
7649 bitmap_clear (data->important_candidates);
7651 for (i = 0; i < data->vgroups.length (); i++)
7653 struct iv_group *group = data->vgroups[i];
7655 for (j = 0; j < group->vuses.length (); j++)
7656 free (group->vuses[j]);
7657 group->vuses.release ();
7659 BITMAP_FREE (group->related_cands);
7660 for (j = 0; j < group->n_map_members; j++)
7661 if (group->cost_map[j].depends_on)
7662 BITMAP_FREE (group->cost_map[j].depends_on);
7664 free (group->cost_map);
7665 free (group);
7667 data->vgroups.truncate (0);
7669 for (i = 0; i < data->vcands.length (); i++)
7671 struct iv_cand *cand = data->vcands[i];
7673 if (cand->depends_on)
7674 BITMAP_FREE (cand->depends_on);
7675 free (cand);
7677 data->vcands.truncate (0);
7679 if (data->version_info_size < num_ssa_names)
7681 data->version_info_size = 2 * num_ssa_names;
7682 free (data->version_info);
7683 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7686 data->max_inv_id = 0;
7688 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7689 SET_DECL_RTL (obj, NULL_RTX);
7691 decl_rtl_to_reset.truncate (0);
7693 data->inv_expr_tab->empty ();
7694 data->max_inv_expr_id = 0;
7696 data->iv_common_cand_tab->empty ();
7697 data->iv_common_cands.truncate (0);
7700 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7701 loop tree. */
7703 static void
7704 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7706 free_loop_data (data);
7707 free (data->version_info);
7708 BITMAP_FREE (data->relevant);
7709 BITMAP_FREE (data->important_candidates);
7711 decl_rtl_to_reset.release ();
7712 data->vgroups.release ();
7713 data->vcands.release ();
7714 delete data->inv_expr_tab;
7715 data->inv_expr_tab = NULL;
7716 free_affine_expand_cache (&data->name_expansion_cache);
7717 delete data->iv_common_cand_tab;
7718 data->iv_common_cand_tab = NULL;
7719 data->iv_common_cands.release ();
7720 obstack_free (&data->iv_obstack, NULL);
7723 /* Returns true if the loop body BODY includes any function calls. */
7725 static bool
7726 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7728 gimple_stmt_iterator gsi;
7729 unsigned i;
7731 for (i = 0; i < num_nodes; i++)
7732 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7734 gimple *stmt = gsi_stmt (gsi);
7735 if (is_gimple_call (stmt)
7736 && !gimple_call_internal_p (stmt)
7737 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7738 return true;
7740 return false;
7743 /* Optimizes the LOOP. Returns true if anything changed. */
7745 static bool
7746 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7748 bool changed = false;
7749 struct iv_ca *iv_ca;
7750 edge exit = single_dom_exit (loop);
7751 basic_block *body;
7753 gcc_assert (!data->niters);
7754 data->current_loop = loop;
7755 data->loop_loc = find_loop_location (loop);
7756 data->speed = optimize_loop_for_speed_p (loop);
7758 if (dump_file && (dump_flags & TDF_DETAILS))
7760 fprintf (dump_file, "Processing loop %d", loop->num);
7761 if (data->loop_loc != UNKNOWN_LOCATION)
7762 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7763 LOCATION_LINE (data->loop_loc));
7764 fprintf (dump_file, "\n");
7766 if (exit)
7768 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7769 exit->src->index, exit->dest->index);
7770 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7771 fprintf (dump_file, "\n");
7774 fprintf (dump_file, "\n");
7777 body = get_loop_body (loop);
7778 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7779 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7780 free (body);
7782 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7784 /* For each ssa name determines whether it behaves as an induction variable
7785 in some loop. */
7786 if (!find_induction_variables (data))
7787 goto finish;
7789 /* Finds interesting uses (item 1). */
7790 find_interesting_uses (data);
7791 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7792 goto finish;
7794 /* Finds candidates for the induction variables (item 2). */
7795 find_iv_candidates (data);
7797 /* Calculates the costs (item 3, part 1). */
7798 determine_iv_costs (data);
7799 determine_group_iv_costs (data);
7800 determine_set_costs (data);
7802 /* Find the optimal set of induction variables (item 3, part 2). */
7803 iv_ca = find_optimal_iv_set (data);
7804 if (!iv_ca)
7805 goto finish;
7806 changed = true;
7808 /* Create the new induction variables (item 4, part 1). */
7809 create_new_ivs (data, iv_ca);
7810 iv_ca_free (&iv_ca);
7812 /* Rewrite the uses (item 4, part 2). */
7813 rewrite_groups (data);
7815 /* Remove the ivs that are unused after rewriting. */
7816 remove_unused_ivs (data);
7818 /* We have changed the structure of induction variables; it might happen
7819 that definitions in the scev database refer to some of them that were
7820 eliminated. */
7821 scev_reset ();
7823 finish:
7824 free_loop_data (data);
7826 return changed;
7829 /* Main entry point. Optimizes induction variables in loops. */
7831 void
7832 tree_ssa_iv_optimize (void)
7834 struct loop *loop;
7835 struct ivopts_data data;
7837 tree_ssa_iv_optimize_init (&data);
7839 /* Optimize the loops starting with the innermost ones. */
7840 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7842 if (dump_file && (dump_flags & TDF_DETAILS))
7843 flow_loop_dump (loop, dump_file, NULL, 1);
7845 tree_ssa_iv_optimize_loop (&data, loop);
7848 tree_ssa_iv_optimize_finalize (&data);