gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only by a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
         the best induction variable in the set and adds its cost to the sum.
         The cost reflects the time spent on modifying the induction
         variable's value to be usable for the given purpose (adding base and
         offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
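
/* Editorial illustration (not part of the original sources): for a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   step 1 records the address use a[i] and the compare use i < n, step 2 adds
   candidates such as the original biv i and a pointer iv p = &a[i], and if
   step 3 decides that expressing both uses through p is cheaper, step 4
   rewrites the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   i.e. strength reduction of the address computation plus elimination of the
   original counter.  The actual outcome depends on the target cost model.  */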
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = max_stmt_executions_int (loop);
      if (niter == -1 || niter > AVG_LOOP_NITER (loop))
	return AVG_LOOP_NITER (loop);
    }

  return niter;
}
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The use recorded for this iv in a
				   nonlinear expression, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For a biv, indicates whether it's used in any
			   address type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost = {0, 0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY, INFTY};
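
/* Editorial note (not from the original sources): comp_cost values are
   compared primarily by the COST field, with COMPLEXITY acting as a
   tie-breaker, so a set priced {cost = 8, complexity = 2} is preferred over
   {cost = 8, complexity = 3} and over any set whose cost exceeds 8; the
   SCRATCH field only carries intermediate data during cost computation.  */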
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  iv_inv_expr_ent *inv_expr;	/* Loop invariant expression.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in the step of
			   the biv.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from a biv
			   with smaller type.  */
};

/* Hashtable entry for a common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}
342 /* Hashtable helpers. */
344 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
346 static inline hashval_t hash (const iv_inv_expr_ent *);
347 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
350 /* Hash function for loop invariant expressions. */
352 inline hashval_t
353 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
355 return expr->hash;
358 /* Hash table equality function for expressions. */
360 inline bool
361 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
362 const iv_inv_expr_ent *expr2)
364 return expr1->hash == expr2->hash
365 && operand_equal_p (expr1->expr, expr2->expr, 0);
368 struct ivopts_data
370 /* The currently optimized loop. */
371 struct loop *current_loop;
372 source_location loop_loc;
374 /* Numbers of iterations for all exits of the current loop. */
375 hash_map<edge, tree_niter_desc *> *niters;
377 /* Number of registers used in it. */
378 unsigned regs_used;
380 /* The size of version_info array allocated. */
381 unsigned version_info_size;
383 /* The array of information for the ssa names. */
384 struct version_info *version_info;
386 /* The hashtable of loop invariant expressions created
387 by ivopt. */
388 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
390 /* Loop invariant expression id. */
391 int max_inv_expr_id;
393 /* The bitmap of indices in version_info whose value was changed. */
394 bitmap relevant;
396 /* The uses of induction variables. */
397 vec<iv_group *> vgroups;
399 /* The candidates. */
400 vec<iv_cand *> vcands;
402 /* A bitmap of important candidates. */
403 bitmap important_candidates;
405 /* Cache used by tree_to_aff_combination_expand. */
406 hash_map<tree, name_expansion *> *name_expansion_cache;
408 /* The hashtable of common candidates derived from iv uses. */
409 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
411 /* The common candidates. */
412 vec<iv_common_cand *> iv_common_cands;
414 /* The maximum invariant id. */
415 unsigned max_inv_id;
417 /* Number of no_overflow BIVs which are not used in memory address. */
418 unsigned bivs_not_used_in_addr;
420 /* Obstack for iv structure. */
421 struct obstack iv_obstack;
423 /* Whether to consider just related and important candidates when replacing a
424 use. */
425 bool consider_all_candidates;
427 /* Are we optimizing for speed? */
428 bool speed;
430 /* Whether the loop body includes any function calls. */
431 bool body_includes_call;
433 /* Whether the loop body can only be exited via single exit. */
434 bool loop_single_exit_p;
437 /* An assignment of iv candidates to uses. */
439 struct iv_ca
441 /* The number of uses covered by the assignment. */
442 unsigned upto;
444 /* Number of uses that cannot be expressed by the candidates in the set. */
445 unsigned bad_groups;
447 /* Candidate assigned to a use, together with the related costs. */
448 struct cost_pair **cand_for_group;
450 /* Number of times each candidate is used. */
451 unsigned *n_cand_uses;
453 /* The candidates used. */
454 bitmap cands;
456 /* The number of candidates in the set. */
457 unsigned n_cands;
459 /* Total number of registers needed. */
460 unsigned n_regs;
462 /* Total cost of expressing uses. */
463 comp_cost cand_use_cost;
465 /* Total cost of candidates. */
466 unsigned cand_cost;
468 /* Number of times each invariant is used. */
469 unsigned *n_invariant_uses;
471 /* Hash set with used invariant expression. */
472 hash_map <iv_inv_expr_ent *, unsigned> *used_inv_exprs;
474 /* Total cost of the assignment. */
475 comp_cost cost;
478 /* Difference of two iv candidate assignments. */
480 struct iv_ca_delta
482 /* Changed group. */
483 struct iv_group *group;
485 /* An old assignment (for rollback purposes). */
486 struct cost_pair *old_cp;
488 /* A new assignment. */
489 struct cost_pair *new_cp;
491 /* Next change in the list. */
492 struct iv_ca_delta *next;
495 /* Bound on number of candidates below that all candidates are considered. */
497 #define CONSIDER_ALL_CANDIDATES_BOUND \
498 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
500 /* If there are more iv occurrences, we just give up (it is quite unlikely that
501 optimizing such a loop would help, and it would take ages). */
503 #define MAX_CONSIDERED_GROUPS \
504 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
506 /* If there are at most this number of ivs in the set, try removing unnecessary
507 ivs from the set always. */
509 #define ALWAYS_PRUNE_CAND_SET_BOUND \
510 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
512 /* The list of trees for that the decl_rtl field must be reset is stored
513 here. */
515 static vec<tree> decl_rtl_to_reset;
517 static comp_cost force_expr_to_var_cost (tree, bool);
519 /* The single loop exit if it dominates the latch, NULL otherwise. */
521 edge
522 single_dom_exit (struct loop *loop)
524 edge exit = single_exit (loop);
526 if (!exit)
527 return NULL;
529 if (!just_once_each_iteration_p (loop, exit->src))
530 return NULL;
532 return exit;
535 /* Dumps information about the induction variable IV to FILE. Don't dump
536 variable's name if DUMP_NAME is FALSE. The information is dumped with
537 preceding spaces indicated by INDENT_LEVEL. */
539 void
540 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
542 const char *p;
543 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
545 if (indent_level > 4)
546 indent_level = 4;
547 p = spaces + 8 - (indent_level << 1);
549 fprintf (file, "%sIV struct:\n", p);
550 if (iv->ssa_name && dump_name)
552 fprintf (file, "%s SSA_NAME:\t", p);
553 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
554 fprintf (file, "\n");
557 fprintf (file, "%s Type:\t", p);
558 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
559 fprintf (file, "\n");
561 fprintf (file, "%s Base:\t", p);
562 print_generic_expr (file, iv->base, TDF_SLIM);
563 fprintf (file, "\n");
565 fprintf (file, "%s Step:\t", p);
566 print_generic_expr (file, iv->step, TDF_SLIM);
567 fprintf (file, "\n");
569 if (iv->base_object)
571 fprintf (file, "%s Object:\t", p);
572 print_generic_expr (file, iv->base_object, TDF_SLIM);
573 fprintf (file, "\n");
576 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
578 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
579 p, iv->no_overflow ? "No-overflow" : "Overflow");
582 /* Dumps information about the USE to FILE. */
584 void
585 dump_use (FILE *file, struct iv_use *use)
587 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
588 fprintf (file, " At stmt:\t");
589 print_gimple_stmt (file, use->stmt, 0, 0);
590 fprintf (file, " At pos:\t");
591 if (use->op_p)
592 print_generic_expr (file, *use->op_p, TDF_SLIM);
593 fprintf (file, "\n");
594 dump_iv (file, use->iv, false, 2);
597 /* Dumps information about the uses to FILE. */
599 void
600 dump_groups (FILE *file, struct ivopts_data *data)
602 unsigned i, j;
603 struct iv_group *group;
605 for (i = 0; i < data->vgroups.length (); i++)
607 group = data->vgroups[i];
608 fprintf (file, "Group %d:\n", group->id);
609 if (group->type == USE_NONLINEAR_EXPR)
610 fprintf (file, " Type:\tGENERIC\n");
611 else if (group->type == USE_ADDRESS)
612 fprintf (file, " Type:\tADDRESS\n");
613 else
615 gcc_assert (group->type == USE_COMPARE);
616 fprintf (file, " Type:\tCOMPARE\n");
618 for (j = 0; j < group->vuses.length (); j++)
619 dump_use (file, group->vuses[j]);
623 /* Dumps information about induction variable candidate CAND to FILE. */
625 void
626 dump_cand (FILE *file, struct iv_cand *cand)
628 struct iv *iv = cand->iv;
630 fprintf (file, "Candidate %d:\n", cand->id);
631 if (cand->depends_on)
633 fprintf (file, " Depend on: ");
634 dump_bitmap (file, cand->depends_on);
637 if (cand->var_before)
639 fprintf (file, " Var befor: ");
640 print_generic_expr (file, cand->var_before, TDF_SLIM);
641 fprintf (file, "\n");
643 if (cand->var_after)
645 fprintf (file, " Var after: ");
646 print_generic_expr (file, cand->var_after, TDF_SLIM);
647 fprintf (file, "\n");
650 switch (cand->pos)
652 case IP_NORMAL:
653 fprintf (file, " Incr POS: before exit test\n");
654 break;
656 case IP_BEFORE_USE:
657 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
658 break;
660 case IP_AFTER_USE:
661 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
662 break;
664 case IP_END:
665 fprintf (file, " Incr POS: at end\n");
666 break;
668 case IP_ORIGINAL:
669 fprintf (file, " Incr POS: orig biv\n");
670 break;
673 dump_iv (file, iv, false, 1);
676 /* Returns the info for ssa version VER. */
678 static inline struct version_info *
679 ver_info (struct ivopts_data *data, unsigned ver)
681 return data->version_info + ver;
684 /* Returns the info for ssa name NAME. */
686 static inline struct version_info *
687 name_info (struct ivopts_data *data, tree name)
689 return ver_info (data, SSA_NAME_VERSION (name));
692 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
693 emitted in LOOP. */
695 static bool
696 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
698 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
700 gcc_assert (bb);
702 if (sbb == loop->latch)
703 return true;
705 if (sbb != bb)
706 return false;
708 return stmt == last_stmt (bb);
711 /* Returns true if STMT is after the place where the original induction
712 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
713 if the positions are identical. */
715 static bool
716 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
718 basic_block cand_bb = gimple_bb (cand->incremented_at);
719 basic_block stmt_bb = gimple_bb (stmt);
721 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
722 return false;
724 if (stmt_bb != cand_bb)
725 return true;
727 if (true_if_equal
728 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
729 return true;
730 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
733 /* Returns true if STMT is after the place where the induction variable
734 CAND is incremented in LOOP. */
736 static bool
737 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
739 switch (cand->pos)
741 case IP_END:
742 return false;
744 case IP_NORMAL:
745 return stmt_after_ip_normal_pos (loop, stmt);
747 case IP_ORIGINAL:
748 case IP_AFTER_USE:
749 return stmt_after_inc_pos (cand, stmt, false);
751 case IP_BEFORE_USE:
752 return stmt_after_inc_pos (cand, stmt, true);
754 default:
755 gcc_unreachable ();
759 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
761 static bool
762 abnormal_ssa_name_p (tree exp)
764 if (!exp)
765 return false;
767 if (TREE_CODE (exp) != SSA_NAME)
768 return false;
770 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
773 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
774 abnormal phi node. Callback for for_each_index. */
776 static bool
777 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
778 void *data ATTRIBUTE_UNUSED)
780 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
782 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
783 return false;
784 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
785 return false;
788 return !abnormal_ssa_name_p (*index);
791 /* Returns true if EXPR contains a ssa name that occurs in an
792 abnormal phi node. */
794 bool
795 contains_abnormal_ssa_name_p (tree expr)
797 enum tree_code code;
798 enum tree_code_class codeclass;
800 if (!expr)
801 return false;
803 code = TREE_CODE (expr);
804 codeclass = TREE_CODE_CLASS (code);
806 if (code == SSA_NAME)
807 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
809 if (code == INTEGER_CST
810 || is_gimple_min_invariant (expr))
811 return false;
813 if (code == ADDR_EXPR)
814 return !for_each_index (&TREE_OPERAND (expr, 0),
815 idx_contains_abnormal_ssa_name_p,
816 NULL);
818 if (code == COND_EXPR)
819 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
821 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
823 switch (codeclass)
825 case tcc_binary:
826 case tcc_comparison:
827 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
828 return true;
830 /* Fallthru. */
831 case tcc_unary:
832 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
833 return true;
835 break;
837 default:
838 gcc_unreachable ();
841 return false;
844 /* Returns the structure describing number of iterations determined from
845 EXIT of DATA->current_loop, or NULL if something goes wrong. */
847 static struct tree_niter_desc *
848 niter_for_exit (struct ivopts_data *data, edge exit)
850 struct tree_niter_desc *desc;
851 tree_niter_desc **slot;
853 if (!data->niters)
855 data->niters = new hash_map<edge, tree_niter_desc *>;
856 slot = NULL;
858 else
859 slot = data->niters->get (exit);
861 if (!slot)
863 /* Try to determine number of iterations. We cannot safely work with ssa
864 names that appear in phi nodes on abnormal edges, so that we do not
865 create overlapping life ranges for them (PR 27283). */
866 desc = XNEW (struct tree_niter_desc);
867 if (!number_of_iterations_exit (data->current_loop,
868 exit, desc, true)
869 || contains_abnormal_ssa_name_p (desc->niter))
871 XDELETE (desc);
872 desc = NULL;
874 data->niters->put (exit, desc);
876 else
877 desc = *slot;
879 return desc;
882 /* Returns the structure describing number of iterations determined from
883 single dominating exit of DATA->current_loop, or NULL if something
884 goes wrong. */
886 static struct tree_niter_desc *
887 niter_for_single_dom_exit (struct ivopts_data *data)
889 edge exit = single_dom_exit (data->current_loop);
891 if (!exit)
892 return NULL;
894 return niter_for_exit (data, exit);
897 /* Initializes data structures used by the iv optimization pass, stored
898 in DATA. */
900 static void
901 tree_ssa_iv_optimize_init (struct ivopts_data *data)
903 data->version_info_size = 2 * num_ssa_names;
904 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
905 data->relevant = BITMAP_ALLOC (NULL);
906 data->important_candidates = BITMAP_ALLOC (NULL);
907 data->max_inv_id = 0;
908 data->niters = NULL;
909 data->vgroups.create (20);
910 data->vcands.create (20);
911 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
912 data->max_inv_expr_id = 0;
913 data->name_expansion_cache = NULL;
914 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
915 data->iv_common_cands.create (20);
916 decl_rtl_to_reset.create (20);
917 gcc_obstack_init (&data->iv_obstack);
920 /* Returns a memory object to which EXPR points. In case we are able to
921 determine that it does not point to any such object, NULL is returned. */
923 static tree
924 determine_base_object (tree expr)
926 enum tree_code code = TREE_CODE (expr);
927 tree base, obj;
929 /* If this is a pointer casted to any type, we need to determine
930 the base object for the pointer; so handle conversions before
931 throwing away non-pointer expressions. */
932 if (CONVERT_EXPR_P (expr))
933 return determine_base_object (TREE_OPERAND (expr, 0));
935 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
936 return NULL_TREE;
938 switch (code)
940 case INTEGER_CST:
941 return NULL_TREE;
943 case ADDR_EXPR:
944 obj = TREE_OPERAND (expr, 0);
945 base = get_base_address (obj);
947 if (!base)
948 return expr;
950 if (TREE_CODE (base) == MEM_REF)
951 return determine_base_object (TREE_OPERAND (base, 0));
953 return fold_convert (ptr_type_node,
954 build_fold_addr_expr (base));
956 case POINTER_PLUS_EXPR:
957 return determine_base_object (TREE_OPERAND (expr, 0));
959 case PLUS_EXPR:
960 case MINUS_EXPR:
961 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
962 gcc_unreachable ();
964 default:
965 return fold_convert (ptr_type_node, expr);
969 /* Return true if address expression with non-DECL_P operand appears
970 in EXPR. */
972 static bool
973 contain_complex_addr_expr (tree expr)
975 bool res = false;
977 STRIP_NOPS (expr);
978 switch (TREE_CODE (expr))
980 case POINTER_PLUS_EXPR:
981 case PLUS_EXPR:
982 case MINUS_EXPR:
983 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
984 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
985 break;
987 case ADDR_EXPR:
988 return (!DECL_P (TREE_OPERAND (expr, 0)));
990 default:
991 return false;
994 return res;
997 /* Allocates an induction variable with given initial value BASE and step STEP
998 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1000 static struct iv *
1001 alloc_iv (struct ivopts_data *data, tree base, tree step,
1002 bool no_overflow = false)
1004 tree expr = base;
1005 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1006 sizeof (struct iv));
1007 gcc_assert (step != NULL_TREE);
1009 /* Lower address expression in base except ones with DECL_P as operand.
1010 By doing this:
1011 1) More accurate cost can be computed for address expressions;
1012 2) Duplicate candidates won't be created for bases in different
1013 forms, like &a[0] and &a. */
1014 STRIP_NOPS (expr);
1015 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1016 || contain_complex_addr_expr (expr))
1018 aff_tree comb;
1019 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1020 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1023 iv->base = base;
1024 iv->base_object = determine_base_object (base);
1025 iv->step = step;
1026 iv->biv_p = false;
1027 iv->nonlin_use = NULL;
1028 iv->ssa_name = NULL_TREE;
1029 iv->no_overflow = no_overflow;
1030 iv->have_address_use = false;
1032 return iv;
1035 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1036 doesn't overflow. */
1038 static void
1039 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1040 bool no_overflow)
1042 struct version_info *info = name_info (data, iv);
1044 gcc_assert (!info->iv);
1046 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1047 info->iv = alloc_iv (data, base, step, no_overflow);
1048 info->iv->ssa_name = iv;
1051 /* Finds induction variable declaration for VAR. */
1053 static struct iv *
1054 get_iv (struct ivopts_data *data, tree var)
1056 basic_block bb;
1057 tree type = TREE_TYPE (var);
1059 if (!POINTER_TYPE_P (type)
1060 && !INTEGRAL_TYPE_P (type))
1061 return NULL;
1063 if (!name_info (data, var)->iv)
1065 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1067 if (!bb
1068 || !flow_bb_inside_loop_p (data->current_loop, bb))
1069 set_iv (data, var, var, build_int_cst (type, 0), true);
1072 return name_info (data, var)->iv;
1075 /* Return the first non-invariant ssa var found in EXPR. */
1077 static tree
1078 extract_single_var_from_expr (tree expr)
1080 int i, n;
1081 tree tmp;
1082 enum tree_code code;
1084 if (!expr || is_gimple_min_invariant (expr))
1085 return NULL;
1087 code = TREE_CODE (expr);
1088 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1090 n = TREE_OPERAND_LENGTH (expr);
1091 for (i = 0; i < n; i++)
1093 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1095 if (tmp)
1096 return tmp;
1099 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1102 /* Finds basic ivs. */
1104 static bool
1105 find_bivs (struct ivopts_data *data)
1107 gphi *phi;
1108 affine_iv iv;
1109 tree step, type, base, stop;
1110 bool found = false;
1111 struct loop *loop = data->current_loop;
1112 gphi_iterator psi;
1114 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1116 phi = psi.phi ();
1118 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1119 continue;
1121 if (virtual_operand_p (PHI_RESULT (phi)))
1122 continue;
1124 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1125 continue;
1127 if (integer_zerop (iv.step))
1128 continue;
1130 step = iv.step;
1131 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1132 /* Stop expanding iv base at the first ssa var referred by iv step.
1133 Ideally we should stop at any ssa var, because that's expensive
1134 and unusual to happen, we just do it on the first one.
1136 See PR64705 for the rationale. */
1137 stop = extract_single_var_from_expr (step);
1138 base = expand_simple_operations (base, stop);
1139 if (contains_abnormal_ssa_name_p (base)
1140 || contains_abnormal_ssa_name_p (step))
1141 continue;
1143 type = TREE_TYPE (PHI_RESULT (phi));
1144 base = fold_convert (type, base);
1145 if (step)
1147 if (POINTER_TYPE_P (type))
1148 step = convert_to_ptrofftype (step);
1149 else
1150 step = fold_convert (type, step);
1153 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1154 found = true;
1157 return found;
1160 /* Marks basic ivs. */
1162 static void
1163 mark_bivs (struct ivopts_data *data)
1165 gphi *phi;
1166 gimple *def;
1167 tree var;
1168 struct iv *iv, *incr_iv;
1169 struct loop *loop = data->current_loop;
1170 basic_block incr_bb;
1171 gphi_iterator psi;
1173 data->bivs_not_used_in_addr = 0;
1174 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1176 phi = psi.phi ();
1178 iv = get_iv (data, PHI_RESULT (phi));
1179 if (!iv)
1180 continue;
1182 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1183 def = SSA_NAME_DEF_STMT (var);
1184 /* Don't mark iv peeled from other one as biv. */
1185 if (def
1186 && gimple_code (def) == GIMPLE_PHI
1187 && gimple_bb (def) == loop->header)
1188 continue;
1190 incr_iv = get_iv (data, var);
1191 if (!incr_iv)
1192 continue;
1194 /* If the increment is in the subloop, ignore it. */
1195 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1196 if (incr_bb->loop_father != data->current_loop
1197 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1198 continue;
1200 iv->biv_p = true;
1201 incr_iv->biv_p = true;
1202 if (iv->no_overflow)
1203 data->bivs_not_used_in_addr++;
1204 if (incr_iv->no_overflow)
1205 data->bivs_not_used_in_addr++;
1209 /* Checks whether STMT defines a linear induction variable and stores its
1210 parameters to IV. */
1212 static bool
1213 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1215 tree lhs, stop;
1216 struct loop *loop = data->current_loop;
1218 iv->base = NULL_TREE;
1219 iv->step = NULL_TREE;
1221 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1222 return false;
1224 lhs = gimple_assign_lhs (stmt);
1225 if (TREE_CODE (lhs) != SSA_NAME)
1226 return false;
1228 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1229 return false;
1231 /* Stop expanding iv base at the first ssa var referred by iv step.
1232 Ideally we should stop at any ssa var, because that's expensive
1233 and unusual to happen, we just do it on the first one.
1235 See PR64705 for the rationale. */
1236 stop = extract_single_var_from_expr (iv->step);
1237 iv->base = expand_simple_operations (iv->base, stop);
1238 if (contains_abnormal_ssa_name_p (iv->base)
1239 || contains_abnormal_ssa_name_p (iv->step))
1240 return false;
1242 /* If STMT could throw, then do not consider STMT as defining a GIV.
1243 While this will suppress optimizations, we can not safely delete this
1244 GIV and associated statements, even if it appears it is not used. */
1245 if (stmt_could_throw_p (stmt))
1246 return false;
1248 return true;
1251 /* Finds general ivs in statement STMT. */
1253 static void
1254 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1256 affine_iv iv;
1258 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1259 return;
1261 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1264 /* Finds general ivs in basic block BB. */
1266 static void
1267 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1269 gimple_stmt_iterator bsi;
1271 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1272 find_givs_in_stmt (data, gsi_stmt (bsi));
1275 /* Finds general ivs. */
1277 static void
1278 find_givs (struct ivopts_data *data)
1280 struct loop *loop = data->current_loop;
1281 basic_block *body = get_loop_body_in_dom_order (loop);
1282 unsigned i;
1284 for (i = 0; i < loop->num_nodes; i++)
1285 find_givs_in_bb (data, body[i]);
1286 free (body);
1289 /* For each ssa name defined in LOOP determines whether it is an induction
1290 variable and if so, its initial value and step. */
1292 static bool
1293 find_induction_variables (struct ivopts_data *data)
1295 unsigned i;
1296 bitmap_iterator bi;
1298 if (!find_bivs (data))
1299 return false;
1301 find_givs (data);
1302 mark_bivs (data);
1304 if (dump_file && (dump_flags & TDF_DETAILS))
1306 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1308 if (niter)
1310 fprintf (dump_file, " number of iterations ");
1311 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1312 if (!integer_zerop (niter->may_be_zero))
1314 fprintf (dump_file, "; zero if ");
1315 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1317 fprintf (dump_file, "\n");
1320 fprintf (dump_file, "\n<Induction Vars>:\n");
1321 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1323 struct version_info *info = ver_info (data, i);
1324 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1325 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1329 return true;
1332 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1333 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1334 is the const offset stripped from IV base; for other types use, both
1335 are zero by default. */
1337 static struct iv_use *
1338 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1339 gimple *stmt, enum use_type type, tree addr_base,
1340 unsigned HOST_WIDE_INT addr_offset)
1342 struct iv_use *use = XCNEW (struct iv_use);
1344 use->id = group->vuses.length ();
1345 use->group_id = group->id;
1346 use->type = type;
1347 use->iv = iv;
1348 use->stmt = stmt;
1349 use->op_p = use_p;
1350 use->addr_base = addr_base;
1351 use->addr_offset = addr_offset;
1353 group->vuses.safe_push (use);
1354 return use;
1357 /* Checks whether OP is a loop-level invariant and if so, records it.
1358 NONLINEAR_USE is true if the invariant is used in a way we do not
1359 handle specially. */
1361 static void
1362 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1364 basic_block bb;
1365 struct version_info *info;
1367 if (TREE_CODE (op) != SSA_NAME
1368 || virtual_operand_p (op))
1369 return;
1371 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1372 if (bb
1373 && flow_bb_inside_loop_p (data->current_loop, bb))
1374 return;
1376 info = name_info (data, op);
1377 info->name = op;
1378 info->has_nonlin_use |= nonlinear_use;
1379 if (!info->inv_id)
1380 info->inv_id = ++data->max_inv_id;
1381 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1384 static tree
1385 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1387 /* Record a group of TYPE. */
1389 static struct iv_group *
1390 record_group (struct ivopts_data *data, enum use_type type)
1392 struct iv_group *group = XCNEW (struct iv_group);
1394 group->id = data->vgroups.length ();
1395 group->type = type;
1396 group->related_cands = BITMAP_ALLOC (NULL);
1397 group->vuses.create (1);
1399 data->vgroups.safe_push (group);
1400 return group;
1403 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1404 New group will be created if there is no existing group for the use. */
1406 static struct iv_use *
1407 record_group_use (struct ivopts_data *data, tree *use_p,
1408 struct iv *iv, gimple *stmt, enum use_type type)
1410 tree addr_base = NULL;
1411 struct iv_group *group = NULL;
1412 unsigned HOST_WIDE_INT addr_offset = 0;
1414 /* Record non address type use in a new group. */
1415 if (type == USE_ADDRESS && iv->base_object)
1417 unsigned int i;
1419 addr_base = strip_offset (iv->base, &addr_offset);
1420 for (i = 0; i < data->vgroups.length (); i++)
1422 struct iv_use *use;
1424 group = data->vgroups[i];
1425 use = group->vuses[0];
1426 if (use->type != USE_ADDRESS || !use->iv->base_object)
1427 continue;
1429 /* Check if it has the same stripped base and step. */
1430 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1431 && operand_equal_p (iv->step, use->iv->step, 0)
1432 && operand_equal_p (addr_base, use->addr_base, 0))
1433 break;
1435 if (i == data->vgroups.length ())
1436 group = NULL;
1439 if (!group)
1440 group = record_group (data, type);
1442 return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
1445 /* Checks whether the use OP is interesting and if so, records it. */
1447 static struct iv_use *
1448 find_interesting_uses_op (struct ivopts_data *data, tree op)
1450 struct iv *iv;
1451 gimple *stmt;
1452 struct iv_use *use;
1454 if (TREE_CODE (op) != SSA_NAME)
1455 return NULL;
1457 iv = get_iv (data, op);
1458 if (!iv)
1459 return NULL;
1461 if (iv->nonlin_use)
1463 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1464 return iv->nonlin_use;
1467 if (integer_zerop (iv->step))
1469 record_invariant (data, op, true);
1470 return NULL;
1473 stmt = SSA_NAME_DEF_STMT (op);
1474 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1476 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1477 iv->nonlin_use = use;
1478 return use;
1481 /* Given a condition in statement STMT, checks whether it is a compare
1482 of an induction variable and an invariant. If this is the case,
1483 CONTROL_VAR is set to location of the iv, BOUND to the location of
1484 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1485 induction variable descriptions, and true is returned. If this is not
1486 the case, CONTROL_VAR and BOUND are set to the arguments of the
1487 condition and false is returned. */
1489 static bool
1490 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1491 tree **control_var, tree **bound,
1492 struct iv **iv_var, struct iv **iv_bound)
1494 /* The objects returned when COND has constant operands. */
1495 static struct iv const_iv;
1496 static tree zero;
1497 tree *op0 = &zero, *op1 = &zero;
1498 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1499 bool ret = false;
1501 if (gimple_code (stmt) == GIMPLE_COND)
1503 gcond *cond_stmt = as_a <gcond *> (stmt);
1504 op0 = gimple_cond_lhs_ptr (cond_stmt);
1505 op1 = gimple_cond_rhs_ptr (cond_stmt);
1507 else
1509 op0 = gimple_assign_rhs1_ptr (stmt);
1510 op1 = gimple_assign_rhs2_ptr (stmt);
1513 zero = integer_zero_node;
1514 const_iv.step = integer_zero_node;
1516 if (TREE_CODE (*op0) == SSA_NAME)
1517 iv0 = get_iv (data, *op0);
1518 if (TREE_CODE (*op1) == SSA_NAME)
1519 iv1 = get_iv (data, *op1);
1521 /* Exactly one of the compared values must be an iv, and the other one must
1522 be an invariant. */
1523 if (!iv0 || !iv1)
1524 goto end;
1526 if (integer_zerop (iv0->step))
1528 /* Control variable may be on the other side. */
1529 std::swap (op0, op1);
1530 std::swap (iv0, iv1);
1532 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1534 end:
1535 if (control_var)
1536 *control_var = op0;
1537 if (iv_var)
1538 *iv_var = iv0;
1539 if (bound)
1540 *bound = op1;
1541 if (iv_bound)
1542 *iv_bound = iv1;
1544 return ret;
1547 /* Checks whether the condition in STMT is interesting and if so,
1548 records it. */
1550 static void
1551 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1553 tree *var_p, *bound_p;
1554 struct iv *var_iv;
1556 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1558 find_interesting_uses_op (data, *var_p);
1559 find_interesting_uses_op (data, *bound_p);
1560 return;
1563 record_group_use (data, NULL, var_iv, stmt, USE_COMPARE);
1566 /* Returns the outermost loop EXPR is obviously invariant in
1567 relative to the loop LOOP, i.e. if all its operands are defined
1568 outside of the returned loop. Returns NULL if EXPR is not
1569 even obviously invariant in LOOP. */
1571 struct loop *
1572 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1574 basic_block def_bb;
1575 unsigned i, len;
1577 if (is_gimple_min_invariant (expr))
1578 return current_loops->tree_root;
1580 if (TREE_CODE (expr) == SSA_NAME)
1582 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1583 if (def_bb)
1585 if (flow_bb_inside_loop_p (loop, def_bb))
1586 return NULL;
1587 return superloop_at_depth (loop,
1588 loop_depth (def_bb->loop_father) + 1);
1591 return current_loops->tree_root;
1594 if (!EXPR_P (expr))
1595 return NULL;
1597 unsigned maxdepth = 0;
1598 len = TREE_OPERAND_LENGTH (expr);
1599 for (i = 0; i < len; i++)
1601 struct loop *ivloop;
1602 if (!TREE_OPERAND (expr, i))
1603 continue;
1605 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1606 if (!ivloop)
1607 return NULL;
1608 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1611 return superloop_at_depth (loop, maxdepth);
1614 /* Returns true if expression EXPR is obviously invariant in LOOP,
1615 i.e. if all its operands are defined outside of the LOOP. LOOP
1616 should not be the function body. */
1618 bool
1619 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1621 basic_block def_bb;
1622 unsigned i, len;
1624 gcc_assert (loop_depth (loop) > 0);
1626 if (is_gimple_min_invariant (expr))
1627 return true;
1629 if (TREE_CODE (expr) == SSA_NAME)
1631 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1632 if (def_bb
1633 && flow_bb_inside_loop_p (loop, def_bb))
1634 return false;
1636 return true;
1639 if (!EXPR_P (expr))
1640 return false;
1642 len = TREE_OPERAND_LENGTH (expr);
1643 for (i = 0; i < len; i++)
1644 if (TREE_OPERAND (expr, i)
1645 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1646 return false;
1648 return true;
1651 /* Given expression EXPR which computes inductive values with respect
1652 to loop recorded in DATA, this function returns biv from which EXPR
1653 is derived by tracing definition chains of ssa variables in EXPR. */
1655 static struct iv*
1656 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1658 struct iv *iv;
1659 unsigned i, n;
1660 tree e2, e1;
1661 enum tree_code code;
1662 gimple *stmt;
1664 if (expr == NULL_TREE)
1665 return NULL;
1667 if (is_gimple_min_invariant (expr))
1668 return NULL;
1670 code = TREE_CODE (expr);
1671 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1673 n = TREE_OPERAND_LENGTH (expr);
1674 for (i = 0; i < n; i++)
1676 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1677 if (iv)
1678 return iv;
1682 /* Stop if it's not ssa name. */
1683 if (code != SSA_NAME)
1684 return NULL;
1686 iv = get_iv (data, expr);
1687 if (!iv || integer_zerop (iv->step))
1688 return NULL;
1689 else if (iv->biv_p)
1690 return iv;
1692 stmt = SSA_NAME_DEF_STMT (expr);
1693 if (gphi *phi = dyn_cast <gphi *> (stmt))
1695 ssa_op_iter iter;
1696 use_operand_p use_p;
1698 if (virtual_operand_p (gimple_phi_result (phi)))
1699 return NULL;
1701 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1703 tree use = USE_FROM_PTR (use_p);
1704 iv = find_deriving_biv_for_expr (data, use);
1705 if (iv)
1706 return iv;
1708 return NULL;
1710 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1711 return NULL;
1713 e1 = gimple_assign_rhs1 (stmt);
1714 code = gimple_assign_rhs_code (stmt);
1715 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1716 return find_deriving_biv_for_expr (data, e1);
1718 switch (code)
1720 case MULT_EXPR:
1721 case PLUS_EXPR:
1722 case MINUS_EXPR:
1723 case POINTER_PLUS_EXPR:
1724 /* Increments, decrements and multiplications by a constant
1725 are simple. */
1726 e2 = gimple_assign_rhs2 (stmt);
1727 iv = find_deriving_biv_for_expr (data, e2);
1728 if (iv)
1729 return iv;
1731 /* Fallthru. */
1732 CASE_CONVERT:
1733 /* Casts are simple. */
1734 return find_deriving_biv_for_expr (data, e1);
1736 default:
1737 break;
1740 return NULL;
1743 /* Record that BIV and its predecessor and successor are used in
1744 address type uses. */
1746 static void
1747 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1749 unsigned i;
1750 tree type, base_1, base_2;
1751 bitmap_iterator bi;
1753 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1754 || biv->have_address_use || !biv->no_overflow)
1755 return;
1757 type = TREE_TYPE (biv->base);
1758 if (!INTEGRAL_TYPE_P (type))
1759 return;
1761 biv->have_address_use = true;
1762 data->bivs_not_used_in_addr--;
1763 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1764 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1766 struct iv *iv = ver_info (data, i)->iv;
1768 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1769 || iv->have_address_use || !iv->no_overflow)
1770 continue;
1772 if (type != TREE_TYPE (iv->base)
1773 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1774 continue;
1776 if (!operand_equal_p (biv->step, iv->step, 0))
1777 continue;
1779 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1780 if (operand_equal_p (base_1, iv->base, 0)
1781 || operand_equal_p (base_2, biv->base, 0))
1783 iv->have_address_use = true;
1784 data->bivs_not_used_in_addr--;
1789 /* Cumulates the steps of indices into DATA and replaces their values with the
1790 initial ones. Returns false when the value of the index cannot be determined.
1791 Callback for for_each_index. */
1793 struct ifs_ivopts_data
1795 struct ivopts_data *ivopts_data;
1796 gimple *stmt;
1797 tree step;
1800 static bool
1801 idx_find_step (tree base, tree *idx, void *data)
1803 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1804 struct iv *iv;
1805 bool use_overflow_semantics = false;
1806 tree step, iv_base, iv_step, lbound, off;
1807 struct loop *loop = dta->ivopts_data->current_loop;
1809 /* If base is a component ref, require that the offset of the reference
1810 be invariant. */
1811 if (TREE_CODE (base) == COMPONENT_REF)
1813 off = component_ref_field_offset (base);
1814 return expr_invariant_in_loop_p (loop, off);
1817 /* If base is array, first check whether we will be able to move the
1818 reference out of the loop (in order to take its address in strength
1819 reduction). In order for this to work we need both lower bound
1820 and step to be loop invariants. */
1821 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1823 /* Moreover, for a range, the size needs to be invariant as well. */
1824 if (TREE_CODE (base) == ARRAY_RANGE_REF
1825 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1826 return false;
1828 step = array_ref_element_size (base);
1829 lbound = array_ref_low_bound (base);
1831 if (!expr_invariant_in_loop_p (loop, step)
1832 || !expr_invariant_in_loop_p (loop, lbound))
1833 return false;
1836 if (TREE_CODE (*idx) != SSA_NAME)
1837 return true;
1839 iv = get_iv (dta->ivopts_data, *idx);
1840 if (!iv)
1841 return false;
1843 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1844 *&x[0], which is not folded and does not trigger the
1845 ARRAY_REF path below. */
1846 *idx = iv->base;
1848 if (integer_zerop (iv->step))
1849 return true;
1851 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1853 step = array_ref_element_size (base);
1855 /* We only handle addresses whose step is an integer constant. */
1856 if (TREE_CODE (step) != INTEGER_CST)
1857 return false;
1859 else
1860 /* The step for pointer arithmetics already is 1 byte. */
1861 step = size_one_node;
1863 iv_base = iv->base;
1864 iv_step = iv->step;
1865 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1866 use_overflow_semantics = true;
1868 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1869 sizetype, &iv_base, &iv_step, dta->stmt,
1870 use_overflow_semantics))
1872 /* The index might wrap. */
1873 return false;
1876 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1877 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1879 if (dta->ivopts_data->bivs_not_used_in_addr)
1881 if (!iv->biv_p)
1882 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1884 record_biv_for_address_use (dta->ivopts_data, iv);
1886 return true;
1889 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1890 object is passed to it in DATA. */
1892 static bool
1893 idx_record_use (tree base, tree *idx,
1894 void *vdata)
1896 struct ivopts_data *data = (struct ivopts_data *) vdata;
1897 find_interesting_uses_op (data, *idx);
1898 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1900 find_interesting_uses_op (data, array_ref_element_size (base));
1901 find_interesting_uses_op (data, array_ref_low_bound (base));
1903 return true;
1906 /* If we can prove that TOP = cst * BOT for some constant cst,
1907 store cst to MUL and return true. Otherwise return false.
1908 The returned value is always sign-extended, regardless of the
1909 signedness of TOP and BOT. */
1911 static bool
1912 constant_multiple_of (tree top, tree bot, widest_int *mul)
1914 tree mby;
1915 enum tree_code code;
1916 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1917 widest_int res, p0, p1;
1919 STRIP_NOPS (top);
1920 STRIP_NOPS (bot);
1922 if (operand_equal_p (top, bot, 0))
1924 *mul = 1;
1925 return true;
1928 code = TREE_CODE (top);
1929 switch (code)
1931 case MULT_EXPR:
1932 mby = TREE_OPERAND (top, 1);
1933 if (TREE_CODE (mby) != INTEGER_CST)
1934 return false;
1936 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1937 return false;
1939 *mul = wi::sext (res * wi::to_widest (mby), precision);
1940 return true;
1942 case PLUS_EXPR:
1943 case MINUS_EXPR:
1944 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1945 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1946 return false;
1948 if (code == MINUS_EXPR)
1949 p1 = -p1;
1950 *mul = wi::sext (p0 + p1, precision);
1951 return true;
1953 case INTEGER_CST:
1954 if (TREE_CODE (bot) != INTEGER_CST)
1955 return false;
1957 p0 = widest_int::from (top, SIGNED);
1958 p1 = widest_int::from (bot, SIGNED);
1959 if (p1 == 0)
1960 return false;
1961 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1962 return res == 0;
1964 default:
1965 return false;
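/* Editorial sketch (not part of the original sources): a few worked cases for
   constant_multiple_of above, assuming TOP and BOT are the trees shown:

     constant_multiple_of (8, 2, &mul)      -> true,  *mul = 4
     constant_multiple_of (8, 3, &mul)      -> false  (non-zero remainder)
     constant_multiple_of (n * 4, n, &mul)  -> true,  *mul = 4
     constant_multiple_of (n, n * 4, &mul)  -> false  (no constant multiple)

   This is the kind of query used later when a use's iv step must be expressed
   as a constant multiple of a candidate's step.  */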
1969 /* Return true if memory reference REF with step STEP may be unaligned. */
1971 static bool
1972 may_be_unaligned_p (tree ref, tree step)
1974 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1975 thus they are not misaligned. */
1976 if (TREE_CODE (ref) == TARGET_MEM_REF)
1977 return false;
1979 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1980 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1981 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1983 unsigned HOST_WIDE_INT bitpos;
1984 unsigned int ref_align;
1985 get_object_alignment_1 (ref, &ref_align, &bitpos);
1986 if (ref_align < align
1987 || (bitpos % align) != 0
1988 || (bitpos % BITS_PER_UNIT) != 0)
1989 return true;
1991 unsigned int trailing_zeros = tree_ctz (step);
1992 if (trailing_zeros < HOST_BITS_PER_INT
1993 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1994 return true;
1996 return false;
1999 /* Return true if EXPR may be non-addressable. */
2001 bool
2002 may_be_nonaddressable_p (tree expr)
2004 switch (TREE_CODE (expr))
2006 case TARGET_MEM_REF:
2007 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2008 target, thus they are always addressable. */
2009 return false;
2011 case MEM_REF:
2012 /* Likewise for MEM_REFs, modulo the storage order. */
2013 return REF_REVERSE_STORAGE_ORDER (expr);
2015 case BIT_FIELD_REF:
2016 if (REF_REVERSE_STORAGE_ORDER (expr))
2017 return true;
2018 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2020 case COMPONENT_REF:
2021 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2022 return true;
2023 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2024 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2026 case ARRAY_REF:
2027 case ARRAY_RANGE_REF:
2028 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2029 return true;
2030 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2032 case VIEW_CONVERT_EXPR:
2033 /* This kind of view-conversions may wrap non-addressable objects
2034 and make them look addressable. After some processing the
2035 non-addressability may be uncovered again, causing ADDR_EXPRs
2036 of inappropriate objects to be built. */
2037 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2038 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2039 return true;
2040 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2042 CASE_CONVERT:
2043 return true;
2045 default:
2046 break;
2049 return false;
2052 /* Finds addresses in *OP_P inside STMT. */
2054 static void
2055 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2056 tree *op_p)
2058 tree base = *op_p, step = size_zero_node;
2059 struct iv *civ;
2060 struct ifs_ivopts_data ifs_ivopts_data;
2062 /* Do not play with volatile memory references. A bit too conservative,
2063 perhaps, but safe. */
2064 if (gimple_has_volatile_ops (stmt))
2065 goto fail;
2067 /* Ignore bitfields for now. Not really something terribly complicated
2068 to handle. TODO. */
2069 if (TREE_CODE (base) == BIT_FIELD_REF)
2070 goto fail;
2072 base = unshare_expr (base);
2074 if (TREE_CODE (base) == TARGET_MEM_REF)
2076 tree type = build_pointer_type (TREE_TYPE (base));
2077 tree astep;
2079 if (TMR_BASE (base)
2080 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2082 civ = get_iv (data, TMR_BASE (base));
2083 if (!civ)
2084 goto fail;
2086 TMR_BASE (base) = civ->base;
2087 step = civ->step;
2089 if (TMR_INDEX2 (base)
2090 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2092 civ = get_iv (data, TMR_INDEX2 (base));
2093 if (!civ)
2094 goto fail;
2096 TMR_INDEX2 (base) = civ->base;
2097 step = civ->step;
2099 if (TMR_INDEX (base)
2100 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2102 civ = get_iv (data, TMR_INDEX (base));
2103 if (!civ)
2104 goto fail;
2106 TMR_INDEX (base) = civ->base;
2107 astep = civ->step;
2109 if (astep)
2111 if (TMR_STEP (base))
2112 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2114 step = fold_build2 (PLUS_EXPR, type, step, astep);
2118 if (integer_zerop (step))
2119 goto fail;
2120 base = tree_mem_ref_addr (type, base);
2122 else
2124 ifs_ivopts_data.ivopts_data = data;
2125 ifs_ivopts_data.stmt = stmt;
2126 ifs_ivopts_data.step = size_zero_node;
2127 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2128 || integer_zerop (ifs_ivopts_data.step))
2129 goto fail;
2130 step = ifs_ivopts_data.step;
2132 /* Check that the base expression is addressable. This needs
2133 to be done after substituting bases of IVs into it. */
2134 if (may_be_nonaddressable_p (base))
2135 goto fail;
2137 /* Moreover, on strict alignment platforms, check that it is
2138 sufficiently aligned. */
2139 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2140 goto fail;
2142 base = build_fold_addr_expr (base);
2144 /* Substituting bases of IVs into the base expression might
2145 have caused folding opportunities. */
2146 if (TREE_CODE (base) == ADDR_EXPR)
2148 tree *ref = &TREE_OPERAND (base, 0);
2149 while (handled_component_p (*ref))
2150 ref = &TREE_OPERAND (*ref, 0);
2151 if (TREE_CODE (*ref) == MEM_REF)
2153 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2154 TREE_OPERAND (*ref, 0),
2155 TREE_OPERAND (*ref, 1));
2156 if (tem)
2157 *ref = tem;
2162 civ = alloc_iv (data, base, step);
2163 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2164 return;
2166 fail:
2167 for_each_index (op_p, idx_record_use, data);
2170 /* Finds and records invariants used in STMT. */
2172 static void
2173 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2175 ssa_op_iter iter;
2176 use_operand_p use_p;
2177 tree op;
2179 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2181 op = USE_FROM_PTR (use_p);
2182 record_invariant (data, op, false);
2186 /* Finds interesting uses of induction variables in the statement STMT. */
2188 static void
2189 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2191 struct iv *iv;
2192 tree op, *lhs, *rhs;
2193 ssa_op_iter iter;
2194 use_operand_p use_p;
2195 enum tree_code code;
2197 find_invariants_stmt (data, stmt);
2199 if (gimple_code (stmt) == GIMPLE_COND)
2201 find_interesting_uses_cond (data, stmt);
2202 return;
2205 if (is_gimple_assign (stmt))
2207 lhs = gimple_assign_lhs_ptr (stmt);
2208 rhs = gimple_assign_rhs1_ptr (stmt);
2210 if (TREE_CODE (*lhs) == SSA_NAME)
2212 /* If the statement defines an induction variable, the uses are not
2213 interesting by themselves. */
2215 iv = get_iv (data, *lhs);
2217 if (iv && !integer_zerop (iv->step))
2218 return;
2221 code = gimple_assign_rhs_code (stmt);
2222 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2223 && (REFERENCE_CLASS_P (*rhs)
2224 || is_gimple_val (*rhs)))
2226 if (REFERENCE_CLASS_P (*rhs))
2227 find_interesting_uses_address (data, stmt, rhs);
2228 else
2229 find_interesting_uses_op (data, *rhs);
2231 if (REFERENCE_CLASS_P (*lhs))
2232 find_interesting_uses_address (data, stmt, lhs);
2233 return;
2235 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2237 find_interesting_uses_cond (data, stmt);
2238 return;
2241 /* TODO -- we should also handle address uses of type
2243 memory = call (whatever);
2245 and
2247 call (memory). */
2250 if (gimple_code (stmt) == GIMPLE_PHI
2251 && gimple_bb (stmt) == data->current_loop->header)
2253 iv = get_iv (data, PHI_RESULT (stmt));
2255 if (iv && !integer_zerop (iv->step))
2256 return;
2259 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2261 op = USE_FROM_PTR (use_p);
2263 if (TREE_CODE (op) != SSA_NAME)
2264 continue;
2266 iv = get_iv (data, op);
2267 if (!iv)
2268 continue;
2270 find_interesting_uses_op (data, op);
2274 /* Finds interesting uses of induction variables outside of loops
2275 on loop exit edge EXIT. */
2277 static void
2278 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2280 gphi *phi;
2281 gphi_iterator psi;
2282 tree def;
2284 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2286 phi = psi.phi ();
2287 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2288 if (!virtual_operand_p (def))
2289 find_interesting_uses_op (data, def);
2293 /* Compute the maximum offset supported by the [base + offset] addressing
2294 mode for the memory reference represented by USE. */
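/* For illustration (the bound is hypothetical and target-dependent): on a
target whose [base + offset] addressing accepts signed 13-bit offsets, the
probing loop below settles on 4095 for SImode, i.e. the largest offset of
the form (1 << i) - 1 for which reg + 4095 is still accepted as an SImode
memory address. The result is cached per address space and memory mode in
max_offset_list. */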
2296 static HOST_WIDE_INT
2297 compute_max_addr_offset (struct iv_use *use)
2299 int width;
2300 rtx reg, addr;
2301 HOST_WIDE_INT i, off;
2302 unsigned list_index, num;
2303 addr_space_t as;
2304 machine_mode mem_mode, addr_mode;
2305 static vec<HOST_WIDE_INT> max_offset_list;
2307 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2308 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2310 num = max_offset_list.length ();
2311 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2312 if (list_index >= num)
2314 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2315 for (; num < max_offset_list.length (); num++)
2316 max_offset_list[num] = -1;
2319 off = max_offset_list[list_index];
2320 if (off != -1)
2321 return off;
2323 addr_mode = targetm.addr_space.address_mode (as);
2324 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2325 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2327 width = GET_MODE_BITSIZE (addr_mode) - 1;
2328 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2329 width = HOST_BITS_PER_WIDE_INT - 1;
2331 for (i = width; i > 0; i--)
2333 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2334 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2335 if (memory_address_addr_space_p (mem_mode, addr, as))
2336 break;
2338 /* For some strict-alignment targets, the offset must be naturally
2339 aligned. Try an aligned offset if mem_mode is not QImode. */
2340 off = ((unsigned HOST_WIDE_INT) 1 << i);
2341 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2343 off -= GET_MODE_SIZE (mem_mode);
2344 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2345 if (memory_address_addr_space_p (mem_mode, addr, as))
2346 break;
2349 if (i == 0)
2350 off = 0;
2352 max_offset_list[list_index] = off;
2353 return off;
2356 /* Comparison function to sort group in ascending order of addr_offset. */
2358 static int
2359 group_compare_offset (const void *a, const void *b)
2361 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2362 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2364 if ((*u1)->addr_offset != (*u2)->addr_offset)
2365 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2366 else
2367 return 0;
2370 /* Check if small groups should be split. Return true if no group
2371 contains more than two uses with distinct addr_offsets. Return
2372 false otherwise. We want to split such groups because:
2374 1) Small groups don't have much benefit and may interfere with
2375 general candidate selection.
2376 2) The problem size with only small groups is usually small and the
2377 general algorithm can handle it well.
2379 TODO -- The above claim may not hold when we want to merge memory
2380 accesses with consecutive addresses. */
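/* For example (offsets in bytes, purely illustrative): a group whose uses
have addr_offsets {0, 0, 8} has only two distinct offsets and counts as
small; a single group with offsets {0, 4, 8} has three distinct offsets and
makes this function return false, so nothing is forced to split. */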
2382 static bool
2383 split_small_address_groups_p (struct ivopts_data *data)
2385 unsigned int i, j, distinct = 1;
2386 struct iv_use *pre;
2387 struct iv_group *group;
2389 for (i = 0; i < data->vgroups.length (); i++)
2391 group = data->vgroups[i];
2392 if (group->vuses.length () == 1)
2393 continue;
2395 gcc_assert (group->type == USE_ADDRESS);
2396 if (group->vuses.length () == 2)
2398 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2399 std::swap (group->vuses[0], group->vuses[1]);
2401 else
2402 group->vuses.qsort (group_compare_offset);
2404 if (distinct > 2)
2405 continue;
2407 distinct = 1;
2408 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2410 if (group->vuses[j]->addr_offset != pre->addr_offset)
2412 pre = group->vuses[j];
2413 distinct++;
2416 if (distinct > 2)
2417 break;
2421 return (distinct <= 2);
2424 /* For each group of address type uses, this function further groups
2425 these uses according to the maximum offset supported by the target's
2426 [base + offset] addressing mode. */
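/* A hypothetical example: with a maximum supported offset of 4095 bytes, a
group whose uses sit at offsets {0, 16, 4095, 4096} relative to the first
use is split at the fourth use, leaving one group for {0, 16, 4095} and a
new group for {4096}. */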
2428 static void
2429 split_address_groups (struct ivopts_data *data)
2431 unsigned int i, j;
2432 HOST_WIDE_INT max_offset = -1;
2434 /* Reset max offset to split all small groups. */
2435 if (split_small_address_groups_p (data))
2436 max_offset = 0;
2438 for (i = 0; i < data->vgroups.length (); i++)
2440 struct iv_group *group = data->vgroups[i];
2441 struct iv_use *use = group->vuses[0];
2443 use->id = 0;
2444 use->group_id = group->id;
2445 if (group->vuses.length () == 1)
2446 continue;
2448 if (max_offset != 0)
2449 max_offset = compute_max_addr_offset (use);
2451 for (j = 1; j < group->vuses.length (); j++)
2453 struct iv_use *next = group->vuses[j];
2455 /* Only uses whose offset relative to the first use fits in the
2456 offset part of the addressing mode can be grouped together. */
2457 if (next->addr_offset - use->addr_offset
2458 > (unsigned HOST_WIDE_INT) max_offset)
2459 break;
2461 next->id = j;
2462 next->group_id = group->id;
2464 /* Split group. */
2465 if (j < group->vuses.length ())
2467 struct iv_group *new_group = record_group (data, group->type);
2468 new_group->vuses.safe_splice (group->vuses);
2469 new_group->vuses.block_remove (0, j);
2470 group->vuses.truncate (j);
2475 /* Finds uses of the induction variables that are interesting. */
2477 static void
2478 find_interesting_uses (struct ivopts_data *data)
2480 basic_block bb;
2481 gimple_stmt_iterator bsi;
2482 basic_block *body = get_loop_body (data->current_loop);
2483 unsigned i;
2484 edge e;
2486 for (i = 0; i < data->current_loop->num_nodes; i++)
2488 edge_iterator ei;
2489 bb = body[i];
2491 FOR_EACH_EDGE (e, ei, bb->succs)
2492 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2493 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2494 find_interesting_uses_outside (data, e);
2496 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2497 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2498 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2499 if (!is_gimple_debug (gsi_stmt (bsi)))
2500 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2503 split_address_groups (data);
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2507 bitmap_iterator bi;
2509 fprintf (dump_file, "\n<Invariant Vars>:\n");
2510 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2512 struct version_info *info = ver_info (data, i);
2513 if (info->inv_id)
2515 fprintf (dump_file, "Inv %d:\t", info->inv_id);
2516 print_generic_expr (dump_file, info->name, TDF_SLIM);
2517 fprintf (dump_file, "%s\n",
2518 info->has_nonlin_use ? "" : "\t(eliminable)");
2522 fprintf (dump_file, "\n<IV Groups>:\n");
2523 dump_groups (dump_file, data);
2524 fprintf (dump_file, "\n");
2527 free (body);
2530 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2531 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2532 we are at the top-level of the processed address. */
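/* For example (types and sizes are illustrative): for the address
expression &a[i + 3] with "int a[]" on a target with 4-byte int, the result
is the equivalent of &a[i] with 12 stored in *OFFSET. A top-level
COMPONENT_REF or ARRAY_REF with a constant index is stripped completely,
its byte offset being added to *OFFSET. */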
2534 static tree
2535 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2536 HOST_WIDE_INT *offset)
2538 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2539 enum tree_code code;
2540 tree type, orig_type = TREE_TYPE (expr);
2541 HOST_WIDE_INT off0, off1, st;
2542 tree orig_expr = expr;
2544 STRIP_NOPS (expr);
2546 type = TREE_TYPE (expr);
2547 code = TREE_CODE (expr);
2548 *offset = 0;
2550 switch (code)
2552 case INTEGER_CST:
2553 if (!cst_and_fits_in_hwi (expr)
2554 || integer_zerop (expr))
2555 return orig_expr;
2557 *offset = int_cst_value (expr);
2558 return build_int_cst (orig_type, 0);
2560 case POINTER_PLUS_EXPR:
2561 case PLUS_EXPR:
2562 case MINUS_EXPR:
2563 op0 = TREE_OPERAND (expr, 0);
2564 op1 = TREE_OPERAND (expr, 1);
2566 op0 = strip_offset_1 (op0, false, false, &off0);
2567 op1 = strip_offset_1 (op1, false, false, &off1);
2569 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2570 if (op0 == TREE_OPERAND (expr, 0)
2571 && op1 == TREE_OPERAND (expr, 1))
2572 return orig_expr;
2574 if (integer_zerop (op1))
2575 expr = op0;
2576 else if (integer_zerop (op0))
2578 if (code == MINUS_EXPR)
2579 expr = fold_build1 (NEGATE_EXPR, type, op1);
2580 else
2581 expr = op1;
2583 else
2584 expr = fold_build2 (code, type, op0, op1);
2586 return fold_convert (orig_type, expr);
2588 case MULT_EXPR:
2589 op1 = TREE_OPERAND (expr, 1);
2590 if (!cst_and_fits_in_hwi (op1))
2591 return orig_expr;
2593 op0 = TREE_OPERAND (expr, 0);
2594 op0 = strip_offset_1 (op0, false, false, &off0);
2595 if (op0 == TREE_OPERAND (expr, 0))
2596 return orig_expr;
2598 *offset = off0 * int_cst_value (op1);
2599 if (integer_zerop (op0))
2600 expr = op0;
2601 else
2602 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2604 return fold_convert (orig_type, expr);
2606 case ARRAY_REF:
2607 case ARRAY_RANGE_REF:
2608 if (!inside_addr)
2609 return orig_expr;
2611 step = array_ref_element_size (expr);
2612 if (!cst_and_fits_in_hwi (step))
2613 break;
2615 st = int_cst_value (step);
2616 op1 = TREE_OPERAND (expr, 1);
2617 op1 = strip_offset_1 (op1, false, false, &off1);
2618 *offset = off1 * st;
2620 if (top_compref
2621 && integer_zerop (op1))
2623 /* Strip the component reference completely. */
2624 op0 = TREE_OPERAND (expr, 0);
2625 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2626 *offset += off0;
2627 return op0;
2629 break;
2631 case COMPONENT_REF:
2633 tree field;
2635 if (!inside_addr)
2636 return orig_expr;
2638 tmp = component_ref_field_offset (expr);
2639 field = TREE_OPERAND (expr, 1);
2640 if (top_compref
2641 && cst_and_fits_in_hwi (tmp)
2642 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2644 HOST_WIDE_INT boffset, abs_off;
2646 /* Strip the component reference completely. */
2647 op0 = TREE_OPERAND (expr, 0);
2648 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2649 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2650 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2651 if (boffset < 0)
2652 abs_off = -abs_off;
2654 *offset = off0 + int_cst_value (tmp) + abs_off;
2655 return op0;
2658 break;
2660 case ADDR_EXPR:
2661 op0 = TREE_OPERAND (expr, 0);
2662 op0 = strip_offset_1 (op0, true, true, &off0);
2663 *offset += off0;
2665 if (op0 == TREE_OPERAND (expr, 0))
2666 return orig_expr;
2668 expr = build_fold_addr_expr (op0);
2669 return fold_convert (orig_type, expr);
2671 case MEM_REF:
2672 /* ??? Offset operand? */
2673 inside_addr = false;
2674 break;
2676 default:
2677 return orig_expr;
2680 /* Default handling of expressions for which we want to recurse into
2681 the first operand. */
2682 op0 = TREE_OPERAND (expr, 0);
2683 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2684 *offset += off0;
2686 if (op0 == TREE_OPERAND (expr, 0)
2687 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2688 return orig_expr;
2690 expr = copy_node (expr);
2691 TREE_OPERAND (expr, 0) = op0;
2692 if (op1)
2693 TREE_OPERAND (expr, 1) = op1;
2695 /* Inside an address, we might strip the top-level component references,
2696 thus changing the type of the expression. The handling of ADDR_EXPR
2697 will fix that. */
2698 expr = fold_convert (orig_type, expr);
2700 return expr;
2703 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2705 static tree
2706 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2708 HOST_WIDE_INT off;
2709 tree core = strip_offset_1 (expr, false, false, &off);
2710 *offset = off;
2711 return core;
2714 /* Returns a variant of TYPE that can be used as a base for different uses.
2715 We return an unsigned type with the same precision, which avoids problems
2716 with overflows. */
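/* E.g. "int" maps to "unsigned int", and so does "int *" on a
(hypothetical) target where pointers have the same precision as int; an
unsigned type such as "unsigned long" is returned unchanged. */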
2718 static tree
2719 generic_type_for (tree type)
2721 if (POINTER_TYPE_P (type))
2722 return unsigned_type_for (type);
2724 if (TYPE_UNSIGNED (type))
2725 return type;
2727 return unsigned_type_for (type);
2730 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2731 the bitmap in which we should store them. */
2733 static struct ivopts_data *fd_ivopts_data;
2734 static tree
2735 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2737 bitmap *depends_on = (bitmap *) data;
2738 struct version_info *info;
2740 if (TREE_CODE (*expr_p) != SSA_NAME)
2741 return NULL_TREE;
2742 info = name_info (fd_ivopts_data, *expr_p);
2744 if (!info->inv_id || info->has_nonlin_use)
2745 return NULL_TREE;
2747 if (!*depends_on)
2748 *depends_on = BITMAP_ALLOC (NULL);
2749 bitmap_set_bit (*depends_on, info->inv_id);
2751 return NULL_TREE;
2754 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2755 position to POS. If USE is not NULL, the candidate is set as related to
2756 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2757 replacement of the final value of the iv by a direct computation. */
2759 static struct iv_cand *
2760 add_candidate_1 (struct ivopts_data *data,
2761 tree base, tree step, bool important, enum iv_position pos,
2762 struct iv_use *use, gimple *incremented_at,
2763 struct iv *orig_iv = NULL)
2765 unsigned i;
2766 struct iv_cand *cand = NULL;
2767 tree type, orig_type;
2769 gcc_assert (base && step);
2771 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2772 live, but the ivopts code may replace a real pointer with one
2773 pointing before or after the memory block that is then adjusted
2774 into the memory block during the loop. FIXME: It would likely be
2775 better to actually force the pointer live and still use ivopts;
2776 for example, it would be enough to write the pointer into memory
2777 and keep it there until after the loop. */
2778 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
2779 return NULL;
2781 /* For non-original variables, make sure their values are computed in a type
2782 that does not invoke undefined behavior on overflows (since in general,
2783 we cannot prove that these induction variables are non-wrapping). */
2784 if (pos != IP_ORIGINAL)
2786 orig_type = TREE_TYPE (base);
2787 type = generic_type_for (orig_type);
2788 if (type != orig_type)
2790 base = fold_convert (type, base);
2791 step = fold_convert (type, step);
2795 for (i = 0; i < data->vcands.length (); i++)
2797 cand = data->vcands[i];
2799 if (cand->pos != pos)
2800 continue;
2802 if (cand->incremented_at != incremented_at
2803 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2804 && cand->ainc_use != use))
2805 continue;
2807 if (operand_equal_p (base, cand->iv->base, 0)
2808 && operand_equal_p (step, cand->iv->step, 0)
2809 && (TYPE_PRECISION (TREE_TYPE (base))
2810 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2811 break;
2814 if (i == data->vcands.length ())
2816 cand = XCNEW (struct iv_cand);
2817 cand->id = i;
2818 cand->iv = alloc_iv (data, base, step);
2819 cand->pos = pos;
2820 if (pos != IP_ORIGINAL)
2822 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2823 cand->var_after = cand->var_before;
2825 cand->important = important;
2826 cand->incremented_at = incremented_at;
2827 data->vcands.safe_push (cand);
2829 if (TREE_CODE (step) != INTEGER_CST)
2831 fd_ivopts_data = data;
2832 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2835 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2836 cand->ainc_use = use;
2837 else
2838 cand->ainc_use = NULL;
2840 cand->orig_iv = orig_iv;
2841 if (dump_file && (dump_flags & TDF_DETAILS))
2842 dump_cand (dump_file, cand);
2845 cand->important |= important;
2847 /* Relate candidate to the group for which it is added. */
2848 if (use)
2849 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
2851 return cand;
2854 /* Returns true if incrementing the induction variable at the end of the LOOP
2855 is allowed.
2857 The purpose is to avoid splitting the latch edge with a biv increment, thus
2858 creating a jump, possibly confusing other optimization passes and leaving
2859 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2860 is not available (so we do not have a better alternative), or if the latch
2861 edge is already nonempty. */
2863 static bool
2864 allow_ip_end_pos_p (struct loop *loop)
2866 if (!ip_normal_pos (loop))
2867 return true;
2869 if (!empty_block_p (ip_end_pos (loop)))
2870 return true;
2872 return false;
2875 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2876 Important field is set to IMPORTANT. */
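/* A sketch of the effect (sizes are illustrative): for a 4-byte access
through *p in a loop where p steps by 4, a target with post-increment
addressing gets a candidate {base, +4} whose increment is placed right
after the use (IP_AFTER_USE), while a target with pre-increment addressing
gets {base - 4, +4} incremented right before it (IP_BEFORE_USE). */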
2878 static void
2879 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2880 bool important, struct iv_use *use)
2882 basic_block use_bb = gimple_bb (use->stmt);
2883 machine_mode mem_mode;
2884 unsigned HOST_WIDE_INT cstepi;
2886 /* If we insert the increment in any position other than the standard
2887 ones, we must ensure that it is incremented once per iteration.
2888 It must not be in an inner nested loop, or one side of an if
2889 statement. */
2890 if (use_bb->loop_father != data->current_loop
2891 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2892 || stmt_could_throw_p (use->stmt)
2893 || !cst_and_fits_in_hwi (step))
2894 return;
2896 cstepi = int_cst_value (step);
2898 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2899 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2900 || USE_STORE_PRE_INCREMENT (mem_mode))
2901 && GET_MODE_SIZE (mem_mode) == cstepi)
2902 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2903 || USE_STORE_PRE_DECREMENT (mem_mode))
2904 && GET_MODE_SIZE (mem_mode) == -cstepi))
2906 enum tree_code code = MINUS_EXPR;
2907 tree new_base;
2908 tree new_step = step;
2910 if (POINTER_TYPE_P (TREE_TYPE (base)))
2912 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2913 code = POINTER_PLUS_EXPR;
2915 else
2916 new_step = fold_convert (TREE_TYPE (base), new_step);
2917 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2918 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2919 use->stmt);
2921 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2922 || USE_STORE_POST_INCREMENT (mem_mode))
2923 && GET_MODE_SIZE (mem_mode) == cstepi)
2924 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2925 || USE_STORE_POST_DECREMENT (mem_mode))
2926 && GET_MODE_SIZE (mem_mode) == -cstepi))
2928 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2929 use->stmt);
2933 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2934 position to POS. If USE is not NULL, the candidate is set as related to
2935 it. The candidate computation is scheduled before the exit condition and at
2936 the end of the loop. */
2938 static void
2939 add_candidate (struct ivopts_data *data,
2940 tree base, tree step, bool important, struct iv_use *use,
2941 struct iv *orig_iv = NULL)
2943 if (ip_normal_pos (data->current_loop))
2944 add_candidate_1 (data, base, step, important,
2945 IP_NORMAL, use, NULL, orig_iv);
2946 if (ip_end_pos (data->current_loop)
2947 && allow_ip_end_pos_p (data->current_loop))
2948 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2951 /* Adds standard iv candidates. */
2953 static void
2954 add_standard_iv_candidates (struct ivopts_data *data)
2956 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2958 /* The same for a double-integer type if it is still fast enough. */
2959 if (TYPE_PRECISION
2960 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2961 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2962 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2963 build_int_cst (long_integer_type_node, 1), true, NULL);
2965 /* The same for a double-integer type if it is still fast enough. */
2966 if (TYPE_PRECISION
2967 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2968 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2969 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2970 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2974 /* Adds candidates based on the old induction variable IV. */
2976 static void
2977 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2979 gimple *phi;
2980 tree def;
2981 struct iv_cand *cand;
2983 /* Check if this biv is used in address type use. */
2984 if (iv->no_overflow && iv->have_address_use
2985 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
2986 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
2988 tree base = fold_convert (sizetype, iv->base);
2989 tree step = fold_convert (sizetype, iv->step);
2991 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
2992 add_candidate (data, base, step, true, NULL, iv);
2993 /* Add iv cand of the original type only if it has nonlinear use. */
2994 if (iv->nonlin_use)
2995 add_candidate (data, iv->base, iv->step, true, NULL);
2997 else
2998 add_candidate (data, iv->base, iv->step, true, NULL);
3000 /* The same, but with initial value zero. */
3001 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3002 add_candidate (data, size_int (0), iv->step, true, NULL);
3003 else
3004 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3005 iv->step, true, NULL);
3007 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3008 if (gimple_code (phi) == GIMPLE_PHI)
3010 /* Additionally record the possibility of leaving the original iv
3011 untouched. */
3012 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3013 /* Don't add candidate if it's from another PHI node because
3014 it's an affine iv appearing in the form of PEELED_CHREC. */
3015 phi = SSA_NAME_DEF_STMT (def);
3016 if (gimple_code (phi) != GIMPLE_PHI)
3018 cand = add_candidate_1 (data,
3019 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3020 SSA_NAME_DEF_STMT (def));
3021 if (cand)
3023 cand->var_before = iv->ssa_name;
3024 cand->var_after = def;
3027 else
3028 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3032 /* Adds candidates based on the old induction variables. */
3034 static void
3035 add_iv_candidate_for_bivs (struct ivopts_data *data)
3037 unsigned i;
3038 struct iv *iv;
3039 bitmap_iterator bi;
3041 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3043 iv = ver_info (data, i)->iv;
3044 if (iv && iv->biv_p && !integer_zerop (iv->step))
3045 add_iv_candidate_for_biv (data, iv);
3049 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3051 static void
3052 record_common_cand (struct ivopts_data *data, tree base,
3053 tree step, struct iv_use *use)
3055 struct iv_common_cand ent;
3056 struct iv_common_cand **slot;
3058 ent.base = base;
3059 ent.step = step;
3060 ent.hash = iterative_hash_expr (base, 0);
3061 ent.hash = iterative_hash_expr (step, ent.hash);
3063 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3064 if (*slot == NULL)
3066 *slot = new iv_common_cand ();
3067 (*slot)->base = base;
3068 (*slot)->step = step;
3069 (*slot)->uses.create (8);
3070 (*slot)->hash = ent.hash;
3071 data->iv_common_cands.safe_push ((*slot));
3074 gcc_assert (use != NULL);
3075 (*slot)->uses.safe_push (use);
3076 return;
3079 /* Comparison function used to sort common candidates. */
3081 static int
3082 common_cand_cmp (const void *p1, const void *p2)
3084 unsigned n1, n2;
3085 const struct iv_common_cand *const *const ccand1
3086 = (const struct iv_common_cand *const *)p1;
3087 const struct iv_common_cand *const *const ccand2
3088 = (const struct iv_common_cand *const *)p2;
3090 n1 = (*ccand1)->uses.length ();
3091 n2 = (*ccand2)->uses.length ();
3092 return n2 - n1;
3095 /* Adds IV candidates based on the common candidates recorded. */
3097 static void
3098 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3100 unsigned i, j;
3101 struct iv_cand *cand_1, *cand_2;
3103 data->iv_common_cands.qsort (common_cand_cmp);
3104 for (i = 0; i < data->iv_common_cands.length (); i++)
3106 struct iv_common_cand *ptr = data->iv_common_cands[i];
3108 /* Only add IV candidate if it's derived from multiple uses. */
3109 if (ptr->uses.length () <= 1)
3110 break;
3112 cand_1 = NULL;
3113 cand_2 = NULL;
3114 if (ip_normal_pos (data->current_loop))
3115 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3116 false, IP_NORMAL, NULL, NULL);
3118 if (ip_end_pos (data->current_loop)
3119 && allow_ip_end_pos_p (data->current_loop))
3120 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3121 false, IP_END, NULL, NULL);
3123 /* Bind deriving uses and the new candidates. */
3124 for (j = 0; j < ptr->uses.length (); j++)
3126 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3127 if (cand_1)
3128 bitmap_set_bit (group->related_cands, cand_1->id);
3129 if (cand_2)
3130 bitmap_set_bit (group->related_cands, cand_2->id);
3134 /* Release data since it is useless from this point. */
3135 data->iv_common_cand_tab->empty ();
3136 data->iv_common_cands.truncate (0);
3139 /* Adds candidates based on the value of USE's iv. */
3141 static void
3142 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3144 unsigned HOST_WIDE_INT offset;
3145 tree base;
3146 tree basetype;
3147 struct iv *iv = use->iv;
3149 add_candidate (data, iv->base, iv->step, false, use);
3151 /* Record common candidate for use in case it can be shared by others. */
3152 record_common_cand (data, iv->base, iv->step, use);
3154 /* Record common candidate with initial value zero. */
3155 basetype = TREE_TYPE (iv->base);
3156 if (POINTER_TYPE_P (basetype))
3157 basetype = sizetype;
3158 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3160 /* Record a common candidate with the constant offset stripped from the base.
3161 As for the use itself, we also add a candidate directly for it. */
3162 base = strip_offset (iv->base, &offset);
3163 if (offset || base != iv->base)
3165 record_common_cand (data, base, iv->step, use);
3166 add_candidate (data, base, iv->step, false, use);
3169 /* Record common candidate with base_object removed in base. */
3170 if (iv->base_object != NULL)
3172 unsigned i;
3173 aff_tree aff_base;
3174 tree step, base_object = iv->base_object;
3176 base = iv->base;
3177 step = iv->step;
3178 STRIP_NOPS (base);
3179 STRIP_NOPS (step);
3180 STRIP_NOPS (base_object);
3181 tree_to_aff_combination (base, TREE_TYPE (base), &aff_base);
3182 for (i = 0; i < aff_base.n; i++)
3184 if (aff_base.elts[i].coef != 1)
3185 continue;
3187 if (operand_equal_p (aff_base.elts[i].val, base_object, 0))
3188 break;
3190 if (i < aff_base.n)
3192 aff_combination_remove_elt (&aff_base, i);
3193 base = aff_combination_to_tree (&aff_base);
3194 basetype = TREE_TYPE (base);
3195 if (POINTER_TYPE_P (basetype))
3196 basetype = sizetype;
3198 step = fold_convert (basetype, step);
3199 record_common_cand (data, base, step, use);
3200 /* Also record common candidate with offset stripped. */
3201 base = strip_offset (base, &offset);
3202 if (offset)
3203 record_common_cand (data, base, step, use);
3207 /* Finally, add auto-increment candidates. Make such variables
3208 important since other iv uses with the same base object may be based
3209 on them. */
3210 if (use != NULL && use->type == USE_ADDRESS)
3211 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3214 /* Adds candidates based on the uses. */
3216 static void
3217 add_iv_candidate_for_groups (struct ivopts_data *data)
3219 unsigned i;
3221 /* Only add candidates for the first use in each group. */
3222 for (i = 0; i < data->vgroups.length (); i++)
3224 struct iv_group *group = data->vgroups[i];
3226 gcc_assert (group->vuses[0] != NULL);
3227 add_iv_candidate_for_use (data, group->vuses[0]);
3229 add_iv_candidate_derived_from_uses (data);
3232 /* Record important candidates and add them to related_cands bitmaps. */
3234 static void
3235 record_important_candidates (struct ivopts_data *data)
3237 unsigned i;
3238 struct iv_group *group;
3240 for (i = 0; i < data->vcands.length (); i++)
3242 struct iv_cand *cand = data->vcands[i];
3244 if (cand->important)
3245 bitmap_set_bit (data->important_candidates, i);
3248 data->consider_all_candidates = (data->vcands.length ()
3249 <= CONSIDER_ALL_CANDIDATES_BOUND);
3251 /* Add important candidates to groups' related_cands bitmaps. */
3252 for (i = 0; i < data->vgroups.length (); i++)
3254 group = data->vgroups[i];
3255 bitmap_ior_into (group->related_cands, data->important_candidates);
3259 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3260 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3261 we allocate a simple list to every use. */
3263 static void
3264 alloc_use_cost_map (struct ivopts_data *data)
3266 unsigned i, size, s;
3268 for (i = 0; i < data->vgroups.length (); i++)
3270 struct iv_group *group = data->vgroups[i];
3272 if (data->consider_all_candidates)
3273 size = data->vcands.length ();
3274 else
3276 s = bitmap_count_bits (group->related_cands);
3278 /* Round up to a power of two, so that taking the modulo is fast. */
3279 size = s ? (1 << ceil_log2 (s)) : 1;
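/* E.g. 5 related candidates give an 8-entry map, so that the lookup in
get_group_iv_cost can reduce a candidate id with a simple
"id & (size - 1)" instead of a division. */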
3282 group->n_map_members = size;
3283 group->cost_map = XCNEWVEC (struct cost_pair, size);
3287 /* Returns a description of the computation cost of an expression whose
3288 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3290 static comp_cost
3291 new_cost (unsigned runtime, unsigned complexity)
3293 comp_cost cost;
3295 cost.cost = runtime;
3296 cost.complexity = complexity;
3298 return cost;
3301 /* Returns true if COST is infinite. */
3303 static bool
3304 infinite_cost_p (comp_cost cost)
3306 return cost.cost == INFTY;
3309 /* Adds costs COST1 and COST2. */
3311 static comp_cost
3312 add_costs (comp_cost cost1, comp_cost cost2)
3314 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3315 return infinite_cost;
3317 cost1.cost += cost2.cost;
3318 cost1.complexity += cost2.complexity;
3320 return cost1;
3322 /* Subtracts cost COST2 from cost COST1. */
3324 static comp_cost
3325 sub_costs (comp_cost cost1, comp_cost cost2)
3327 cost1.cost -= cost2.cost;
3328 cost1.complexity -= cost2.complexity;
3330 return cost1;
3333 /* Returns a negative number if COST1 < COST2, a positive number if
3334 COST1 > COST2, and 0 if COST1 = COST2. */
3336 static int
3337 compare_costs (comp_cost cost1, comp_cost cost2)
3339 if (cost1.cost == cost2.cost)
3340 return cost1.complexity - cost2.complexity;
3342 return cost1.cost - cost2.cost;
3345 /* Sets the cost of the (GROUP, CAND) pair to COST and records that it depends
3346 on invariants DEPENDS_ON and that the value used in expressing it
3347 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3349 static void
3350 set_group_iv_cost (struct ivopts_data *data,
3351 struct iv_group *group, struct iv_cand *cand,
3352 comp_cost cost, bitmap depends_on, tree value,
3353 enum tree_code comp, iv_inv_expr_ent *inv_expr)
3355 unsigned i, s;
3357 if (infinite_cost_p (cost))
3359 BITMAP_FREE (depends_on);
3360 return;
3363 if (data->consider_all_candidates)
3365 group->cost_map[cand->id].cand = cand;
3366 group->cost_map[cand->id].cost = cost;
3367 group->cost_map[cand->id].depends_on = depends_on;
3368 group->cost_map[cand->id].value = value;
3369 group->cost_map[cand->id].comp = comp;
3370 group->cost_map[cand->id].inv_expr = inv_expr;
3371 return;
3374 /* n_map_members is a power of two, so this computes modulo. */
3375 s = cand->id & (group->n_map_members - 1);
3376 for (i = s; i < group->n_map_members; i++)
3377 if (!group->cost_map[i].cand)
3378 goto found;
3379 for (i = 0; i < s; i++)
3380 if (!group->cost_map[i].cand)
3381 goto found;
3383 gcc_unreachable ();
3385 found:
3386 group->cost_map[i].cand = cand;
3387 group->cost_map[i].cost = cost;
3388 group->cost_map[i].depends_on = depends_on;
3389 group->cost_map[i].value = value;
3390 group->cost_map[i].comp = comp;
3391 group->cost_map[i].inv_expr = inv_expr;
3394 /* Gets cost of (GROUP, CAND) pair. */
3396 static struct cost_pair *
3397 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3398 struct iv_cand *cand)
3400 unsigned i, s;
3401 struct cost_pair *ret;
3403 if (!cand)
3404 return NULL;
3406 if (data->consider_all_candidates)
3408 ret = group->cost_map + cand->id;
3409 if (!ret->cand)
3410 return NULL;
3412 return ret;
3415 /* n_map_members is a power of two, so this computes modulo. */
3416 s = cand->id & (group->n_map_members - 1);
3417 for (i = s; i < group->n_map_members; i++)
3418 if (group->cost_map[i].cand == cand)
3419 return group->cost_map + i;
3420 else if (group->cost_map[i].cand == NULL)
3421 return NULL;
3422 for (i = 0; i < s; i++)
3423 if (group->cost_map[i].cand == cand)
3424 return group->cost_map + i;
3425 else if (group->cost_map[i].cand == NULL)
3426 return NULL;
3428 return NULL;
3431 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3432 static rtx
3433 produce_memory_decl_rtl (tree obj, int *regno)
3435 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3436 machine_mode address_mode = targetm.addr_space.address_mode (as);
3437 rtx x;
3439 gcc_assert (obj);
3440 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3442 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3443 x = gen_rtx_SYMBOL_REF (address_mode, name);
3444 SET_SYMBOL_REF_DECL (x, obj);
3445 x = gen_rtx_MEM (DECL_MODE (obj), x);
3446 set_mem_addr_space (x, as);
3447 targetm.encode_section_info (obj, x, true);
3449 else
3451 x = gen_raw_REG (address_mode, (*regno)++);
3452 x = gen_rtx_MEM (DECL_MODE (obj), x);
3453 set_mem_addr_space (x, as);
3456 return x;
3459 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
3460 walk_tree. DATA contains the actual fake register number. */
3462 static tree
3463 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3465 tree obj = NULL_TREE;
3466 rtx x = NULL_RTX;
3467 int *regno = (int *) data;
3469 switch (TREE_CODE (*expr_p))
3471 case ADDR_EXPR:
3472 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3473 handled_component_p (*expr_p);
3474 expr_p = &TREE_OPERAND (*expr_p, 0))
3475 continue;
3476 obj = *expr_p;
3477 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3478 x = produce_memory_decl_rtl (obj, regno);
3479 break;
3481 case SSA_NAME:
3482 *ws = 0;
3483 obj = SSA_NAME_VAR (*expr_p);
3484 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3485 if (!obj)
3486 return NULL_TREE;
3487 if (!DECL_RTL_SET_P (obj))
3488 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3489 break;
3491 case VAR_DECL:
3492 case PARM_DECL:
3493 case RESULT_DECL:
3494 *ws = 0;
3495 obj = *expr_p;
3497 if (DECL_RTL_SET_P (obj))
3498 break;
3500 if (DECL_MODE (obj) == BLKmode)
3501 x = produce_memory_decl_rtl (obj, regno);
3502 else
3503 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3505 break;
3507 default:
3508 break;
3511 if (x)
3513 decl_rtl_to_reset.safe_push (obj);
3514 SET_DECL_RTL (obj, x);
3517 return NULL_TREE;
3520 /* Determines cost of the computation of EXPR. */
3522 static unsigned
3523 computation_cost (tree expr, bool speed)
3525 rtx_insn *seq;
3526 rtx rslt;
3527 tree type = TREE_TYPE (expr);
3528 unsigned cost;
3529 /* Avoid using hard regs in ways which may be unsupported. */
3530 int regno = LAST_VIRTUAL_REGISTER + 1;
3531 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3532 enum node_frequency real_frequency = node->frequency;
3534 node->frequency = NODE_FREQUENCY_NORMAL;
3535 crtl->maybe_hot_insn_p = speed;
3536 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3537 start_sequence ();
3538 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3539 seq = get_insns ();
3540 end_sequence ();
3541 default_rtl_profile ();
3542 node->frequency = real_frequency;
3544 cost = seq_cost (seq, speed);
3545 if (MEM_P (rslt))
3546 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3547 TYPE_ADDR_SPACE (type), speed);
3548 else if (!REG_P (rslt))
3549 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3551 return cost;
3554 /* Returns variable containing the value of candidate CAND at statement AT. */
3556 static tree
3557 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3559 if (stmt_after_increment (loop, cand, stmt))
3560 return cand->var_after;
3561 else
3562 return cand->var_before;
3565 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3566 same precision that is at least as wide as the precision of TYPE, stores
3567 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3568 type of A and B. */
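/* Illustrative example: if *A is (unsigned int) x and *B is
(unsigned int) y, with x and y of type "long" (assumed wider than
"unsigned int" here), then *A and *B are replaced by x and y and "long" is
returned; otherwise the type of *A is returned unchanged. */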
3570 static tree
3571 determine_common_wider_type (tree *a, tree *b)
3573 tree wider_type = NULL;
3574 tree suba, subb;
3575 tree atype = TREE_TYPE (*a);
3577 if (CONVERT_EXPR_P (*a))
3579 suba = TREE_OPERAND (*a, 0);
3580 wider_type = TREE_TYPE (suba);
3581 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3582 return atype;
3584 else
3585 return atype;
3587 if (CONVERT_EXPR_P (*b))
3589 subb = TREE_OPERAND (*b, 0);
3590 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3591 return atype;
3593 else
3594 return atype;
3596 *a = suba;
3597 *b = subb;
3598 return wider_type;
3601 /* Determines the expression by which USE is expressed from induction variable
3602 CAND at statement AT in LOOP. The expression is stored in a decomposed
3603 form into AFF. Returns false if USE cannot be expressed using CAND. */
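/* As a rough example (values are illustrative): for a use with iv
{base: b + 4, step: 4} and a candidate with iv {base: 0, step: 1}, the
ratio is 4 and the affine form built below is (b + 4) + 4 * var, where var
holds the candidate's value at AT. */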
3605 static bool
3606 get_computation_aff (struct loop *loop,
3607 struct iv_use *use, struct iv_cand *cand, gimple *at,
3608 struct aff_tree *aff)
3610 tree ubase = use->iv->base;
3611 tree ustep = use->iv->step;
3612 tree cbase = cand->iv->base;
3613 tree cstep = cand->iv->step, cstep_common;
3614 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3615 tree common_type, var;
3616 tree uutype;
3617 aff_tree cbase_aff, var_aff;
3618 widest_int rat;
3620 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3622 /* We do not have enough precision to express the values of the use. */
3623 return false;
3626 var = var_at_stmt (loop, cand, at);
3627 uutype = unsigned_type_for (utype);
3629 /* If the conversion is not noop, perform it. */
3630 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3632 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3633 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3635 tree inner_base, inner_step, inner_type;
3636 inner_base = TREE_OPERAND (cbase, 0);
3637 if (CONVERT_EXPR_P (cstep))
3638 inner_step = TREE_OPERAND (cstep, 0);
3639 else
3640 inner_step = cstep;
3642 inner_type = TREE_TYPE (inner_base);
3643 /* If candidate is added from a biv whose type is smaller than
3644 ctype, we know both candidate and the biv won't overflow.
3645 In this case, it's safe to skip the conversion in the candidate.
3646 As an example, (unsigned short)((unsigned long)A) equals
3647 (unsigned short)A, if A has a type no larger than short. */
3648 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3650 cbase = inner_base;
3651 cstep = inner_step;
3654 cstep = fold_convert (uutype, cstep);
3655 cbase = fold_convert (uutype, cbase);
3656 var = fold_convert (uutype, var);
3659 /* Ratio is 1 when computing the value of biv cand by itself.
3660 We can't rely on constant_multiple_of in this case because the
3661 use is created after the original biv is selected. The call
3662 could fail because of inconsistent fold behavior. See PR68021
3663 for more information. */
3664 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3666 gcc_assert (is_gimple_assign (use->stmt));
3667 gcc_assert (use->iv->ssa_name == cand->var_after);
3668 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3669 rat = 1;
3671 else if (!constant_multiple_of (ustep, cstep, &rat))
3672 return false;
3674 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3675 type, we achieve better folding by computing their difference in this
3676 wider type, and casting the result to UUTYPE. We do not need to worry about
3677 overflows, as all the arithmetic will in the end be performed in UUTYPE
3678 anyway. */
3679 common_type = determine_common_wider_type (&ubase, &cbase);
3681 /* use = ubase - ratio * cbase + ratio * var. */
3682 tree_to_aff_combination (ubase, common_type, aff);
3683 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3684 tree_to_aff_combination (var, uutype, &var_aff);
3686 /* We need to shift the value if we are after the increment. */
3687 if (stmt_after_increment (loop, cand, at))
3689 aff_tree cstep_aff;
3691 if (common_type != uutype)
3692 cstep_common = fold_convert (common_type, cstep);
3693 else
3694 cstep_common = cstep;
3696 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3697 aff_combination_add (&cbase_aff, &cstep_aff);
3700 aff_combination_scale (&cbase_aff, -rat);
3701 aff_combination_add (aff, &cbase_aff);
3702 if (common_type != uutype)
3703 aff_combination_convert (aff, uutype);
3705 aff_combination_scale (&var_aff, rat);
3706 aff_combination_add (aff, &var_aff);
3708 return true;
3711 /* Return the type of USE. */
3713 static tree
3714 get_use_type (struct iv_use *use)
3716 tree base_type = TREE_TYPE (use->iv->base);
3717 tree type;
3719 if (use->type == USE_ADDRESS)
3721 /* The base_type may be a void pointer. Create a pointer type based on
3722 the mem_ref instead. */
3723 type = build_pointer_type (TREE_TYPE (*use->op_p));
3724 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3725 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3727 else
3728 type = base_type;
3730 return type;
3733 /* Determines the expression by which USE is expressed from induction variable
3734 CAND at statement AT in LOOP. The computation is unshared. */
3736 static tree
3737 get_computation_at (struct loop *loop,
3738 struct iv_use *use, struct iv_cand *cand, gimple *at)
3740 aff_tree aff;
3741 tree type = get_use_type (use);
3743 if (!get_computation_aff (loop, use, cand, at, &aff))
3744 return NULL_TREE;
3745 unshare_aff_combination (&aff);
3746 return fold_convert (type, aff_combination_to_tree (&aff));
3749 /* Determines the expression by which USE is expressed from induction variable
3750 CAND in LOOP. The computation is unshared. */
3752 static tree
3753 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3755 return get_computation_at (loop, use, cand, use->stmt);
3758 /* Adjust the cost COST for being in loop setup rather than loop body.
3759 If we're optimizing for space, the loop setup overhead is constant;
3760 if we're optimizing for speed, amortize it over the per-iteration cost. */
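/* E.g. a setup cost of 20 in a loop with an estimated average of 10
iterations contributes 2 to the per-iteration cost when optimizing for
speed (the numbers are purely illustrative). */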
3761 static unsigned
3762 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3764 if (cost == INFTY)
3765 return cost;
3766 else if (optimize_loop_for_speed_p (data->current_loop))
3767 return cost / avg_loop_niter (data->current_loop);
3768 else
3769 return cost;
3772 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3773 validity for a memory reference accessing memory of mode MODE in
3774 address space AS. */
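/* E.g. asking whether "base + 3 * index" is a valid SImode address would
return false on a target with x86-like scaled addressing, whereas ratios
1, 2, 4 and 8 would return true; the exact set is probed from the backend
and cached below. */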
3777 bool
3778 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3779 addr_space_t as)
3781 #define MAX_RATIO 128
3782 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3783 static vec<sbitmap> valid_mult_list;
3784 sbitmap valid_mult;
3786 if (data_index >= valid_mult_list.length ())
3787 valid_mult_list.safe_grow_cleared (data_index + 1);
3789 valid_mult = valid_mult_list[data_index];
3790 if (!valid_mult)
3792 machine_mode address_mode = targetm.addr_space.address_mode (as);
3793 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3794 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3795 rtx addr, scaled;
3796 HOST_WIDE_INT i;
3798 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3799 bitmap_clear (valid_mult);
3800 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3801 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3802 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3804 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3805 if (memory_address_addr_space_p (mode, addr, as)
3806 || memory_address_addr_space_p (mode, scaled, as))
3807 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3810 if (dump_file && (dump_flags & TDF_DETAILS))
3812 fprintf (dump_file, " allowed multipliers:");
3813 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3814 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3815 fprintf (dump_file, " %d", (int) i);
3816 fprintf (dump_file, "\n");
3817 fprintf (dump_file, "\n");
3820 valid_mult_list[data_index] = valid_mult;
3823 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3824 return false;
3826 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3829 /* Returns the cost of an address of the form symbol + var + OFFSET + RATIO * index.
3830 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3831 the variable is omitted. Compute the cost for a memory reference that accesses
3832 a memory location of mode MEM_MODE in address space AS.
3834 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3835 size of MEM_MODE / RATIO) is available. To make this determination, we
3836 look at the size of the increment to be made, which is given in CSTEP.
3837 CSTEP may be zero if the step is unknown.
3838 STMT_AFTER_INC is true iff the statement we're looking at is after the
3839 increment of the original biv.
3841 TODO -- there must be some better way. This all is quite crude. */
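/* A sketch of what gets cached below (all numbers hypothetical): for each
(address space, memory mode) pair we record the valid offset range, e.g.
[-4096, 4095], the cost of each of the sixteen sym/var/offset/ratio
address shapes, and the cost of the supported auto-increment forms; the
per-use cost computed at the end of the function is then mostly a table
lookup plus small corrections. */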
3843 enum ainc_type
3845 AINC_PRE_INC, /* Pre increment. */
3846 AINC_PRE_DEC, /* Pre decrement. */
3847 AINC_POST_INC, /* Post increment. */
3848 AINC_POST_DEC, /* Post decrement. */
3849 AINC_NONE /* Also the number of auto increment types. */
3852 struct address_cost_data
3854 HOST_WIDE_INT min_offset, max_offset;
3855 unsigned costs[2][2][2][2];
3856 unsigned ainc_costs[AINC_NONE];
3860 static comp_cost
3861 get_address_cost (bool symbol_present, bool var_present,
3862 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3863 HOST_WIDE_INT cstep, machine_mode mem_mode,
3864 addr_space_t as, bool speed,
3865 bool stmt_after_inc, bool *may_autoinc)
3867 machine_mode address_mode = targetm.addr_space.address_mode (as);
3868 static vec<address_cost_data *> address_cost_data_list;
3869 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3870 address_cost_data *data;
3871 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3872 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3873 unsigned cost, acost, complexity;
3874 enum ainc_type autoinc_type;
3875 bool offset_p, ratio_p, autoinc;
3876 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3877 unsigned HOST_WIDE_INT mask;
3878 unsigned bits;
3880 if (data_index >= address_cost_data_list.length ())
3881 address_cost_data_list.safe_grow_cleared (data_index + 1);
3883 data = address_cost_data_list[data_index];
3884 if (!data)
3886 HOST_WIDE_INT i;
3887 HOST_WIDE_INT rat, off = 0;
3888 int old_cse_not_expected, width;
3889 unsigned sym_p, var_p, off_p, rat_p, add_c;
3890 rtx_insn *seq;
3891 rtx addr, base;
3892 rtx reg0, reg1;
3894 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3896 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3898 width = GET_MODE_BITSIZE (address_mode) - 1;
3899 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3900 width = HOST_BITS_PER_WIDE_INT - 1;
3901 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3903 for (i = width; i >= 0; i--)
3905 off = -((unsigned HOST_WIDE_INT) 1 << i);
3906 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3907 if (memory_address_addr_space_p (mem_mode, addr, as))
3908 break;
3910 data->min_offset = (i == -1? 0 : off);
3912 for (i = width; i >= 0; i--)
3914 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3915 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3916 if (memory_address_addr_space_p (mem_mode, addr, as))
3917 break;
3918 /* For some strict-alignment targets, the offset must be naturally
3919 aligned. Try an aligned offset if mem_mode is not QImode. */
3920 off = mem_mode != QImode
3921 ? ((unsigned HOST_WIDE_INT) 1 << i)
3922 - GET_MODE_SIZE (mem_mode)
3923 : 0;
3924 if (off > 0)
3926 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3927 if (memory_address_addr_space_p (mem_mode, addr, as))
3928 break;
3931 if (i == -1)
3932 off = 0;
3933 data->max_offset = off;
3935 if (dump_file && (dump_flags & TDF_DETAILS))
3937 fprintf (dump_file, "get_address_cost:\n");
3938 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3939 GET_MODE_NAME (mem_mode),
3940 data->min_offset);
3941 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3942 GET_MODE_NAME (mem_mode),
3943 data->max_offset);
3946 rat = 1;
3947 for (i = 2; i <= MAX_RATIO; i++)
3948 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3950 rat = i;
3951 break;
3954 /* Compute the cost of various addressing modes. */
3955 acost = 0;
3956 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3957 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3959 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3960 || USE_STORE_PRE_DECREMENT (mem_mode))
3962 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3963 has_predec[mem_mode]
3964 = memory_address_addr_space_p (mem_mode, addr, as);
3966 if (has_predec[mem_mode])
3967 data->ainc_costs[AINC_PRE_DEC]
3968 = address_cost (addr, mem_mode, as, speed);
3970 if (USE_LOAD_POST_DECREMENT (mem_mode)
3971 || USE_STORE_POST_DECREMENT (mem_mode))
3973 addr = gen_rtx_POST_DEC (address_mode, reg0);
3974 has_postdec[mem_mode]
3975 = memory_address_addr_space_p (mem_mode, addr, as);
3977 if (has_postdec[mem_mode])
3978 data->ainc_costs[AINC_POST_DEC]
3979 = address_cost (addr, mem_mode, as, speed);
3981 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3982 || USE_STORE_PRE_INCREMENT (mem_mode))
3984 addr = gen_rtx_PRE_INC (address_mode, reg0);
3985 has_preinc[mem_mode]
3986 = memory_address_addr_space_p (mem_mode, addr, as);
3988 if (has_preinc[mem_mode])
3989 data->ainc_costs[AINC_PRE_INC]
3990 = address_cost (addr, mem_mode, as, speed);
3992 if (USE_LOAD_POST_INCREMENT (mem_mode)
3993 || USE_STORE_POST_INCREMENT (mem_mode))
3995 addr = gen_rtx_POST_INC (address_mode, reg0);
3996 has_postinc[mem_mode]
3997 = memory_address_addr_space_p (mem_mode, addr, as);
3999 if (has_postinc[mem_mode])
4000 data->ainc_costs[AINC_POST_INC]
4001 = address_cost (addr, mem_mode, as, speed);
4003 for (i = 0; i < 16; i++)
4005 sym_p = i & 1;
4006 var_p = (i >> 1) & 1;
4007 off_p = (i >> 2) & 1;
4008 rat_p = (i >> 3) & 1;
4010 addr = reg0;
4011 if (rat_p)
4012 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
4013 gen_int_mode (rat, address_mode));
4015 if (var_p)
4016 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
4018 if (sym_p)
4020 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
4021 /* ??? We can run into trouble with some backends by presenting
4022 it with symbols which haven't been properly passed through
4023 targetm.encode_section_info. By setting the local bit, we
4024 enhance the probability of things working. */
4025 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
4027 if (off_p)
4028 base = gen_rtx_fmt_e (CONST, address_mode,
4029 gen_rtx_fmt_ee
4030 (PLUS, address_mode, base,
4031 gen_int_mode (off, address_mode)));
4033 else if (off_p)
4034 base = gen_int_mode (off, address_mode);
4035 else
4036 base = NULL_RTX;
4038 if (base)
4039 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
4041 start_sequence ();
4042 /* To avoid splitting addressing modes, pretend that no cse will
4043 follow. */
4044 old_cse_not_expected = cse_not_expected;
4045 cse_not_expected = true;
4046 addr = memory_address_addr_space (mem_mode, addr, as);
4047 cse_not_expected = old_cse_not_expected;
4048 seq = get_insns ();
4049 end_sequence ();
4051 acost = seq_cost (seq, speed);
4052 acost += address_cost (addr, mem_mode, as, speed);
4054 if (!acost)
4055 acost = 1;
4056 data->costs[sym_p][var_p][off_p][rat_p] = acost;
4059 /* On some targets, it is quite expensive to load a symbol into a register,
4060 which makes addresses that contain symbols look much more expensive.
4061 However, the symbol will have to be loaded in any case before the
4062 loop (and quite likely we have it in register already), so it does not
4063 make much sense to penalize them too heavily. So make some final
4064 tweaks for the SYMBOL_PRESENT modes:
4066 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4067 var is cheaper, use this mode with small penalty.
4068 If VAR_PRESENT is true, try whether the mode with
4069 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4070 if this is the case, use it. */
4071 add_c = add_cost (speed, address_mode);
4072 for (i = 0; i < 8; i++)
4074 var_p = i & 1;
4075 off_p = (i >> 1) & 1;
4076 rat_p = (i >> 2) & 1;
4078 acost = data->costs[0][1][off_p][rat_p] + 1;
4079 if (var_p)
4080 acost += add_c;
4082 if (acost < data->costs[1][var_p][off_p][rat_p])
4083 data->costs[1][var_p][off_p][rat_p] = acost;
4086 if (dump_file && (dump_flags & TDF_DETAILS))
4088 fprintf (dump_file, "<Address Costs>:\n");
4090 for (i = 0; i < 16; i++)
4092 sym_p = i & 1;
4093 var_p = (i >> 1) & 1;
4094 off_p = (i >> 2) & 1;
4095 rat_p = (i >> 3) & 1;
4097 fprintf (dump_file, " ");
4098 if (sym_p)
4099 fprintf (dump_file, "sym + ");
4100 if (var_p)
4101 fprintf (dump_file, "var + ");
4102 if (off_p)
4103 fprintf (dump_file, "cst + ");
4104 if (rat_p)
4105 fprintf (dump_file, "rat * ");
4107 acost = data->costs[sym_p][var_p][off_p][rat_p];
4108 fprintf (dump_file, "index costs %d\n", acost);
4110 if (has_predec[mem_mode] || has_postdec[mem_mode]
4111 || has_preinc[mem_mode] || has_postinc[mem_mode])
4112 fprintf (dump_file, " May include autoinc/dec\n");
4113 fprintf (dump_file, "\n");
4116 address_cost_data_list[data_index] = data;
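/* The block below sign-extends OFFSET from the width of the address
   mode.  Example values only: with a 32-bit address_mode, an OFFSET of
   0xfffffff0 ends up as -16 in S_OFFSET.  */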
4119 bits = GET_MODE_BITSIZE (address_mode);
4120 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4121 offset &= mask;
4122 if ((offset >> (bits - 1) & 1))
4123 offset |= ~mask;
4124 s_offset = offset;
4126 autoinc = false;
4127 autoinc_type = AINC_NONE;
4128 msize = GET_MODE_SIZE (mem_mode);
4129 autoinc_offset = offset;
4130 if (stmt_after_inc)
4131 autoinc_offset += ratio * cstep;
4132 if (symbol_present || var_present || ratio != 1)
4133 autoinc = false;
4134 else
4136 if (has_postinc[mem_mode] && autoinc_offset == 0
4137 && msize == cstep)
4138 autoinc_type = AINC_POST_INC;
4139 else if (has_postdec[mem_mode] && autoinc_offset == 0
4140 && msize == -cstep)
4141 autoinc_type = AINC_POST_DEC;
4142 else if (has_preinc[mem_mode] && autoinc_offset == msize
4143 && msize == cstep)
4144 autoinc_type = AINC_PRE_INC;
4145 else if (has_predec[mem_mode] && autoinc_offset == -msize
4146 && msize == -cstep)
4147 autoinc_type = AINC_PRE_DEC;
4149 if (autoinc_type != AINC_NONE)
4150 autoinc = true;
4153 cost = 0;
4154 offset_p = (s_offset != 0
4155 && data->min_offset <= s_offset
4156 && s_offset <= data->max_offset);
4157 ratio_p = (ratio != 1
4158 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4160 if (ratio != 1 && !ratio_p)
4161 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4163 if (s_offset && !offset_p && !symbol_present)
4164 cost += add_cost (speed, address_mode);
4166 if (may_autoinc)
4167 *may_autoinc = autoinc;
4168 if (autoinc)
4169 acost = data->ainc_costs[autoinc_type];
4170 else
4171 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4172 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4173 return new_cost (cost + acost, complexity);
4176 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4177 EXPR operand holding the multiplication that can be done as a shift.
4178 COST0 and COST1 are the costs for calculating the operands of EXPR.
4179 Returns true if successful, and returns the cost in COST. */
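/* A concrete illustration (example expression only): for
   EXPR = a + b * 8 with MULT = b * 8 we have m == 3 and compare

     as_cost = add_cost + shift_cost (m)   -- tmp = b << 3;  a + tmp
     sa_cost = shiftadd_cost (m)           -- a single shift-and-add insn

   and the cheaper of the two, plus the cost of computing a, is
   returned in *COST.  */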
4181 static bool
4182 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4183 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4185 comp_cost res;
4186 tree op1 = TREE_OPERAND (expr, 1);
4187 tree cst = TREE_OPERAND (mult, 1);
4188 tree multop = TREE_OPERAND (mult, 0);
4189 int m = exact_log2 (int_cst_value (cst));
4190 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4191 int as_cost, sa_cost;
4192 bool mult_in_op1;
4194 if (!(m >= 0 && m < maxm))
4195 return false;
4197 STRIP_NOPS (op1);
4198 mult_in_op1 = operand_equal_p (op1, mult, 0);
4200 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4202 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4203 use that in preference to a shift insn followed by an add insn. */
4204 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4205 ? shiftadd_cost (speed, mode, m)
4206 : (mult_in_op1
4207 ? shiftsub1_cost (speed, mode, m)
4208 : shiftsub0_cost (speed, mode, m)));
4210 res = new_cost (MIN (as_cost, sa_cost), 0);
4211 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4213 STRIP_NOPS (multop);
4214 if (!is_gimple_val (multop))
4215 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4217 *cost = res;
4218 return true;
4221 /* Estimates cost of forcing expression EXPR into a variable. */
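/* As a rough illustration (example expression only): forcing a + 4 * b
   costs about an addition plus the cheaper of a multiplication by 4 or
   a shift-and-add, a plain SSA name costs nothing, and integer
   constants, symbols and addresses use the costs precomputed below;
   the total is capped by target_spill_cost.  */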
4223 static comp_cost
4224 force_expr_to_var_cost (tree expr, bool speed)
4226 static bool costs_initialized = false;
4227 static unsigned integer_cost [2];
4228 static unsigned symbol_cost [2];
4229 static unsigned address_cost [2];
4230 tree op0, op1;
4231 comp_cost cost0, cost1, cost;
4232 machine_mode mode;
4234 if (!costs_initialized)
4236 tree type = build_pointer_type (integer_type_node);
4237 tree var, addr;
4238 rtx x;
4239 int i;
4241 var = create_tmp_var_raw (integer_type_node, "test_var");
4242 TREE_STATIC (var) = 1;
4243 x = produce_memory_decl_rtl (var, NULL);
4244 SET_DECL_RTL (var, x);
4246 addr = build1 (ADDR_EXPR, type, var);
4249 for (i = 0; i < 2; i++)
4251 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4252 2000), i);
4254 symbol_cost[i] = computation_cost (addr, i) + 1;
4256 address_cost[i]
4257 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4258 if (dump_file && (dump_flags & TDF_DETAILS))
4260 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4261 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4262 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4263 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4264 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4265 fprintf (dump_file, "\n");
4269 costs_initialized = true;
4272 STRIP_NOPS (expr);
4274 if (SSA_VAR_P (expr))
4275 return no_cost;
4277 if (is_gimple_min_invariant (expr))
4279 if (TREE_CODE (expr) == INTEGER_CST)
4280 return new_cost (integer_cost [speed], 0);
4282 if (TREE_CODE (expr) == ADDR_EXPR)
4284 tree obj = TREE_OPERAND (expr, 0);
4286 if (TREE_CODE (obj) == VAR_DECL
4287 || TREE_CODE (obj) == PARM_DECL
4288 || TREE_CODE (obj) == RESULT_DECL)
4289 return new_cost (symbol_cost [speed], 0);
4292 return new_cost (address_cost [speed], 0);
4295 switch (TREE_CODE (expr))
4297 case POINTER_PLUS_EXPR:
4298 case PLUS_EXPR:
4299 case MINUS_EXPR:
4300 case MULT_EXPR:
4301 op0 = TREE_OPERAND (expr, 0);
4302 op1 = TREE_OPERAND (expr, 1);
4303 STRIP_NOPS (op0);
4304 STRIP_NOPS (op1);
4305 break;
4307 CASE_CONVERT:
4308 case NEGATE_EXPR:
4309 op0 = TREE_OPERAND (expr, 0);
4310 STRIP_NOPS (op0);
4311 op1 = NULL_TREE;
4312 break;
4314 default:
4315 /* Just an arbitrary value, FIXME. */
4316 return new_cost (target_spill_cost[speed], 0);
4319 if (op0 == NULL_TREE
4320 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4321 cost0 = no_cost;
4322 else
4323 cost0 = force_expr_to_var_cost (op0, speed);
4325 if (op1 == NULL_TREE
4326 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4327 cost1 = no_cost;
4328 else
4329 cost1 = force_expr_to_var_cost (op1, speed);
4331 mode = TYPE_MODE (TREE_TYPE (expr));
4332 switch (TREE_CODE (expr))
4334 case POINTER_PLUS_EXPR:
4335 case PLUS_EXPR:
4336 case MINUS_EXPR:
4337 case NEGATE_EXPR:
4338 cost = new_cost (add_cost (speed, mode), 0);
4339 if (TREE_CODE (expr) != NEGATE_EXPR)
4341 tree mult = NULL_TREE;
4342 comp_cost sa_cost;
4343 if (TREE_CODE (op1) == MULT_EXPR)
4344 mult = op1;
4345 else if (TREE_CODE (op0) == MULT_EXPR)
4346 mult = op0;
4348 if (mult != NULL_TREE
4349 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4350 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4351 speed, &sa_cost))
4352 return sa_cost;
4354 break;
4356 CASE_CONVERT:
4358 tree inner_mode, outer_mode;
4359 outer_mode = TREE_TYPE (expr);
4360 inner_mode = TREE_TYPE (op0);
4361 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4362 TYPE_MODE (inner_mode), speed), 0);
4364 break;
4366 case MULT_EXPR:
4367 if (cst_and_fits_in_hwi (op0))
4368 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4369 mode, speed), 0);
4370 else if (cst_and_fits_in_hwi (op1))
4371 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4372 mode, speed), 0);
4373 else
4374 return new_cost (target_spill_cost [speed], 0);
4375 break;
4377 default:
4378 gcc_unreachable ();
4381 cost = add_costs (cost, cost0);
4382 cost = add_costs (cost, cost1);
4384 /* Bound the cost by target_spill_cost. The parts of complicated
4385 computations often are either loop invariant or at least can
4386 be shared between several iv uses, so letting this grow without
4387 limits would not give reasonable results. */
4388 if (cost.cost > (int) target_spill_cost [speed])
4389 cost.cost = target_spill_cost [speed];
4391 return cost;
4394 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4395 invariants the computation depends on. */
4397 static comp_cost
4398 force_var_cost (struct ivopts_data *data,
4399 tree expr, bitmap *depends_on)
4401 if (depends_on)
4403 fd_ivopts_data = data;
4404 walk_tree (&expr, find_depends, depends_on, NULL);
4407 return force_expr_to_var_cost (expr, data->speed);
4410 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4411 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4412 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4413 invariants the computation depends on. */
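/* Illustrative example (made-up declaration): for ADDR == a.f with
   "static struct s a;", the core object is the static VAR_DECL a, so
   *SYMBOL_PRESENT is set, *VAR_PRESENT is cleared and the byte offset
   of field f is added to *OFFSET.  For an automatic variable the
   address counts as a "var" part instead.  */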
4415 static comp_cost
4416 split_address_cost (struct ivopts_data *data,
4417 tree addr, bool *symbol_present, bool *var_present,
4418 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4420 tree core;
4421 HOST_WIDE_INT bitsize;
4422 HOST_WIDE_INT bitpos;
4423 tree toffset;
4424 machine_mode mode;
4425 int unsignedp, reversep, volatilep;
4427 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4428 &unsignedp, &reversep, &volatilep, false);
4430 if (toffset != 0
4431 || bitpos % BITS_PER_UNIT != 0
4432 || reversep
4433 || TREE_CODE (core) != VAR_DECL)
4435 *symbol_present = false;
4436 *var_present = true;
4437 fd_ivopts_data = data;
4438 if (depends_on)
4439 walk_tree (&addr, find_depends, depends_on, NULL);
4441 return new_cost (target_spill_cost[data->speed], 0);
4444 *offset += bitpos / BITS_PER_UNIT;
4445 if (TREE_STATIC (core)
4446 || DECL_EXTERNAL (core))
4448 *symbol_present = true;
4449 *var_present = false;
4450 return no_cost;
4453 *symbol_present = false;
4454 *var_present = true;
4455 return no_cost;
4458 /* Estimates cost of expressing difference of addresses E1 - E2 as
4459 var + symbol + offset. The value of offset is added to OFFSET,
4460 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4461 part is missing. DEPENDS_ON is a set of the invariants the computation
4462 depends on. */
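/* For instance (illustration only), for E1 == &a[4] and E2 == &a[0]
   ptr_difference_const folds the difference at compile time and only
   *OFFSET is adjusted; when the bases do not fold, E1 - E2 is rebuilt
   as an affine combination and costed via force_var_cost.  */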
4464 static comp_cost
4465 ptr_difference_cost (struct ivopts_data *data,
4466 tree e1, tree e2, bool *symbol_present, bool *var_present,
4467 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4469 HOST_WIDE_INT diff = 0;
4470 aff_tree aff_e1, aff_e2;
4471 tree type;
4473 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4475 if (ptr_difference_const (e1, e2, &diff))
4477 *offset += diff;
4478 *symbol_present = false;
4479 *var_present = false;
4480 return no_cost;
4483 if (integer_zerop (e2))
4484 return split_address_cost (data, TREE_OPERAND (e1, 0),
4485 symbol_present, var_present, offset, depends_on);
4487 *symbol_present = false;
4488 *var_present = true;
4490 type = signed_type_for (TREE_TYPE (e1));
4491 tree_to_aff_combination (e1, type, &aff_e1);
4492 tree_to_aff_combination (e2, type, &aff_e2);
4493 aff_combination_scale (&aff_e2, -1);
4494 aff_combination_add (&aff_e1, &aff_e2);
4496 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4499 /* Estimates cost of expressing difference E1 - E2 as
4500 var + symbol + offset. The value of offset is added to OFFSET,
4501 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4502 part is missing. DEPENDS_ON is a set of the invariants the computation
4503 depends on. */
4505 static comp_cost
4506 difference_cost (struct ivopts_data *data,
4507 tree e1, tree e2, bool *symbol_present, bool *var_present,
4508 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4510 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4511 unsigned HOST_WIDE_INT off1, off2;
4512 aff_tree aff_e1, aff_e2;
4513 tree type;
4515 e1 = strip_offset (e1, &off1);
4516 e2 = strip_offset (e2, &off2);
4517 *offset += off1 - off2;
4519 STRIP_NOPS (e1);
4520 STRIP_NOPS (e2);
4522 if (TREE_CODE (e1) == ADDR_EXPR)
4523 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4524 offset, depends_on);
4525 *symbol_present = false;
4527 if (operand_equal_p (e1, e2, 0))
4529 *var_present = false;
4530 return no_cost;
4533 *var_present = true;
4535 if (integer_zerop (e2))
4536 return force_var_cost (data, e1, depends_on);
4538 if (integer_zerop (e1))
4540 comp_cost cost = force_var_cost (data, e2, depends_on);
4541 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4542 return cost;
4545 type = signed_type_for (TREE_TYPE (e1));
4546 tree_to_aff_combination (e1, type, &aff_e1);
4547 tree_to_aff_combination (e2, type, &aff_e2);
4548 aff_combination_scale (&aff_e2, -1);
4549 aff_combination_add (&aff_e1, &aff_e2);
4551 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4554 /* Returns true if AFF1 and AFF2 are identical. */
4556 static bool
4557 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4559 unsigned i;
4561 if (aff1->n != aff2->n)
4562 return false;
4564 for (i = 0; i < aff1->n; i++)
4566 if (aff1->elts[i].coef != aff2->elts[i].coef)
4567 return false;
4569 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4570 return false;
4572 return true;
4575 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
4577 static iv_inv_expr_ent *
4578 record_inv_expr (struct ivopts_data *data, tree expr)
4580 struct iv_inv_expr_ent ent;
4581 struct iv_inv_expr_ent **slot;
4583 ent.expr = expr;
4584 ent.hash = iterative_hash_expr (expr, 0);
4585 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4587 if (!*slot)
4589 *slot = XNEW (struct iv_inv_expr_ent);
4590 (*slot)->expr = expr;
4591 (*slot)->hash = ent.hash;
4592 (*slot)->id = data->max_inv_expr_id++;
4595 return *slot;
4598 /* Returns the invariant expression if expression UBASE - RATIO * CBASE
4599 requires a new compiler-generated temporary. Returns NULL otherwise.
4600 ADDRESS_P is a flag indicating if the expression is for address
4601 computation. */
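/* A small illustration (example expressions only): for UBASE == p + n,
   CBASE == p and RATIO == 1, the difference folds to the loop-invariant
   "n", which is recorded and returned; for UBASE identical to CBASE
   nothing new needs to be computed and NULL is returned.  */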
4603 static iv_inv_expr_ent *
4604 get_loop_invariant_expr (struct ivopts_data *data, tree ubase,
4605 tree cbase, HOST_WIDE_INT ratio,
4606 bool address_p)
4608 aff_tree ubase_aff, cbase_aff;
4609 tree expr, ub, cb;
4611 STRIP_NOPS (ubase);
4612 STRIP_NOPS (cbase);
4613 ub = ubase;
4614 cb = cbase;
4616 if ((TREE_CODE (ubase) == INTEGER_CST)
4617 && (TREE_CODE (cbase) == INTEGER_CST))
4618 return NULL;
4620 /* Strips the constant part. */
4621 if (TREE_CODE (ubase) == PLUS_EXPR
4622 || TREE_CODE (ubase) == MINUS_EXPR
4623 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4625 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4626 ubase = TREE_OPERAND (ubase, 0);
4629 /* Strips the constant part. */
4630 if (TREE_CODE (cbase) == PLUS_EXPR
4631 || TREE_CODE (cbase) == MINUS_EXPR
4632 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4634 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4635 cbase = TREE_OPERAND (cbase, 0);
4638 if (address_p)
4640 if (((TREE_CODE (ubase) == SSA_NAME)
4641 || (TREE_CODE (ubase) == ADDR_EXPR
4642 && is_gimple_min_invariant (ubase)))
4643 && (TREE_CODE (cbase) == INTEGER_CST))
4644 return NULL;
4646 if (((TREE_CODE (cbase) == SSA_NAME)
4647 || (TREE_CODE (cbase) == ADDR_EXPR
4648 && is_gimple_min_invariant (cbase)))
4649 && (TREE_CODE (ubase) == INTEGER_CST))
4650 return NULL;
4653 if (ratio == 1)
4655 if (operand_equal_p (ubase, cbase, 0))
4656 return NULL;
4658 if (TREE_CODE (ubase) == ADDR_EXPR
4659 && TREE_CODE (cbase) == ADDR_EXPR)
4661 tree usym, csym;
4663 usym = TREE_OPERAND (ubase, 0);
4664 csym = TREE_OPERAND (cbase, 0);
4665 if (TREE_CODE (usym) == ARRAY_REF)
4667 tree ind = TREE_OPERAND (usym, 1);
4668 if (TREE_CODE (ind) == INTEGER_CST
4669 && tree_fits_shwi_p (ind)
4670 && tree_to_shwi (ind) == 0)
4671 usym = TREE_OPERAND (usym, 0);
4673 if (TREE_CODE (csym) == ARRAY_REF)
4675 tree ind = TREE_OPERAND (csym, 1);
4676 if (TREE_CODE (ind) == INTEGER_CST
4677 && tree_fits_shwi_p (ind)
4678 && tree_to_shwi (ind) == 0)
4679 csym = TREE_OPERAND (csym, 0);
4681 if (operand_equal_p (usym, csym, 0))
4682 return NULL;
4684 /* Now do a more complex comparison. */
4685 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4686 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4687 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4688 return NULL;
4691 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4692 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4694 aff_combination_scale (&cbase_aff, -1 * ratio);
4695 aff_combination_add (&ubase_aff, &cbase_aff);
4696 expr = aff_combination_to_tree (&ubase_aff);
4697 return record_inv_expr (data, expr);
4702 /* Determines the cost of the computation by which USE is expressed
4703 from induction variable CAND. If ADDRESS_P is true, we just need
4704 to create an address from it, otherwise we want to get it into a
4705 register. A set of invariants we depend on is stored in
4706 DEPENDS_ON. AT is the statement at which the value is computed.
4707 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4708 addressing is likely. */
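/* A rough illustration (invented numbers): expressing the address use
   *(base + 4 * i) by a candidate with the same base and step 4 gives
   ratio == 1, so only the cost of the base difference plus the
   addressing mode is charged; when USTEP is not a constant multiple of
   CSTEP the function returns infinite_cost.  */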
4710 static comp_cost
4711 get_computation_cost_at (struct ivopts_data *data,
4712 struct iv_use *use, struct iv_cand *cand,
4713 bool address_p, bitmap *depends_on, gimple *at,
4714 bool *can_autoinc,
4715 iv_inv_expr_ent **inv_expr)
4717 tree ubase = use->iv->base, ustep = use->iv->step;
4718 tree cbase, cstep;
4719 tree utype = TREE_TYPE (ubase), ctype;
4720 unsigned HOST_WIDE_INT cstepi, offset = 0;
4721 HOST_WIDE_INT ratio, aratio;
4722 bool var_present, symbol_present, stmt_is_after_inc;
4723 comp_cost cost;
4724 widest_int rat;
4725 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4726 machine_mode mem_mode = (address_p
4727 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4728 : VOIDmode);
4730 if (depends_on)
4731 *depends_on = NULL;
4733 /* Only consider real candidates. */
4734 if (!cand->iv)
4735 return infinite_cost;
4737 cbase = cand->iv->base;
4738 cstep = cand->iv->step;
4739 ctype = TREE_TYPE (cbase);
4741 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4743 /* We do not have a precision to express the values of use. */
4744 return infinite_cost;
4747 if (address_p
4748 || (use->iv->base_object
4749 && cand->iv->base_object
4750 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4751 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4753 /* Do not try to express address of an object with computation based
4754 on address of a different object. This may cause problems in rtl
4755 level alias analysis (that does not expect this to be happening,
4756 as this is illegal in C), and would be unlikely to be useful
4757 anyway. */
4758 if (use->iv->base_object
4759 && cand->iv->base_object
4760 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4761 return infinite_cost;
4764 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4766 /* TODO -- add direct handling of this case. */
4767 goto fallback;
4770 /* CSTEPI is removed from the offset in case the statement is after the
4771 increment. If the step is not constant, we use zero instead.
4772 This is a bit imprecise (there is the extra addition), but
4773 redundancy elimination is likely to transform the code so that
4774 it uses the value of the variable before the increment anyway,
4775 so this is not that unrealistic. */
4776 if (cst_and_fits_in_hwi (cstep))
4777 cstepi = int_cst_value (cstep);
4778 else
4779 cstepi = 0;
4781 if (!constant_multiple_of (ustep, cstep, &rat))
4782 return infinite_cost;
4784 if (wi::fits_shwi_p (rat))
4785 ratio = rat.to_shwi ();
4786 else
4787 return infinite_cost;
4789 STRIP_NOPS (cbase);
4790 ctype = TREE_TYPE (cbase);
4792 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4794 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4795 or ratio == 1, it is better to handle this like
4797 ubase - ratio * cbase + ratio * var
4799 (this also holds in the case ratio == -1, TODO). */
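/* For instance (illustrative numbers): with cbase == 16 and ratio == 2,
   the constant part -2 * 16 == -32 goes into OFFSET and only the cost
   of ubase plus the final addition/multiplication is counted below.  */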
4801 if (cst_and_fits_in_hwi (cbase))
4803 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4804 cost = difference_cost (data,
4805 ubase, build_int_cst (utype, 0),
4806 &symbol_present, &var_present, &offset,
4807 depends_on);
4808 cost.cost /= avg_loop_niter (data->current_loop);
4810 else if (ratio == 1)
4812 tree real_cbase = cbase;
4814 /* Check to see if any adjustment is needed. */
4815 if (cstepi == 0 && stmt_is_after_inc)
4817 aff_tree real_cbase_aff;
4818 aff_tree cstep_aff;
4820 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4821 &real_cbase_aff);
4822 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4824 aff_combination_add (&real_cbase_aff, &cstep_aff);
4825 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4828 cost = difference_cost (data,
4829 ubase, real_cbase,
4830 &symbol_present, &var_present, &offset,
4831 depends_on);
4832 cost.cost /= avg_loop_niter (data->current_loop);
4834 else if (address_p
4835 && !POINTER_TYPE_P (ctype)
4836 && multiplier_allowed_in_address_p
4837 (ratio, mem_mode,
4838 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4840 tree real_cbase = cbase;
4842 if (cstepi == 0 && stmt_is_after_inc)
4844 if (POINTER_TYPE_P (ctype))
4845 real_cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4846 else
4847 real_cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4849 real_cbase = fold_build2 (MULT_EXPR, ctype, real_cbase,
4850 build_int_cst (ctype, ratio));
4851 cost = difference_cost (data,
4852 ubase, real_cbase,
4853 &symbol_present, &var_present, &offset,
4854 depends_on);
4855 cost.cost /= avg_loop_niter (data->current_loop);
4857 else
4859 cost = force_var_cost (data, cbase, depends_on);
4860 cost = add_costs (cost,
4861 difference_cost (data,
4862 ubase, build_int_cst (utype, 0),
4863 &symbol_present, &var_present,
4864 &offset, depends_on));
4865 cost.cost /= avg_loop_niter (data->current_loop);
4866 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4869 /* Record the setup cost in the scratch field. */
4870 cost.scratch = cost.cost;
4872 if (inv_expr && depends_on && *depends_on)
4874 *inv_expr = get_loop_invariant_expr (data, ubase, cbase, ratio,
4875 address_p);
4876 /* Clear depends on. */
4877 if (*inv_expr != NULL)
4878 bitmap_clear (*depends_on);
4881 /* If we are after the increment, the value of the candidate is higher by
4882 one iteration. */
4883 if (stmt_is_after_inc)
4884 offset -= ratio * cstepi;
4886 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4887 (the symbol/var1/const parts may be omitted). If we are looking for an
4888 address, find the cost of addressing this. */
4889 if (address_p)
4890 return add_costs (cost,
4891 get_address_cost (symbol_present, var_present,
4892 offset, ratio, cstepi,
4893 mem_mode,
4894 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4895 speed, stmt_is_after_inc,
4896 can_autoinc));
4898 /* Otherwise estimate the costs for computing the expression. */
4899 if (!symbol_present && !var_present && !offset)
4901 if (ratio != 1)
4902 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4903 return cost;
4906 /* Symbol + offset should be compile-time computable so consider that they
4907 are added once to the variable, if present. */
4908 if (var_present && (symbol_present || offset))
4909 cost.cost += adjust_setup_cost (data,
4910 add_cost (speed, TYPE_MODE (ctype)));
4912 /* Having offset does not affect runtime cost in case it is added to
4913 symbol, but it increases complexity. */
4914 if (offset)
4915 cost.complexity++;
4917 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4919 aratio = ratio > 0 ? ratio : -ratio;
4920 if (aratio != 1)
4921 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4922 return cost;
4924 fallback:
4925 if (can_autoinc)
4926 *can_autoinc = false;
4929 /* Just get the expression, expand it and measure the cost. */
4930 tree comp = get_computation_at (data->current_loop, use, cand, at);
4932 if (!comp)
4933 return infinite_cost;
4935 if (address_p)
4936 comp = build_simple_mem_ref (comp);
4938 cost = new_cost (computation_cost (comp, speed), 0);
4939 cost.scratch = 0;
4940 return cost;
4944 /* Determines the cost of the computation by which USE is expressed
4945 from induction variable CAND. If ADDRESS_P is true, we just need
4946 to create an address from it, otherwise we want to get it into a
4947 register. A set of invariants we depend on is stored in
4948 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4949 autoinc addressing is likely. */
4951 static comp_cost
4952 get_computation_cost (struct ivopts_data *data,
4953 struct iv_use *use, struct iv_cand *cand,
4954 bool address_p, bitmap *depends_on,
4955 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4957 return get_computation_cost_at (data,
4958 use, cand, address_p, depends_on, use->stmt,
4959 can_autoinc, inv_expr);
4962 /* Determines cost of computing the use in GROUP with CAND in a generic
4963 expression. */
4965 static bool
4966 determine_group_iv_cost_generic (struct ivopts_data *data,
4967 struct iv_group *group, struct iv_cand *cand)
4969 comp_cost cost;
4970 iv_inv_expr_ent *inv_expr = NULL;
4971 bitmap depends_on = NULL;
4972 struct iv_use *use = group->vuses[0];
4974 /* The simple case first -- if we need to express value of the preserved
4975 original biv, the cost is 0. This also prevents us from counting the
4976 cost of increment twice -- once at this use and once in the cost of
4977 the candidate. */
4978 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4979 cost = no_cost;
4980 else
4981 cost = get_computation_cost (data, use, cand, false,
4982 &depends_on, NULL, &inv_expr);
4984 set_group_iv_cost (data, group, cand, cost, depends_on,
4985 NULL_TREE, ERROR_MARK, inv_expr);
4986 return !infinite_cost_p (cost);
4989 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4991 static bool
4992 determine_group_iv_cost_address (struct ivopts_data *data,
4993 struct iv_group *group, struct iv_cand *cand)
4995 unsigned i;
4996 bitmap depends_on;
4997 bool can_autoinc, first = true;
4998 iv_inv_expr_ent *inv_expr = NULL;
4999 struct iv_use *use = group->vuses[0];
5000 comp_cost sum_cost = no_cost, cost;
5002 cost = get_computation_cost (data, use, cand, true,
5003 &depends_on, &can_autoinc, &inv_expr);
5005 sum_cost = cost;
5006 if (!infinite_cost_p (sum_cost) && cand->ainc_use == use)
5008 if (can_autoinc)
5009 sum_cost.cost -= cand->cost_step;
5010 /* If we generated the candidate solely for exploiting autoincrement
5011 opportunities, and it turns out it can't be used, set the cost to
5012 infinity to make sure we ignore it. */
5013 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
5014 sum_cost = infinite_cost;
5017 /* Uses in a group can share setup code, so only add setup cost once. */
5018 cost.cost -= cost.scratch;
5019 /* Compute and add costs for the rest of the uses in this group. */
5020 for (i = 1; i < group->vuses.length () && !infinite_cost_p (sum_cost); i++)
5022 struct iv_use *next = group->vuses[i];
5024 /* Compute the cost for the first use with a different offset from the
5025 main use and add it afterwards. Costs for these uses could be quite
5026 different. Given the following uses in a group:
5027 use 0 : {base + A + offset_0, step}
5028 use 0.1: {base + A + offset_0, step}
5029 use 0.2: {base + A + offset_1, step}
5030 use 0.3: {base + A + offset_2, step}
5031 when we need to compute costs with candidate:
5032 cand 1 : {base + B + offset_0, step}
5034 The first use with a different offset is use 0.2; its cost is larger
5035 than the cost of use 0/0.1 because we need to compute:
5036 A - B + offset_1 - offset_0
5037 rather than:
5038 A - B. */
5039 if (first && next->addr_offset != use->addr_offset)
5041 first = false;
5042 cost = get_computation_cost (data, next, cand, true,
5043 NULL, &can_autoinc, NULL);
5044 /* Remove setup cost. */
5045 if (!infinite_cost_p (cost))
5046 cost.cost -= cost.scratch;
5048 sum_cost = add_costs (sum_cost, cost);
5050 set_group_iv_cost (data, group, cand, sum_cost, depends_on,
5051 NULL_TREE, ERROR_MARK, inv_expr);
5053 return !infinite_cost_p (sum_cost);
5056 /* Computes value of candidate CAND at position AT in iteration NITER, and
5057 stores it in VAL. */
5059 static void
5060 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
5061 aff_tree *val)
5063 aff_tree step, delta, nit;
5064 struct iv *iv = cand->iv;
5065 tree type = TREE_TYPE (iv->base);
5066 tree steptype = type;
5067 if (POINTER_TYPE_P (type))
5068 steptype = sizetype;
5069 steptype = unsigned_type_for (type);
5071 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
5072 aff_combination_convert (&step, steptype);
5073 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
5074 aff_combination_convert (&nit, steptype);
5075 aff_combination_mult (&nit, &step, &delta);
5076 if (stmt_after_increment (loop, cand, at))
5077 aff_combination_add (&delta, &step);
5079 tree_to_aff_combination (iv->base, type, val);
5080 if (!POINTER_TYPE_P (type))
5081 aff_combination_convert (val, steptype);
5082 aff_combination_add (val, &delta);
5085 /* Returns period of induction variable iv. */
5087 static tree
5088 iv_period (struct iv *iv)
5090 tree step = iv->step, period, type;
5091 tree pow2div;
5093 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
5095 type = unsigned_type_for (TREE_TYPE (step));
5096 /* The period of the iv is lcm (step, type_range) / step - 1,
5097 i.e., N * type_range / step - 1. Since the type range is a power
5098 of two, N == step >> num_of_ending_zeros_binary (step),
5099 so the final result is
5101 (type_range >> num_of_ending_zeros_binary (step)) - 1
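For example (numbers for illustration only), a 32-bit iv with step 4
has num_of_ending_zeros_binary (step) == 2, so its period is
(2^32 >> 2) - 1 == 0x3fffffff iterations before the value wraps back
to its initial value.  */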
5104 pow2div = num_ending_zeros (step);
5106 period = build_low_bits_mask (type,
5107 (TYPE_PRECISION (type)
5108 - tree_to_uhwi (pow2div)));
5110 return period;
5113 /* Returns the comparison operator used when eliminating the iv USE. */
5115 static enum tree_code
5116 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
5118 struct loop *loop = data->current_loop;
5119 basic_block ex_bb;
5120 edge exit;
5122 ex_bb = gimple_bb (use->stmt);
5123 exit = EDGE_SUCC (ex_bb, 0);
5124 if (flow_bb_inside_loop_p (loop, exit->dest))
5125 exit = EDGE_SUCC (ex_bb, 1);
5127 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
5130 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5131 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5132 calculation is performed in non-wrapping type.
5134 TODO: More generally, we could test for the situation that
5135 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5136 This would require knowing the sign of OFFSET. */
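/* A made-up example of the case handled: if BASE is defined as
   "base = p_0 + a" and OFFSET == a, the affine expansion of the second
   operand cancels against OFFSET, so BASE - OFFSET == p_0 provably
   cannot overflow.  */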
5138 static bool
5139 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5141 enum tree_code code;
5142 tree e1, e2;
5143 aff_tree aff_e1, aff_e2, aff_offset;
5145 if (!nowrap_type_p (TREE_TYPE (base)))
5146 return false;
5148 base = expand_simple_operations (base);
5150 if (TREE_CODE (base) == SSA_NAME)
5152 gimple *stmt = SSA_NAME_DEF_STMT (base);
5154 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5155 return false;
5157 code = gimple_assign_rhs_code (stmt);
5158 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5159 return false;
5161 e1 = gimple_assign_rhs1 (stmt);
5162 e2 = gimple_assign_rhs2 (stmt);
5164 else
5166 code = TREE_CODE (base);
5167 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5168 return false;
5169 e1 = TREE_OPERAND (base, 0);
5170 e2 = TREE_OPERAND (base, 1);
5173 /* Use affine expansion as deeper inspection to prove the equality. */
5174 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5175 &aff_e2, &data->name_expansion_cache);
5176 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5177 &aff_offset, &data->name_expansion_cache);
5178 aff_combination_scale (&aff_offset, -1);
5179 switch (code)
5181 case PLUS_EXPR:
5182 aff_combination_add (&aff_e2, &aff_offset);
5183 if (aff_combination_zero_p (&aff_e2))
5184 return true;
5186 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5187 &aff_e1, &data->name_expansion_cache);
5188 aff_combination_add (&aff_e1, &aff_offset);
5189 return aff_combination_zero_p (&aff_e1);
5191 case POINTER_PLUS_EXPR:
5192 aff_combination_add (&aff_e2, &aff_offset);
5193 return aff_combination_zero_p (&aff_e2);
5195 default:
5196 return false;
5200 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5201 comparison with CAND. NITER describes the number of iterations of
5202 the loops. If successful, the comparison in COMP_P is altered accordingly.
5204 We aim to handle the following situation:
5206 sometype *base, *p;
5207 int a, b, i;
5209 i = a;
5210 p = p_0 = base + a;
5214 bla (*p);
5215 p++;
5216 i++;
5218 while (i < b);
5220 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5221 We aim to optimize this to
5223 p = p_0 = base + a;
5226 bla (*p);
5227 p++;
5229 while (p < p_0 - a + b);
5231 This preserves the correctness, since the pointer arithmetics does not
5232 overflow. More precisely:
5234 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5235 overflow in computing it or the values of p.
5236 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5237 overflow. To prove this, we use the fact that p_0 = base + a. */
5239 static bool
5240 iv_elimination_compare_lt (struct ivopts_data *data,
5241 struct iv_cand *cand, enum tree_code *comp_p,
5242 struct tree_niter_desc *niter)
5244 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5245 struct aff_tree nit, tmpa, tmpb;
5246 enum tree_code comp;
5247 HOST_WIDE_INT step;
5249 /* We need to know that the candidate induction variable does not overflow.
5250 While more complex analysis may be used to prove this, for now just
5251 check that the variable appears in the original program and that it
5252 is computed in a type that guarantees no overflows. */
5253 cand_type = TREE_TYPE (cand->iv->base);
5254 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5255 return false;
5257 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5258 the calculation of the BOUND could overflow, making the comparison
5259 invalid. */
5260 if (!data->loop_single_exit_p)
5261 return false;
5263 /* We need to be able to decide whether candidate is increasing or decreasing
5264 in order to choose the right comparison operator. */
5265 if (!cst_and_fits_in_hwi (cand->iv->step))
5266 return false;
5267 step = int_cst_value (cand->iv->step);
5269 /* Check that the number of iterations matches the expected pattern:
5270 a + 1 > b ? 0 : b - a - 1. */
5271 mbz = niter->may_be_zero;
5272 if (TREE_CODE (mbz) == GT_EXPR)
5274 /* Handle a + 1 > b. */
5275 tree op0 = TREE_OPERAND (mbz, 0);
5276 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5278 a = TREE_OPERAND (op0, 0);
5279 b = TREE_OPERAND (mbz, 1);
5281 else
5282 return false;
5284 else if (TREE_CODE (mbz) == LT_EXPR)
5286 tree op1 = TREE_OPERAND (mbz, 1);
5288 /* Handle b < a + 1. */
5289 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5291 a = TREE_OPERAND (op1, 0);
5292 b = TREE_OPERAND (mbz, 0);
5294 else
5295 return false;
5297 else
5298 return false;
5300 /* Expected number of iterations is B - A - 1. Check that it matches
5301 the actual number, i.e., that B - A - NITER = 1. */
5302 tree_to_aff_combination (niter->niter, nit_type, &nit);
5303 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5304 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5305 aff_combination_scale (&nit, -1);
5306 aff_combination_scale (&tmpa, -1);
5307 aff_combination_add (&tmpb, &tmpa);
5308 aff_combination_add (&tmpb, &nit);
5309 if (tmpb.n != 0 || tmpb.offset != 1)
5310 return false;
5312 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5313 overflow. */
5314 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5315 cand->iv->step,
5316 fold_convert (TREE_TYPE (cand->iv->step), a));
5317 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5318 return false;
5320 /* Determine the new comparison operator. */
5321 comp = step < 0 ? GT_EXPR : LT_EXPR;
5322 if (*comp_p == NE_EXPR)
5323 *comp_p = comp;
5324 else if (*comp_p == EQ_EXPR)
5325 *comp_p = invert_tree_comparison (comp, false);
5326 else
5327 gcc_unreachable ();
5329 return true;
5332 /* Check whether it is possible to express the condition in USE by a comparison
5333 of candidate CAND. If so, store the value compared against in BOUND, and the
5334 comparison operator in COMP. */
5336 static bool
5337 may_eliminate_iv (struct ivopts_data *data,
5338 struct iv_use *use, struct iv_cand *cand, tree *bound,
5339 enum tree_code *comp)
5341 basic_block ex_bb;
5342 edge exit;
5343 tree period;
5344 struct loop *loop = data->current_loop;
5345 aff_tree bnd;
5346 struct tree_niter_desc *desc = NULL;
5348 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5349 return false;
5351 /* For now this works only for exits that dominate the loop latch.
5352 TODO: extend to other conditions inside loop body. */
5353 ex_bb = gimple_bb (use->stmt);
5354 if (use->stmt != last_stmt (ex_bb)
5355 || gimple_code (use->stmt) != GIMPLE_COND
5356 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5357 return false;
5359 exit = EDGE_SUCC (ex_bb, 0);
5360 if (flow_bb_inside_loop_p (loop, exit->dest))
5361 exit = EDGE_SUCC (ex_bb, 1);
5362 if (flow_bb_inside_loop_p (loop, exit->dest))
5363 return false;
5365 desc = niter_for_exit (data, exit);
5366 if (!desc)
5367 return false;
5369 /* Determine whether we can use the variable to test the exit condition.
5370 This is the case iff the period of the induction variable is greater
5371 than the number of iterations for which the exit condition is true. */
5372 period = iv_period (cand->iv);
5374 /* If the number of iterations is constant, compare against it directly. */
5375 if (TREE_CODE (desc->niter) == INTEGER_CST)
5377 /* See cand_value_at. */
5378 if (stmt_after_increment (loop, cand, use->stmt))
5380 if (!tree_int_cst_lt (desc->niter, period))
5381 return false;
5383 else
5385 if (tree_int_cst_lt (period, desc->niter))
5386 return false;
5390 /* If not, and if this is the only possible exit of the loop, see whether
5391 we can get a conservative estimate on the number of iterations of the
5392 entire loop and compare against that instead. */
5393 else
5395 widest_int period_value, max_niter;
5397 max_niter = desc->max;
5398 if (stmt_after_increment (loop, cand, use->stmt))
5399 max_niter += 1;
5400 period_value = wi::to_widest (period);
5401 if (wi::gtu_p (max_niter, period_value))
5403 /* See if we can take advantage of inferred loop bound
5404 information. */
5405 if (data->loop_single_exit_p)
5407 if (!max_loop_iterations (loop, &max_niter))
5408 return false;
5409 /* The loop bound is already adjusted by adding 1. */
5410 if (wi::gtu_p (max_niter, period_value))
5411 return false;
5413 else
5414 return false;
5418 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5420 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5421 aff_combination_to_tree (&bnd));
5422 *comp = iv_elimination_compare (data, use);
5424 /* It is unlikely that computing the number of iterations using division
5425 would be more profitable than keeping the original induction variable. */
5426 if (expression_expensive_p (*bound))
5427 return false;
5429 /* Sometimes, it is possible to handle the situation that the number of
5430 iterations may be zero unless additional assumptions hold, by using <
5431 instead of != in the exit condition.
5433 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5434 base the exit condition on it. However, that is often too
5435 expensive. */
5436 if (!integer_zerop (desc->may_be_zero))
5437 return iv_elimination_compare_lt (data, cand, comp, desc);
5439 return true;
5442 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5443 be copied, if it is used in the loop body and DATA->body_includes_call. */
5445 static int
5446 parm_decl_cost (struct ivopts_data *data, tree bound)
5448 tree sbound = bound;
5449 STRIP_NOPS (sbound);
5451 if (TREE_CODE (sbound) == SSA_NAME
5452 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5453 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5454 && data->body_includes_call)
5455 return COSTS_N_INSNS (1);
5457 return 0;
5460 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5462 static bool
5463 determine_group_iv_cost_cond (struct ivopts_data *data,
5464 struct iv_group *group, struct iv_cand *cand)
5466 tree bound = NULL_TREE;
5467 struct iv *cmp_iv;
5468 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5469 comp_cost elim_cost, express_cost, cost, bound_cost;
5470 bool ok;
5471 iv_inv_expr_ent *elim_inv_expr = NULL, *express_inv_expr = NULL, *inv_expr;
5472 tree *control_var, *bound_cst;
5473 enum tree_code comp = ERROR_MARK;
5474 struct iv_use *use = group->vuses[0];
5476 gcc_assert (cand->iv);
5478 /* Try iv elimination. */
5479 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5481 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5482 if (elim_cost.cost == 0)
5483 elim_cost.cost = parm_decl_cost (data, bound);
5484 else if (TREE_CODE (bound) == INTEGER_CST)
5485 elim_cost.cost = 0;
5486 /* If we replace a loop condition 'i < n' with 'p < base + n',
5487 depends_on_elim will have 'base' and 'n' set, which implies
5488 that both 'base' and 'n' will be live during the loop. More likely,
5489 'base + n' will be loop invariant, resulting in only one live value
5490 during the loop. So in that case we clear depends_on_elim and set
5491 elim_inv_expr instead. */
5492 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5494 elim_inv_expr = record_inv_expr (data, bound);
5495 bitmap_clear (depends_on_elim);
5497 /* The bound is a loop invariant, so it will be only computed
5498 once. */
5499 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5501 else
5502 elim_cost = infinite_cost;
5504 /* Try expressing the original giv. If it is compared with an invariant,
5505 note that we cannot get rid of it. */
5506 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5507 NULL, &cmp_iv);
5508 gcc_assert (ok);
5510 /* When the condition is a comparison of the candidate IV against
5511 zero, prefer this IV.
5513 TODO: The constant that we're subtracting from the cost should
5514 be target-dependent. This information should be added to the
5515 target costs for each backend. */
5516 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5517 && integer_zerop (*bound_cst)
5518 && (operand_equal_p (*control_var, cand->var_after, 0)
5519 || operand_equal_p (*control_var, cand->var_before, 0)))
5520 elim_cost.cost -= 1;
5522 express_cost = get_computation_cost (data, use, cand, false,
5523 &depends_on_express, NULL,
5524 &express_inv_expr);
5525 fd_ivopts_data = data;
5526 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5528 /* Count the cost of the original bound as well. */
5529 bound_cost = force_var_cost (data, *bound_cst, NULL);
5530 if (bound_cost.cost == 0)
5531 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5532 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5533 bound_cost.cost = 0;
5534 express_cost.cost += bound_cost.cost;
5536 /* Choose the better approach, preferring the eliminated IV. */
5537 if (compare_costs (elim_cost, express_cost) <= 0)
5539 cost = elim_cost;
5540 depends_on = depends_on_elim;
5541 depends_on_elim = NULL;
5542 inv_expr = elim_inv_expr;
5544 else
5546 cost = express_cost;
5547 depends_on = depends_on_express;
5548 depends_on_express = NULL;
5549 bound = NULL_TREE;
5550 comp = ERROR_MARK;
5551 inv_expr = express_inv_expr;
5554 set_group_iv_cost (data, group, cand, cost,
5555 depends_on, bound, comp, inv_expr);
5557 if (depends_on_elim)
5558 BITMAP_FREE (depends_on_elim);
5559 if (depends_on_express)
5560 BITMAP_FREE (depends_on_express);
5562 return !infinite_cost_p (cost);
5565 /* Determines the cost of computing the uses in GROUP with CAND. Returns
5566 false if the uses in GROUP cannot be represented by CAND. */
5568 static bool
5569 determine_group_iv_cost (struct ivopts_data *data,
5570 struct iv_group *group, struct iv_cand *cand)
5572 switch (group->type)
5574 case USE_NONLINEAR_EXPR:
5575 return determine_group_iv_cost_generic (data, group, cand);
5577 case USE_ADDRESS:
5578 return determine_group_iv_cost_address (data, group, cand);
5580 case USE_COMPARE:
5581 return determine_group_iv_cost_cond (data, group, cand);
5583 default:
5584 gcc_unreachable ();
5588 /* Return true if get_computation_cost indicates that autoincrement is
5589 a possibility for the pair of USE and CAND, false otherwise. */
5591 static bool
5592 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5593 struct iv_cand *cand)
5595 bitmap depends_on;
5596 bool can_autoinc;
5597 comp_cost cost;
5599 if (use->type != USE_ADDRESS)
5600 return false;
5602 cost = get_computation_cost (data, use, cand, true, &depends_on,
5603 &can_autoinc, NULL);
5605 BITMAP_FREE (depends_on);
5607 return !infinite_cost_p (cost) && can_autoinc;
5610 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5611 use that allows autoincrement, and set their AINC_USE if possible. */
5613 static void
5614 set_autoinc_for_original_candidates (struct ivopts_data *data)
5616 unsigned i, j;
5618 for (i = 0; i < data->vcands.length (); i++)
5620 struct iv_cand *cand = data->vcands[i];
5621 struct iv_use *closest_before = NULL;
5622 struct iv_use *closest_after = NULL;
5623 if (cand->pos != IP_ORIGINAL)
5624 continue;
5626 for (j = 0; j < data->vgroups.length (); j++)
5628 struct iv_group *group = data->vgroups[j];
5629 struct iv_use *use = group->vuses[0];
5630 unsigned uid = gimple_uid (use->stmt);
5632 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5633 continue;
5635 if (uid < gimple_uid (cand->incremented_at)
5636 && (closest_before == NULL
5637 || uid > gimple_uid (closest_before->stmt)))
5638 closest_before = use;
5640 if (uid > gimple_uid (cand->incremented_at)
5641 && (closest_after == NULL
5642 || uid < gimple_uid (closest_after->stmt)))
5643 closest_after = use;
5646 if (closest_before != NULL
5647 && autoinc_possible_for_pair (data, closest_before, cand))
5648 cand->ainc_use = closest_before;
5649 else if (closest_after != NULL
5650 && autoinc_possible_for_pair (data, closest_after, cand))
5651 cand->ainc_use = closest_after;
5655 /* Finds the candidates for the induction variables. */
5657 static void
5658 find_iv_candidates (struct ivopts_data *data)
5660 /* Add commonly used ivs. */
5661 add_standard_iv_candidates (data);
5663 /* Add old induction variables. */
5664 add_iv_candidate_for_bivs (data);
5666 /* Add induction variables derived from uses. */
5667 add_iv_candidate_for_groups (data);
5669 set_autoinc_for_original_candidates (data);
5671 /* Record the important candidates. */
5672 record_important_candidates (data);
5674 if (dump_file && (dump_flags & TDF_DETAILS))
5676 unsigned i;
5678 fprintf (dump_file, "\n<Important Candidates>:\t");
5679 for (i = 0; i < data->vcands.length (); i++)
5680 if (data->vcands[i]->important)
5681 fprintf (dump_file, " %d,", data->vcands[i]->id);
5682 fprintf (dump_file, "\n");
5684 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5685 for (i = 0; i < data->vgroups.length (); i++)
5687 struct iv_group *group = data->vgroups[i];
5689 if (group->related_cands)
5691 fprintf (dump_file, " Group %d:\t", group->id);
5692 dump_bitmap (dump_file, group->related_cands);
5695 fprintf (dump_file, "\n");
5699 /* Determines the costs of computing each group of uses with each iv candidate. */
5701 static void
5702 determine_group_iv_costs (struct ivopts_data *data)
5704 unsigned i, j;
5705 struct iv_cand *cand;
5706 struct iv_group *group;
5707 bitmap to_clear = BITMAP_ALLOC (NULL);
5709 alloc_use_cost_map (data);
5711 for (i = 0; i < data->vgroups.length (); i++)
5713 group = data->vgroups[i];
5715 if (data->consider_all_candidates)
5717 for (j = 0; j < data->vcands.length (); j++)
5719 cand = data->vcands[j];
5720 determine_group_iv_cost (data, group, cand);
5723 else
5725 bitmap_iterator bi;
5727 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5729 cand = data->vcands[j];
5730 if (!determine_group_iv_cost (data, group, cand))
5731 bitmap_set_bit (to_clear, j);
5734 /* Remove the candidates for which the cost is infinite from
5735 the list of related candidates. */
5736 bitmap_and_compl_into (group->related_cands, to_clear);
5737 bitmap_clear (to_clear);
5741 BITMAP_FREE (to_clear);
5743 if (dump_file && (dump_flags & TDF_DETAILS))
5745 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5746 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5748 for (hash_table<iv_inv_expr_hasher>::iterator it
5749 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5750 ++it)
5751 list.safe_push (*it);
5753 list.qsort (sort_iv_inv_expr_ent);
5755 for (i = 0; i < list.length (); ++i)
5757 fprintf (dump_file, "inv_expr %d: \t", i);
5758 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5759 fprintf (dump_file, "\n");
5762 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5764 for (i = 0; i < data->vgroups.length (); i++)
5766 group = data->vgroups[i];
5768 fprintf (dump_file, "Group %d:\n", i);
5769 fprintf (dump_file, " cand\tcost\tcompl.\tinv.ex.\tdepends on\n");
5770 for (j = 0; j < group->n_map_members; j++)
5772 if (!group->cost_map[j].cand
5773 || infinite_cost_p (group->cost_map[j].cost))
5774 continue;
5776 fprintf (dump_file, " %d\t%d\t%d\t",
5777 group->cost_map[j].cand->id,
5778 group->cost_map[j].cost.cost,
5779 group->cost_map[j].cost.complexity);
5780 if (group->cost_map[j].inv_expr != NULL)
5781 fprintf (dump_file, "%d\t",
5782 group->cost_map[j].inv_expr->id);
5783 else
5784 fprintf (dump_file, "\t");
5785 if (group->cost_map[j].depends_on)
5786 bitmap_print (dump_file,
5787 group->cost_map[j].depends_on, "","");
5788 fprintf (dump_file, "\n");
5791 fprintf (dump_file, "\n");
5793 fprintf (dump_file, "\n");
5797 /* Determines cost of the candidate CAND. */
5799 static void
5800 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5802 comp_cost cost_base;
5803 unsigned cost, cost_step;
5804 tree base;
5806 if (!cand->iv)
5808 cand->cost = 0;
5809 return;
5812 /* There are two costs associated with the candidate -- its increment
5813 and its initialization. The second is almost negligible for any loop
5814 that rolls enough, so we give it only very little weight. */
5816 base = cand->iv->base;
5817 cost_base = force_var_cost (data, base, NULL);
5818 /* It is exceptional for the iv register to happen to be initialized with
5819 the proper value at no cost. In general, there will at least be a regcopy
5820 or a const set. */
5821 if (cost_base.cost == 0)
5822 cost_base.cost = COSTS_N_INSNS (1);
5823 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5825 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5827 /* Prefer the original ivs unless we may gain something by replacing it.
5828 The reason is to make debugging simpler; so this is not relevant for
5829 artificial ivs created by other optimization passes. */
5830 if (cand->pos != IP_ORIGINAL
5831 || !SSA_NAME_VAR (cand->var_before)
5832 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5833 cost++;
5835 /* Prefer not to insert statements into latch unless there are some
5836 already (so that we do not create unnecessary jumps). */
5837 if (cand->pos == IP_END
5838 && empty_block_p (ip_end_pos (data->current_loop)))
5839 cost++;
5841 cand->cost = cost;
5842 cand->cost_step = cost_step;
5845 /* Determines costs of computation of the candidates. */
5847 static void
5848 determine_iv_costs (struct ivopts_data *data)
5850 unsigned i;
5852 if (dump_file && (dump_flags & TDF_DETAILS))
5854 fprintf (dump_file, "<Candidate Costs>:\n");
5855 fprintf (dump_file, " cand\tcost\n");
5858 for (i = 0; i < data->vcands.length (); i++)
5860 struct iv_cand *cand = data->vcands[i];
5862 determine_iv_cost (data, cand);
5864 if (dump_file && (dump_flags & TDF_DETAILS))
5865 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5868 if (dump_file && (dump_flags & TDF_DETAILS))
5869 fprintf (dump_file, "\n");
5872 /* Calculates cost for having SIZE induction variables. */
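/* Purely as an illustration: the result is SIZE plus the target's
   estimate of the extra spill cost caused by SIZE additional live
   registers on top of DATA->regs_used; the "+ SIZE" term biases the
   search towards smaller iv sets even when no spilling occurs.  */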
5874 static unsigned
5875 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5877 /* We add size to the cost, so that we prefer eliminating ivs
5878 if possible. */
5879 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5880 data->body_includes_call);
5883 /* For each size of the induction variable set determine the penalty. */
5885 static void
5886 determine_set_costs (struct ivopts_data *data)
5888 unsigned j, n;
5889 gphi *phi;
5890 gphi_iterator psi;
5891 tree op;
5892 struct loop *loop = data->current_loop;
5893 bitmap_iterator bi;
5895 if (dump_file && (dump_flags & TDF_DETAILS))
5897 fprintf (dump_file, "<Global Costs>:\n");
5898 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5899 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5900 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5901 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5904 n = 0;
5905 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5907 phi = psi.phi ();
5908 op = PHI_RESULT (phi);
5910 if (virtual_operand_p (op))
5911 continue;
5913 if (get_iv (data, op))
5914 continue;
5916 n++;
5919 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5921 struct version_info *info = ver_info (data, j);
5923 if (info->inv_id && info->has_nonlin_use)
5924 n++;
5927 data->regs_used = n;
5928 if (dump_file && (dump_flags & TDF_DETAILS))
5929 fprintf (dump_file, " regs_used %d\n", n);
5931 if (dump_file && (dump_flags & TDF_DETAILS))
5933 fprintf (dump_file, " cost for size:\n");
5934 fprintf (dump_file, " ivs\tcost\n");
5935 for (j = 0; j <= 2 * target_avail_regs; j++)
5936 fprintf (dump_file, " %d\t%d\n", j,
5937 ivopts_global_cost_for_size (data, j));
5938 fprintf (dump_file, "\n");
5942 /* Returns true if A is a cheaper cost pair than B. */
5944 static bool
5945 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5947 int cmp;
5949 if (!a)
5950 return false;
5952 if (!b)
5953 return true;
5955 cmp = compare_costs (a->cost, b->cost);
5956 if (cmp < 0)
5957 return true;
5959 if (cmp > 0)
5960 return false;
5962 /* In case the costs are the same, prefer the cheaper candidate. */
5963 if (a->cand->cost < b->cand->cost)
5964 return true;
5966 return false;
5970 /* Returns the candidate by which GROUP is expressed in IVS. */
5972 static struct cost_pair *
5973 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5975 return ivs->cand_for_group[group->id];
5978 /* Computes the cost field of IVS structure. */
5980 static void
5981 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5983 comp_cost cost = ivs->cand_use_cost;
5985 cost.cost += ivs->cand_cost;
5987 cost.cost += ivopts_global_cost_for_size (data,
5988 ivs->n_regs
5989 + ivs->used_inv_exprs->elements ());
5991 ivs->cost = cost;
5994 /* Remove the invariants in set INVS from set IVS. */
5996 static void
5997 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5999 bitmap_iterator bi;
6000 unsigned iid;
6002 if (!invs)
6003 return;
6005 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6007 ivs->n_invariant_uses[iid]--;
6008 if (ivs->n_invariant_uses[iid] == 0)
6009 ivs->n_regs--;
6013 /* Set GROUP not to be expressed by any candidate in IVS. */
6015 static void
6016 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
6017 struct iv_group *group)
6019 unsigned gid = group->id, cid;
6020 struct cost_pair *cp;
6022 cp = ivs->cand_for_group[gid];
6023 if (!cp)
6024 return;
6025 cid = cp->cand->id;
6027 ivs->bad_groups++;
6028 ivs->cand_for_group[gid] = NULL;
6029 ivs->n_cand_uses[cid]--;
6031 if (ivs->n_cand_uses[cid] == 0)
6033 bitmap_clear_bit (ivs->cands, cid);
6034 /* Do not count the pseudocandidates. */
6035 if (cp->cand->iv)
6036 ivs->n_regs--;
6037 ivs->n_cands--;
6038 ivs->cand_cost -= cp->cand->cost;
6040 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
6043 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
6045 iv_ca_set_remove_invariants (ivs, cp->depends_on);
6047 if (cp->inv_expr != NULL)
6049 unsigned *slot = ivs->used_inv_exprs->get (cp->inv_expr);
6050 --(*slot);
6051 if (*slot == 0)
6052 ivs->used_inv_exprs->remove (cp->inv_expr);
6054 iv_ca_recount_cost (data, ivs);
6057 /* Add invariants in set INVS to set IVS. */
6059 static void
6060 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
6062 bitmap_iterator bi;
6063 unsigned iid;
6065 if (!invs)
6066 return;
6068 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
6070 ivs->n_invariant_uses[iid]++;
6071 if (ivs->n_invariant_uses[iid] == 1)
6072 ivs->n_regs++;
6076 /* Set cost pair for GROUP in set IVS to CP. */
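/* CP may be NULL, in which case GROUP simply becomes unassigned.  A
   previously assigned cost pair is dropped first via iv_ca_set_no_cp;
   when a new pair is installed, the candidate, register and invariant
   bookkeeping is updated and the total cost recomputed.  */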
6078 static void
6079 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
6080 struct iv_group *group, struct cost_pair *cp)
6082 unsigned gid = group->id, cid;
6084 if (ivs->cand_for_group[gid] == cp)
6085 return;
6087 if (ivs->cand_for_group[gid])
6088 iv_ca_set_no_cp (data, ivs, group);
6090 if (cp)
6092 cid = cp->cand->id;
6094 ivs->bad_groups--;
6095 ivs->cand_for_group[gid] = cp;
6096 ivs->n_cand_uses[cid]++;
6097 if (ivs->n_cand_uses[cid] == 1)
6099 bitmap_set_bit (ivs->cands, cid);
6100 /* Do not count the pseudocandidates. */
6101 if (cp->cand->iv)
6102 ivs->n_regs++;
6103 ivs->n_cands++;
6104 ivs->cand_cost += cp->cand->cost;
6106 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
6109 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
6110 iv_ca_set_add_invariants (ivs, cp->depends_on);
6112 if (cp->inv_expr != NULL)
6114 unsigned *slot = &ivs->used_inv_exprs->get_or_insert (cp->inv_expr);
6115 ++(*slot);
6117 iv_ca_recount_cost (data, ivs);
6121 /* Extend set IVS by expressing GROUP by some of the candidates in it
6122 if possible. Consider all important candidates if the candidates in
6123 set IVS don't give any result. */
6125 static void
6126 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
6127 struct iv_group *group)
6129 struct cost_pair *best_cp = NULL, *cp;
6130 bitmap_iterator bi;
6131 unsigned i;
6132 struct iv_cand *cand;
6134 gcc_assert (ivs->upto >= group->id);
6135 ivs->upto++;
6136 ivs->bad_groups++;
6138 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6140 cand = data->vcands[i];
6141 cp = get_group_iv_cost (data, group, cand);
6142 if (cheaper_cost_pair (cp, best_cp))
6143 best_cp = cp;
6146 if (best_cp == NULL)
6148 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6150 cand = data->vcands[i];
6151 cp = get_group_iv_cost (data, group, cand);
6152 if (cheaper_cost_pair (cp, best_cp))
6153 best_cp = cp;
6157 iv_ca_set_cp (data, ivs, group, best_cp);
6160 /* Get cost for assignment IVS. */
6162 static comp_cost
6163 iv_ca_cost (struct iv_ca *ivs)
6165 /* This was a conditional expression but it triggered a bug in
6166 Sun C 5.5. */
6167 if (ivs->bad_groups)
6168 return infinite_cost;
6169 else
6170 return ivs->cost;
6173 /* Returns true if all dependences of CP are among invariants in IVS. */
6175 static bool
6176 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6178 unsigned i;
6179 bitmap_iterator bi;
6181 if (!cp->depends_on)
6182 return true;
6184 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6186 if (ivs->n_invariant_uses[i] == 0)
6187 return false;
6190 return true;
6193 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
6194 it before NEXT. */
6196 static struct iv_ca_delta *
6197 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6198 struct cost_pair *new_cp, struct iv_ca_delta *next)
6200 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6202 change->group = group;
6203 change->old_cp = old_cp;
6204 change->new_cp = new_cp;
6205 change->next = next;
6207 return change;
6210 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6211 are rewritten. */
6213 static struct iv_ca_delta *
6214 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6216 struct iv_ca_delta *last;
6218 if (!l2)
6219 return l1;
6221 if (!l1)
6222 return l2;
6224 for (last = l1; last->next; last = last->next)
6225 continue;
6226 last->next = l2;
6228 return l1;
6231 /* Reverse the list of changes DELTA, forming the inverse to it. */
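/* Besides reversing the chain, old_cp and new_cp are swapped in every
   entry, so committing the returned list undoes the original changes.  */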
6233 static struct iv_ca_delta *
6234 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6236 struct iv_ca_delta *act, *next, *prev = NULL;
6238 for (act = delta; act; act = next)
6240 next = act->next;
6241 act->next = prev;
6242 prev = act;
6244 std::swap (act->old_cp, act->new_cp);
6247 return prev;
6250 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6251 reverted instead. */
6253 static void
6254 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6255 struct iv_ca_delta *delta, bool forward)
6257 struct cost_pair *from, *to;
6258 struct iv_ca_delta *act;
6260 if (!forward)
6261 delta = iv_ca_delta_reverse (delta);
6263 for (act = delta; act; act = act->next)
6265 from = act->old_cp;
6266 to = act->new_cp;
6267 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6268 iv_ca_set_cp (data, ivs, act->group, to);
6271 if (!forward)
6272 iv_ca_delta_reverse (delta);
6275 /* Returns true if CAND is used in IVS. */
6277 static bool
6278 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6280 return ivs->n_cand_uses[cand->id] > 0;
6283 /* Returns number of induction variable candidates in the set IVS. */
6285 static unsigned
6286 iv_ca_n_cands (struct iv_ca *ivs)
6288 return ivs->n_cands;
6291 /* Free the list of changes DELTA. */
6293 static void
6294 iv_ca_delta_free (struct iv_ca_delta **delta)
6296 struct iv_ca_delta *act, *next;
6298 for (act = *delta; act; act = next)
6300 next = act->next;
6301 free (act);
6304 *delta = NULL;
6307 /* Allocates new iv candidates assignment. */
6309 static struct iv_ca *
6310 iv_ca_new (struct ivopts_data *data)
6312 struct iv_ca *nw = XNEW (struct iv_ca);
6314 nw->upto = 0;
6315 nw->bad_groups = 0;
6316 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6317 data->vgroups.length ());
6318 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6319 nw->cands = BITMAP_ALLOC (NULL);
6320 nw->n_cands = 0;
6321 nw->n_regs = 0;
6322 nw->cand_use_cost = no_cost;
6323 nw->cand_cost = 0;
6324 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6325 nw->used_inv_exprs = new hash_map <iv_inv_expr_ent *, unsigned> (13);
6326 nw->cost = no_cost;
6328 return nw;
6331 /* Free memory occupied by the set IVS. */
6333 static void
6334 iv_ca_free (struct iv_ca **ivs)
6336 free ((*ivs)->cand_for_group);
6337 free ((*ivs)->n_cand_uses);
6338 BITMAP_FREE ((*ivs)->cands);
6339 free ((*ivs)->n_invariant_uses);
6340 delete ((*ivs)->used_inv_exprs);
6341 free (*ivs);
6342 *ivs = NULL;
6345 /* Dumps IVS to FILE. */
6347 static void
6348 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6350 unsigned i;
6351 comp_cost cost = iv_ca_cost (ivs);
6353 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6354 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6355 ivs->cand_cost, ivs->cand_use_cost.cost,
6356 ivs->cand_use_cost.complexity);
6357 bitmap_print (file, ivs->cands, " candidates: ","\n");
6359 for (i = 0; i < ivs->upto; i++)
6361 struct iv_group *group = data->vgroups[i];
6362 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6363 if (cp)
6364 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6365 group->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6366 else
6367 fprintf (file, " group:%d --> ??\n", group->id);
6370 const char *pref = "";
6371 fprintf (file, " invariant variables: ");
6372 for (i = 1; i <= data->max_inv_id; i++)
6373 if (ivs->n_invariant_uses[i])
6375 fprintf (file, "%s%d", pref, i);
6376 pref = ", ";
6379 pref = "";
6380 fprintf (file, "\n invariant expressions: ");
6381 for (hash_map<iv_inv_expr_ent *, unsigned>::iterator it
6382 = ivs->used_inv_exprs->begin (); it != ivs->used_inv_exprs->end (); ++it)
6384 fprintf (file, "%s%d", pref, (*it).first->id);
6385 pref = ", ";
6388 fprintf (file, "\n\n");
6391 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6392 of the new set, and store the differences in DELTA. The number of induction
6393 variables in the new set is stored in N_IVS. If MIN_NCAND is true, the
6394 function tries to find a solution with a minimal number of iv candidates. */
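/* The resulting delta is committed only temporarily, to measure the cost,
   and is reverted again before returning; the caller decides whether to
   apply it for real via iv_ca_delta_commit.  */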
6396 static comp_cost
6397 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6398 struct iv_cand *cand, struct iv_ca_delta **delta,
6399 unsigned *n_ivs, bool min_ncand)
6401 unsigned i;
6402 comp_cost cost;
6403 struct iv_group *group;
6404 struct cost_pair *old_cp, *new_cp;
6406 *delta = NULL;
6407 for (i = 0; i < ivs->upto; i++)
6409 group = data->vgroups[i];
6410 old_cp = iv_ca_cand_for_group (ivs, group);
6412 if (old_cp
6413 && old_cp->cand == cand)
6414 continue;
6416 new_cp = get_group_iv_cost (data, group, cand);
6417 if (!new_cp)
6418 continue;
6420 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6421 continue;
6423 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6424 continue;
6426 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6429 iv_ca_delta_commit (data, ivs, *delta, true);
6430 cost = iv_ca_cost (ivs);
6431 if (n_ivs)
6432 *n_ivs = iv_ca_n_cands (ivs);
6433 iv_ca_delta_commit (data, ivs, *delta, false);
6435 return cost;
6438 /* Try narrowing set IVS by removing CAND. Return the cost of
6439 the new set and store the differences in DELTA. START is
6440 the candidate with which we start narrowing. */
6442 static comp_cost
6443 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6444 struct iv_cand *cand, struct iv_cand *start,
6445 struct iv_ca_delta **delta)
6447 unsigned i, ci;
6448 struct iv_group *group;
6449 struct cost_pair *old_cp, *new_cp, *cp;
6450 bitmap_iterator bi;
6451 struct iv_cand *cnd;
6452 comp_cost cost, best_cost, acost;
6454 *delta = NULL;
6455 for (i = 0; i < data->vgroups.length (); i++)
6457 group = data->vgroups[i];
6459 old_cp = iv_ca_cand_for_group (ivs, group);
6460 if (old_cp->cand != cand)
6461 continue;
6463 best_cost = iv_ca_cost (ivs);
6464 /* Start narrowing with START. */
6465 new_cp = get_group_iv_cost (data, group, start);
6467 if (data->consider_all_candidates)
6469 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6471 if (ci == cand->id || (start && ci == start->id))
6472 continue;
6474 cnd = data->vcands[ci];
6476 cp = get_group_iv_cost (data, group, cnd);
6477 if (!cp)
6478 continue;
6480 iv_ca_set_cp (data, ivs, group, cp);
6481 acost = iv_ca_cost (ivs);
6483 if (compare_costs (acost, best_cost) < 0)
6485 best_cost = acost;
6486 new_cp = cp;
6490 else
6492 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6494 if (ci == cand->id || (start && ci == start->id))
6495 continue;
6497 cnd = data->vcands[ci];
6499 cp = get_group_iv_cost (data, group, cnd);
6500 if (!cp)
6501 continue;
6503 iv_ca_set_cp (data, ivs, group, cp);
6504 acost = iv_ca_cost (ivs);
6506 if (compare_costs (acost, best_cost) < 0)
6508 best_cost = acost;
6509 new_cp = cp;
6513 /* Restore the old cp for this group. */
6514 iv_ca_set_cp (data, ivs, group, old_cp);
6516 if (!new_cp)
6518 iv_ca_delta_free (delta);
6519 return infinite_cost;
6522 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6525 iv_ca_delta_commit (data, ivs, *delta, true);
6526 cost = iv_ca_cost (ivs);
6527 iv_ca_delta_commit (data, ivs, *delta, false);
6529 return cost;
6532 /* Try optimizing the set of candidates IVS by removing candidates other
6533 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6534 differences in DELTA. */
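/* The function removes one candidate at a time -- the cheapest removal
   found by iv_ca_narrow -- and then recurses on the reduced set until no
   further removal improves the cost; all the removals are accumulated
   in DELTA.  */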
6536 static comp_cost
6537 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6538 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6540 bitmap_iterator bi;
6541 struct iv_ca_delta *act_delta, *best_delta;
6542 unsigned i;
6543 comp_cost best_cost, acost;
6544 struct iv_cand *cand;
6546 best_delta = NULL;
6547 best_cost = iv_ca_cost (ivs);
6549 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6551 cand = data->vcands[i];
6553 if (cand == except_cand)
6554 continue;
6556 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6558 if (compare_costs (acost, best_cost) < 0)
6560 best_cost = acost;
6561 iv_ca_delta_free (&best_delta);
6562 best_delta = act_delta;
6564 else
6565 iv_ca_delta_free (&act_delta);
6568 if (!best_delta)
6570 *delta = NULL;
6571 return best_cost;
6574 /* Recurse to possibly remove other unnecessary ivs. */
6575 iv_ca_delta_commit (data, ivs, best_delta, true);
6576 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6577 iv_ca_delta_commit (data, ivs, best_delta, false);
6578 *delta = iv_ca_delta_join (best_delta, *delta);
6579 return best_cost;
6582 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6583 cheaper local cost for GROUP than BEST_CP. If so, return a pointer
6584 to its cost_pair; otherwise just return BEST_CP. */
6586 static struct cost_pair*
6587 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6588 unsigned int cand_idx, struct iv_cand *old_cand,
6589 struct cost_pair *best_cp)
6591 struct iv_cand *cand;
6592 struct cost_pair *cp;
6594 gcc_assert (old_cand != NULL && best_cp != NULL);
6595 if (cand_idx == old_cand->id)
6596 return best_cp;
6598 cand = data->vcands[cand_idx];
6599 cp = get_group_iv_cost (data, group, cand);
6600 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6601 return cp;
6603 return best_cp;
6606 /* Try breaking the locally optimal fixed-point for IVS by replacing
6607 candidates that are used by more than one iv use. For each of those
6608 candidates, this function tries to represent iv uses under that candidate
6609 using other ones with lower local cost, then tries to prune the new set.
6610 If the new set has a lower cost, it returns the new cost after recording
6611 the candidate replacement in list DELTA. */
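/* Candidates that are used by exactly one group, or by more than
   ALWAYS_PRUNE_CAND_SET_BOUND groups, are not considered for replacement.  */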
6613 static comp_cost
6614 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6615 struct iv_ca_delta **delta)
6617 bitmap_iterator bi, bj;
6618 unsigned int i, j, k;
6619 struct iv_cand *cand;
6620 comp_cost orig_cost, acost;
6621 struct iv_ca_delta *act_delta, *tmp_delta;
6622 struct cost_pair *old_cp, *best_cp = NULL;
6624 *delta = NULL;
6625 orig_cost = iv_ca_cost (ivs);
6627 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6629 if (ivs->n_cand_uses[i] == 1
6630 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6631 continue;
6633 cand = data->vcands[i];
6635 act_delta = NULL;
6636 /* Represent uses under current candidate using other ones with
6637 lower local cost. */
6638 for (j = 0; j < ivs->upto; j++)
6640 struct iv_group *group = data->vgroups[j];
6641 old_cp = iv_ca_cand_for_group (ivs, group);
6643 if (old_cp->cand != cand)
6644 continue;
6646 best_cp = old_cp;
6647 if (data->consider_all_candidates)
6648 for (k = 0; k < data->vcands.length (); k++)
6649 best_cp = cheaper_cost_with_cand (data, group, k,
6650 old_cp->cand, best_cp);
6651 else
6652 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6653 best_cp = cheaper_cost_with_cand (data, group, k,
6654 old_cp->cand, best_cp);
6656 if (best_cp == old_cp)
6657 continue;
6659 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6661 /* No need for further pruning. */
6662 if (!act_delta)
6663 continue;
6665 /* Prune the new candidate set. */
6666 iv_ca_delta_commit (data, ivs, act_delta, true);
6667 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6668 iv_ca_delta_commit (data, ivs, act_delta, false);
6669 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6671 if (compare_costs (acost, orig_cost) < 0)
6673 *delta = act_delta;
6674 return acost;
6676 else
6677 iv_ca_delta_free (&act_delta);
6680 return orig_cost;
6683 /* Tries to extend the set IVS in the best possible way in order to
6684 express GROUP. If ORIGINALP is true, prefer candidates from
6685 the original set of IVs, otherwise favor important candidates not
6686 based on any memory object. */
6688 static bool
6689 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6690 struct iv_group *group, bool originalp)
6692 comp_cost best_cost, act_cost;
6693 unsigned i;
6694 bitmap_iterator bi;
6695 struct iv_cand *cand;
6696 struct iv_ca_delta *best_delta = NULL, *act_delta;
6697 struct cost_pair *cp;
6699 iv_ca_add_group (data, ivs, group);
6700 best_cost = iv_ca_cost (ivs);
6701 cp = iv_ca_cand_for_group (ivs, group);
6702 if (cp)
6704 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6705 iv_ca_set_no_cp (data, ivs, group);
6708 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6709 first try important candidates not based on any memory object. Only if
6710 this fails, try the specific ones. Rationale -- in loops with many
6711 variables the best choice often is to use just one generic biv. If we
6712 added here many ivs specific to the uses, the optimization algorithm later
6713 would be likely to get stuck in a local minimum, thus causing us to create
6714 too many ivs. The approach from few ivs to more seems more likely to be
6715 successful -- starting from few ivs, replacing an expensive use by a
6716 specific iv should always be a win. */
6717 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6719 cand = data->vcands[i];
6721 if (originalp && cand->pos != IP_ORIGINAL)
6722 continue;
6724 if (!originalp && cand->iv->base_object != NULL_TREE)
6725 continue;
6727 if (iv_ca_cand_used_p (ivs, cand))
6728 continue;
6730 cp = get_group_iv_cost (data, group, cand);
6731 if (!cp)
6732 continue;
6734 iv_ca_set_cp (data, ivs, group, cp);
6735 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6736 true);
6737 iv_ca_set_no_cp (data, ivs, group);
6738 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6740 if (compare_costs (act_cost, best_cost) < 0)
6742 best_cost = act_cost;
6744 iv_ca_delta_free (&best_delta);
6745 best_delta = act_delta;
6747 else
6748 iv_ca_delta_free (&act_delta);
6751 if (infinite_cost_p (best_cost))
6753 for (i = 0; i < group->n_map_members; i++)
6755 cp = group->cost_map + i;
6756 cand = cp->cand;
6757 if (!cand)
6758 continue;
6760 /* Already tried this. */
6761 if (cand->important)
6763 if (originalp && cand->pos == IP_ORIGINAL)
6764 continue;
6765 if (!originalp && cand->iv->base_object == NULL_TREE)
6766 continue;
6769 if (iv_ca_cand_used_p (ivs, cand))
6770 continue;
6772 act_delta = NULL;
6773 iv_ca_set_cp (data, ivs, group, cp);
6774 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6775 iv_ca_set_no_cp (data, ivs, group);
6776 act_delta = iv_ca_delta_add (group,
6777 iv_ca_cand_for_group (ivs, group),
6778 cp, act_delta);
6780 if (compare_costs (act_cost, best_cost) < 0)
6782 best_cost = act_cost;
6784 if (best_delta)
6785 iv_ca_delta_free (&best_delta);
6786 best_delta = act_delta;
6788 else
6789 iv_ca_delta_free (&act_delta);
6793 iv_ca_delta_commit (data, ivs, best_delta, true);
6794 iv_ca_delta_free (&best_delta);
6796 return !infinite_cost_p (best_cost);
6799 /* Finds an initial assignment of candidates to uses. */
6801 static struct iv_ca *
6802 get_initial_solution (struct ivopts_data *data, bool originalp)
6804 unsigned i;
6805 struct iv_ca *ivs = iv_ca_new (data);
6807 for (i = 0; i < data->vgroups.length (); i++)
6808 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6810 iv_ca_free (&ivs);
6811 return NULL;
6814 return ivs;
6817 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6818 points to a bool variable; if it is true, this function tries to break
6819 the locally optimal fixed-point by replacing candidates in IVS. */
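/* Three strategies are tried in turn: extending the set by a single new
   candidate (pruning afterwards when the resulting set is small enough),
   pruning the current set, and finally -- at most once -- the candidate
   replacement performed by iv_ca_replace.  Returns false when none of
   them lowers the cost.  */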
6821 static bool
6822 try_improve_iv_set (struct ivopts_data *data,
6823 struct iv_ca *ivs, bool *try_replace_p)
6825 unsigned i, n_ivs;
6826 comp_cost acost, best_cost = iv_ca_cost (ivs);
6827 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6828 struct iv_cand *cand;
6830 /* Try extending the set of induction variables by one. */
6831 for (i = 0; i < data->vcands.length (); i++)
6833 cand = data->vcands[i];
6835 if (iv_ca_cand_used_p (ivs, cand))
6836 continue;
6838 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6839 if (!act_delta)
6840 continue;
6842 /* If we successfully added the candidate and the set is small enough,
6843 try optimizing it by removing other candidates. */
6844 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6846 iv_ca_delta_commit (data, ivs, act_delta, true);
6847 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6848 iv_ca_delta_commit (data, ivs, act_delta, false);
6849 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6852 if (compare_costs (acost, best_cost) < 0)
6854 best_cost = acost;
6855 iv_ca_delta_free (&best_delta);
6856 best_delta = act_delta;
6858 else
6859 iv_ca_delta_free (&act_delta);
6862 if (!best_delta)
6864 /* Try removing the candidates from the set instead. */
6865 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6867 if (!best_delta && *try_replace_p)
6869 *try_replace_p = false;
6870 /* So far the candidate-selecting algorithm tends to choose fewer IVs
6871 so that it can handle cases in which loops have many variables
6872 but the best choice is often to use only one general biv. One
6873 weakness is that it can't handle the opposite case, in which different
6874 candidates should be chosen with respect to each use. To solve
6875 the problem, we replace candidates in the manner described in the
6876 comments of iv_ca_replace, thus giving the general algorithm a chance
6877 to break the locally optimal fixed-point in these cases. */
6878 best_cost = iv_ca_replace (data, ivs, &best_delta);
6881 if (!best_delta)
6882 return false;
6885 iv_ca_delta_commit (data, ivs, best_delta, true);
6886 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6887 iv_ca_delta_free (&best_delta);
6888 return true;
6891 /* Attempts to find the optimal set of induction variables. We use a simple
6892 greedy heuristic -- we try to replace at most one candidate in the selected
6893 solution and remove the unused ivs while this improves the cost. */
6895 static struct iv_ca *
6896 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6898 struct iv_ca *set;
6899 bool try_replace_p = true;
6901 /* Get the initial solution. */
6902 set = get_initial_solution (data, originalp);
6903 if (!set)
6905 if (dump_file && (dump_flags & TDF_DETAILS))
6906 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6907 return NULL;
6910 if (dump_file && (dump_flags & TDF_DETAILS))
6912 fprintf (dump_file, "Initial set of candidates:\n");
6913 iv_ca_dump (data, dump_file, set);
6916 while (try_improve_iv_set (data, set, &try_replace_p))
6918 if (dump_file && (dump_flags & TDF_DETAILS))
6920 fprintf (dump_file, "Improved to:\n");
6921 iv_ca_dump (data, dump_file, set);
6925 return set;
6928 static struct iv_ca *
6929 find_optimal_iv_set (struct ivopts_data *data)
6931 unsigned i;
6932 comp_cost cost, origcost;
6933 struct iv_ca *set, *origset;
6935 /* Determine the cost based on a strategy that starts with the original IVs,
6936 then try again using a strategy that prefers candidates not based
6937 on any IVs. */
6938 origset = find_optimal_iv_set_1 (data, true);
6939 set = find_optimal_iv_set_1 (data, false);
6941 if (!origset && !set)
6942 return NULL;
6944 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6945 cost = set ? iv_ca_cost (set) : infinite_cost;
6947 if (dump_file && (dump_flags & TDF_DETAILS))
6949 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6950 origcost.cost, origcost.complexity);
6951 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6952 cost.cost, cost.complexity);
6955 /* Choose the one with the best cost. */
6956 if (compare_costs (origcost, cost) <= 0)
6958 if (set)
6959 iv_ca_free (&set);
6960 set = origset;
6962 else if (origset)
6963 iv_ca_free (&origset);
6965 for (i = 0; i < data->vgroups.length (); i++)
6967 struct iv_group *group = data->vgroups[i];
6968 group->selected = iv_ca_cand_for_group (set, group)->cand;
6971 return set;
6974 /* Creates a new induction variable corresponding to CAND. */
6976 static void
6977 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6979 gimple_stmt_iterator incr_pos;
6980 tree base;
6981 struct iv_use *use;
6982 struct iv_group *group;
6983 bool after = false;
6985 if (!cand->iv)
6986 return;
6988 switch (cand->pos)
6990 case IP_NORMAL:
6991 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6992 break;
6994 case IP_END:
6995 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6996 after = true;
6997 break;
6999 case IP_AFTER_USE:
7000 after = true;
7001 /* fall through */
7002 case IP_BEFORE_USE:
7003 incr_pos = gsi_for_stmt (cand->incremented_at);
7004 break;
7006 case IP_ORIGINAL:
7007 /* Mark that the iv is preserved. */
7008 name_info (data, cand->var_before)->preserve_biv = true;
7009 name_info (data, cand->var_after)->preserve_biv = true;
7011 /* Rewrite the increment so that it uses var_before directly. */
7012 use = find_interesting_uses_op (data, cand->var_after);
7013 group = data->vgroups[use->group_id];
7014 group->selected = cand;
7015 return;
7018 gimple_add_tmp_var (cand->var_before);
7020 base = unshare_expr (cand->iv->base);
7022 create_iv (base, unshare_expr (cand->iv->step),
7023 cand->var_before, data->current_loop,
7024 &incr_pos, after, &cand->var_before, &cand->var_after);
7027 /* Creates new induction variables described in SET. */
7029 static void
7030 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
7032 unsigned i;
7033 struct iv_cand *cand;
7034 bitmap_iterator bi;
7036 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7038 cand = data->vcands[i];
7039 create_new_iv (data, cand);
7042 if (dump_file && (dump_flags & TDF_DETAILS))
7044 fprintf (dump_file, "Selected IV set for loop %d",
7045 data->current_loop->num);
7046 if (data->loop_loc != UNKNOWN_LOCATION)
7047 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7048 LOCATION_LINE (data->loop_loc));
7049 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
7050 avg_loop_niter (data->current_loop));
7051 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_UNSIGNED " expressions",
7052 (unsigned HOST_WIDE_INT) set->used_inv_exprs->elements ());
7053 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
7054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
7056 cand = data->vcands[i];
7057 dump_cand (dump_file, cand);
7059 fprintf (dump_file, "\n");
7063 /* Rewrites USE (definition of iv used in a nonlinear expression)
7064 using candidate CAND. */
7066 static void
7067 rewrite_use_nonlinear_expr (struct ivopts_data *data,
7068 struct iv_use *use, struct iv_cand *cand)
7070 tree comp;
7071 tree op, tgt;
7072 gassign *ass;
7073 gimple_stmt_iterator bsi;
7075 /* An important special case -- if we are asked to express value of
7076 the original iv by itself, just exit; there is no need to
7077 introduce a new computation (that might also need casting the
7078 variable to unsigned and back). */
7079 if (cand->pos == IP_ORIGINAL
7080 && cand->incremented_at == use->stmt)
7082 enum tree_code stmt_code;
7084 gcc_assert (is_gimple_assign (use->stmt));
7085 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7087 /* Check whether we may leave the computation unchanged.
7088 This is the case only if it does not rely on other
7089 computations in the loop -- otherwise, the computation
7090 we rely upon may be removed in remove_unused_ivs,
7091 thus leading to ICE. */
7092 stmt_code = gimple_assign_rhs_code (use->stmt);
7093 if (stmt_code == PLUS_EXPR
7094 || stmt_code == MINUS_EXPR
7095 || stmt_code == POINTER_PLUS_EXPR)
7097 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
7098 op = gimple_assign_rhs2 (use->stmt);
7099 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
7100 op = gimple_assign_rhs1 (use->stmt);
7101 else
7102 op = NULL_TREE;
7104 else
7105 op = NULL_TREE;
7107 if (op && expr_invariant_in_loop_p (data->current_loop, op))
7108 return;
7111 comp = get_computation (data->current_loop, use, cand);
7112 gcc_assert (comp != NULL_TREE);
7114 switch (gimple_code (use->stmt))
7116 case GIMPLE_PHI:
7117 tgt = PHI_RESULT (use->stmt);
7119 /* If we should keep the biv, do not replace it. */
7120 if (name_info (data, tgt)->preserve_biv)
7121 return;
7123 bsi = gsi_after_labels (gimple_bb (use->stmt));
7124 break;
7126 case GIMPLE_ASSIGN:
7127 tgt = gimple_assign_lhs (use->stmt);
7128 bsi = gsi_for_stmt (use->stmt);
7129 break;
7131 default:
7132 gcc_unreachable ();
7135 if (!valid_gimple_rhs_p (comp)
7136 || (gimple_code (use->stmt) != GIMPLE_PHI
7137 /* We can't allow re-allocating the stmt as it might be pointed
7138 to still. */
7139 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7140 >= gimple_num_ops (gsi_stmt (bsi)))))
7142 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
7143 true, GSI_SAME_STMT);
7144 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7146 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7147 /* As this isn't a plain copy we have to reset alignment
7148 information. */
7149 if (SSA_NAME_PTR_INFO (comp))
7150 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7154 if (gimple_code (use->stmt) == GIMPLE_PHI)
7156 ass = gimple_build_assign (tgt, comp);
7157 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7159 bsi = gsi_for_stmt (use->stmt);
7160 remove_phi_node (&bsi, false);
7162 else
7164 gimple_assign_set_rhs_from_tree (&bsi, comp);
7165 use->stmt = gsi_stmt (bsi);
7169 /* Performs a peephole optimization to reorder the iv update statement with
7170 a mem ref to enable instruction combining in later phases. The mem ref uses
7171 the iv value before the update, so the reordering transformation requires
7172 adjustment of the offset. CAND is the selected IV_CAND.
7174 Example:
7176 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7177 iv2 = iv1 + 1;
7179 if (t < val) (1)
7180 goto L;
7181 goto Head;
7184 directly propagating t over to (1) will introduce an overlapping live range,
7185 thus increasing register pressure. This peephole transforms it into:
7188 iv2 = iv1 + 1;
7189 t = MEM_REF (base, iv2, 8, 8);
7190 if (t < val)
7191 goto L;
7192 goto Head;
7195 static void
7196 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7198 tree var_after;
7199 gimple *iv_update, *stmt;
7200 basic_block bb;
7201 gimple_stmt_iterator gsi, gsi_iv;
7203 if (cand->pos != IP_NORMAL)
7204 return;
7206 var_after = cand->var_after;
7207 iv_update = SSA_NAME_DEF_STMT (var_after);
7209 bb = gimple_bb (iv_update);
7210 gsi = gsi_last_nondebug_bb (bb);
7211 stmt = gsi_stmt (gsi);
7213 /* Only handle conditional statement for now. */
7214 if (gimple_code (stmt) != GIMPLE_COND)
7215 return;
7217 gsi_prev_nondebug (&gsi);
7218 stmt = gsi_stmt (gsi);
7219 if (stmt != iv_update)
7220 return;
7222 gsi_prev_nondebug (&gsi);
7223 if (gsi_end_p (gsi))
7224 return;
7226 stmt = gsi_stmt (gsi);
7227 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7228 return;
7230 if (stmt != use->stmt)
7231 return;
7233 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7234 return;
7236 if (dump_file && (dump_flags & TDF_DETAILS))
7238 fprintf (dump_file, "Reordering \n");
7239 print_gimple_stmt (dump_file, iv_update, 0, 0);
7240 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7241 fprintf (dump_file, "\n");
7244 gsi = gsi_for_stmt (use->stmt);
7245 gsi_iv = gsi_for_stmt (iv_update);
7246 gsi_move_before (&gsi_iv, &gsi);
7248 cand->pos = IP_BEFORE_USE;
7249 cand->incremented_at = use->stmt;
7252 /* Rewrites USE (address that is an iv) using candidate CAND. */
7254 static void
7255 rewrite_use_address (struct ivopts_data *data,
7256 struct iv_use *use, struct iv_cand *cand)
7258 aff_tree aff;
7259 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7260 tree base_hint = NULL_TREE;
7261 tree ref, iv;
7262 bool ok;
7264 adjust_iv_update_pos (cand, use);
7265 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7266 gcc_assert (ok);
7267 unshare_aff_combination (&aff);
7269 /* To avoid undefined overflow problems, all IV candidates use unsigned
7270 integer types. The drawback is that this makes it impossible for
7271 create_mem_ref to distinguish an IV that is based on a memory object
7272 from one that represents simply an offset.
7274 To work around this problem, we pass a hint to create_mem_ref that
7275 indicates which variable (if any) in aff is an IV based on a memory
7276 object. Note that we only consider the candidate. If this is not
7277 based on an object, the base of the reference is in some subexpression
7278 of the use -- but these will use pointer types, so they are recognized
7279 by the create_mem_ref heuristics anyway. */
7280 if (cand->iv->base_object)
7281 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7283 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7284 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7285 reference_alias_ptr_type (*use->op_p),
7286 iv, base_hint, data->speed);
7287 copy_ref_info (ref, *use->op_p);
7288 *use->op_p = ref;
7291 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7292 candidate CAND. */
7294 static void
7295 rewrite_use_compare (struct ivopts_data *data,
7296 struct iv_use *use, struct iv_cand *cand)
7298 tree comp, *var_p, op, bound;
7299 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7300 enum tree_code compare;
7301 struct iv_group *group = data->vgroups[use->group_id];
7302 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7303 bool ok;
7305 bound = cp->value;
7306 if (bound)
7308 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7309 tree var_type = TREE_TYPE (var);
7310 gimple_seq stmts;
7312 if (dump_file && (dump_flags & TDF_DETAILS))
7314 fprintf (dump_file, "Replacing exit test: ");
7315 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7317 compare = cp->comp;
7318 bound = unshare_expr (fold_convert (var_type, bound));
7319 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7320 if (stmts)
7321 gsi_insert_seq_on_edge_immediate (
7322 loop_preheader_edge (data->current_loop),
7323 stmts);
7325 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7326 gimple_cond_set_lhs (cond_stmt, var);
7327 gimple_cond_set_code (cond_stmt, compare);
7328 gimple_cond_set_rhs (cond_stmt, op);
7329 return;
7332 /* The induction variable elimination failed; just express the original
7333 giv. */
7334 comp = get_computation (data->current_loop, use, cand);
7335 gcc_assert (comp != NULL_TREE);
7337 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7338 gcc_assert (ok);
7340 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7341 true, GSI_SAME_STMT);
7344 /* Rewrite the groups using the selected induction variables. */
7346 static void
7347 rewrite_groups (struct ivopts_data *data)
7349 unsigned i, j;
7351 for (i = 0; i < data->vgroups.length (); i++)
7353 struct iv_group *group = data->vgroups[i];
7354 struct iv_cand *cand = group->selected;
7356 gcc_assert (cand);
7358 if (group->type == USE_NONLINEAR_EXPR)
7360 for (j = 0; j < group->vuses.length (); j++)
7362 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7363 update_stmt (group->vuses[j]->stmt);
7366 else if (group->type == USE_ADDRESS)
7368 for (j = 0; j < group->vuses.length (); j++)
7370 rewrite_use_address (data, group->vuses[j], cand);
7371 update_stmt (group->vuses[j]->stmt);
7374 else
7376 gcc_assert (group->type == USE_COMPARE);
7378 for (j = 0; j < group->vuses.length (); j++)
7380 rewrite_use_compare (data, group->vuses[j], cand);
7381 update_stmt (group->vuses[j]->stmt);
7387 /* Removes the ivs that are not used after rewriting. */
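/* Before such an iv is released, any debug bind stmts that still refer to
   it are rewritten to an equivalent expression built from the selected
   candidates (going through a DEBUG_EXPR_DECL temporary when the value is
   used more than once), so the debug information stays usable.  */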
7389 static void
7390 remove_unused_ivs (struct ivopts_data *data)
7392 unsigned j;
7393 bitmap_iterator bi;
7394 bitmap toremove = BITMAP_ALLOC (NULL);
7396 /* Figure out an order in which to release SSA DEFs so that we don't
7397 release something that we'd have to propagate into a debug stmt
7398 afterwards. */
7399 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7401 struct version_info *info;
7403 info = ver_info (data, j);
7404 if (info->iv
7405 && !integer_zerop (info->iv->step)
7406 && !info->inv_id
7407 && !info->iv->nonlin_use
7408 && !info->preserve_biv)
7410 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7412 tree def = info->iv->ssa_name;
7414 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7416 imm_use_iterator imm_iter;
7417 use_operand_p use_p;
7418 gimple *stmt;
7419 int count = 0;
7421 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7423 if (!gimple_debug_bind_p (stmt))
7424 continue;
7426 /* We just want to determine whether to do nothing
7427 (count == 0), to substitute the computed
7428 expression into a single use of the SSA DEF by
7429 itself (count == 1), or to use a debug temp
7430 because the SSA DEF is used multiple times or as
7431 part of a larger expression (count > 1). */
7432 count++;
7433 if (gimple_debug_bind_get_value (stmt) != def)
7434 count++;
7436 if (count > 1)
7437 BREAK_FROM_IMM_USE_STMT (imm_iter);
7440 if (!count)
7441 continue;
7443 struct iv_use dummy_use;
7444 struct iv_cand *best_cand = NULL, *cand;
7445 unsigned i, best_pref = 0, cand_pref;
7447 memset (&dummy_use, 0, sizeof (dummy_use));
7448 dummy_use.iv = info->iv;
7449 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7451 cand = data->vgroups[i]->selected;
7452 if (cand == best_cand)
7453 continue;
7454 cand_pref = operand_equal_p (cand->iv->step,
7455 info->iv->step, 0)
7456 ? 4 : 0;
7457 cand_pref
7458 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7459 == TYPE_MODE (TREE_TYPE (info->iv->base))
7460 ? 2 : 0;
7461 cand_pref
7462 += TREE_CODE (cand->iv->base) == INTEGER_CST
7463 ? 1 : 0;
7464 if (best_cand == NULL || best_pref < cand_pref)
7466 best_cand = cand;
7467 best_pref = cand_pref;
7471 if (!best_cand)
7472 continue;
7474 tree comp = get_computation_at (data->current_loop,
7475 &dummy_use, best_cand,
7476 SSA_NAME_DEF_STMT (def));
7477 if (!comp)
7478 continue;
7480 if (count > 1)
7482 tree vexpr = make_node (DEBUG_EXPR_DECL);
7483 DECL_ARTIFICIAL (vexpr) = 1;
7484 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7485 if (SSA_NAME_VAR (def))
7486 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7487 else
7488 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7489 gdebug *def_temp
7490 = gimple_build_debug_bind (vexpr, comp, NULL);
7491 gimple_stmt_iterator gsi;
7493 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7494 gsi = gsi_after_labels (gimple_bb
7495 (SSA_NAME_DEF_STMT (def)));
7496 else
7497 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7499 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7500 comp = vexpr;
7503 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7505 if (!gimple_debug_bind_p (stmt))
7506 continue;
7508 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7509 SET_USE (use_p, comp);
7511 update_stmt (stmt);
7517 release_defs_bitset (toremove);
7519 BITMAP_FREE (toremove);
7522 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7523 for hash_map::traverse. */
7525 bool
7526 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7528 free (value);
7529 return true;
7532 /* Frees data allocated by the optimization of a single loop. */
7534 static void
7535 free_loop_data (struct ivopts_data *data)
7537 unsigned i, j;
7538 bitmap_iterator bi;
7539 tree obj;
7541 if (data->niters)
7543 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7544 delete data->niters;
7545 data->niters = NULL;
7548 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7550 struct version_info *info;
7552 info = ver_info (data, i);
7553 info->iv = NULL;
7554 info->has_nonlin_use = false;
7555 info->preserve_biv = false;
7556 info->inv_id = 0;
7558 bitmap_clear (data->relevant);
7559 bitmap_clear (data->important_candidates);
7561 for (i = 0; i < data->vgroups.length (); i++)
7563 struct iv_group *group = data->vgroups[i];
7565 for (j = 0; j < group->vuses.length (); j++)
7566 free (group->vuses[j]);
7567 group->vuses.release ();
7569 BITMAP_FREE (group->related_cands);
7570 for (j = 0; j < group->n_map_members; j++)
7571 if (group->cost_map[j].depends_on)
7572 BITMAP_FREE (group->cost_map[j].depends_on);
7574 free (group->cost_map);
7575 free (group);
7577 data->vgroups.truncate (0);
7579 for (i = 0; i < data->vcands.length (); i++)
7581 struct iv_cand *cand = data->vcands[i];
7583 if (cand->depends_on)
7584 BITMAP_FREE (cand->depends_on);
7585 free (cand);
7587 data->vcands.truncate (0);
7589 if (data->version_info_size < num_ssa_names)
7591 data->version_info_size = 2 * num_ssa_names;
7592 free (data->version_info);
7593 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7596 data->max_inv_id = 0;
7598 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7599 SET_DECL_RTL (obj, NULL_RTX);
7601 decl_rtl_to_reset.truncate (0);
7603 data->inv_expr_tab->empty ();
7604 data->max_inv_expr_id = 0;
7606 data->iv_common_cand_tab->empty ();
7607 data->iv_common_cands.truncate (0);
7610 /* Finalizes data structures used by the iv optimization pass, freeing
7611 the memory allocated in DATA. */
7613 static void
7614 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7616 free_loop_data (data);
7617 free (data->version_info);
7618 BITMAP_FREE (data->relevant);
7619 BITMAP_FREE (data->important_candidates);
7621 decl_rtl_to_reset.release ();
7622 data->vgroups.release ();
7623 data->vcands.release ();
7624 delete data->inv_expr_tab;
7625 data->inv_expr_tab = NULL;
7626 free_affine_expand_cache (&data->name_expansion_cache);
7627 delete data->iv_common_cand_tab;
7628 data->iv_common_cand_tab = NULL;
7629 data->iv_common_cands.release ();
7630 obstack_free (&data->iv_obstack, NULL);
7633 /* Returns true if the loop body BODY includes any function calls. */
7635 static bool
7636 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7638 gimple_stmt_iterator gsi;
7639 unsigned i;
7641 for (i = 0; i < num_nodes; i++)
7642 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7644 gimple *stmt = gsi_stmt (gsi);
7645 if (is_gimple_call (stmt)
7646 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7647 return true;
7649 return false;
7652 /* Optimizes the LOOP. Returns true if anything changed. */
7654 static bool
7655 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7657 bool changed = false;
7658 struct iv_ca *iv_ca;
7659 edge exit = single_dom_exit (loop);
7660 basic_block *body;
7662 gcc_assert (!data->niters);
7663 data->current_loop = loop;
7664 data->loop_loc = find_loop_location (loop);
7665 data->speed = optimize_loop_for_speed_p (loop);
7667 if (dump_file && (dump_flags & TDF_DETAILS))
7669 fprintf (dump_file, "Processing loop %d", loop->num);
7670 if (data->loop_loc != UNKNOWN_LOCATION)
7671 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7672 LOCATION_LINE (data->loop_loc));
7673 fprintf (dump_file, "\n");
7675 if (exit)
7677 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7678 exit->src->index, exit->dest->index);
7679 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7680 fprintf (dump_file, "\n");
7683 fprintf (dump_file, "\n");
7686 body = get_loop_body (loop);
7687 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7688 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7689 free (body);
7691 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7693 /* For each ssa name determines whether it behaves as an induction variable
7694 in some loop. */
7695 if (!find_induction_variables (data))
7696 goto finish;
7698 /* Finds interesting uses (item 1). */
7699 find_interesting_uses (data);
7700 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7701 goto finish;
7703 /* Finds candidates for the induction variables (item 2). */
7704 find_iv_candidates (data);
7706 /* Calculates the costs (item 3, part 1). */
7707 determine_iv_costs (data);
7708 determine_group_iv_costs (data);
7709 determine_set_costs (data);
7711 /* Find the optimal set of induction variables (item 3, part 2). */
7712 iv_ca = find_optimal_iv_set (data);
7713 if (!iv_ca)
7714 goto finish;
7715 changed = true;
7717 /* Create the new induction variables (item 4, part 1). */
7718 create_new_ivs (data, iv_ca);
7719 iv_ca_free (&iv_ca);
7721 /* Rewrite the uses (item 4, part 2). */
7722 rewrite_groups (data);
7724 /* Remove the ivs that are unused after rewriting. */
7725 remove_unused_ivs (data);
7727 /* We have changed the structure of induction variables; it might happen
7728 that definitions in the scev database refer to some of them that were
7729 eliminated. */
7730 scev_reset ();
7732 finish:
7733 free_loop_data (data);
7735 return changed;
7738 /* Main entry point. Optimizes induction variables in loops. */
7740 void
7741 tree_ssa_iv_optimize (void)
7743 struct loop *loop;
7744 struct ivopts_data data;
7746 tree_ssa_iv_optimize_init (&data);
7748 /* Optimize the loops starting with the innermost ones. */
7749 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7751 if (dump_file && (dump_flags & TDF_DETAILS))
7752 flow_loop_dump (loop, dump_file, NULL, 1);
7754 tree_ssa_iv_optimize_loop (&data, loop);
7757 tree_ssa_iv_optimize_finalize (&data);