/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
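/* As a hypothetical illustration (not code from this pass), a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains an address use &a[i] based on the basic iv i.  A candidate
   such as "p = &a[0]; ...; p += sizeof (*a)" may be chosen for that use,
   and if the exit test is also rewritten against p, the loop effectively
   becomes

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   i.e. strength reduction of the address computation combined with
   elimination of the original induction variable i.  */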
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5
/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
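/* Illustrative note: when profile feedback provides an estimate (say,
   roughly 100 iterations per entry), avg_loop_niter returns that value;
   without any estimate it falls back to the fixed guess AVG_LOOP_NITER,
   i.e. 5 iterations.  */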
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
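/* For illustration: {4, 1} and {4, 2} describe computations with the same
   runtime cost; the larger complexity of the latter is only meaningful as
   a tie-breaker between otherwise equally expensive alternatives, e.g. for
   preferring a simpler addressing mode.  (This is an explanatory note, not
   a rule enforced by the type itself.)  */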
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent *value_type;
  typedef iv_inv_expr_ent *compare_type;
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}
/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
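/* For example (illustrative only): for EXPR of the form &a[i].b the
   function walks down to the base declaration "a" and returns &a; for
   "p + 16" (a POINTER_PLUS_EXPR) it recurses into "p"; and for an
   integer constant cast to a pointer it returns NULL_TREE, since no
   underlying object is known.  */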
/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
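/* Illustration (not part of the pass): for a header phi like
   i_1 = PHI <0(preheader), i_2(latch)> with i_2 = i_1 + 4 in the loop
   body, simple_iv recognizes the affine evolution {0, +, 4}, so the
   step returned here would be 4; a phi whose latch value is not an
   affine function of itself yields NULL_TREE.  */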
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var, but since that's expensive
	 and unlikely to happen, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that's expensive
     and unlikely to happen, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
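/* Illustration (not part of the pass): with a biv i_1 evolving as
   {0, +, 1}, an assignment such as j_2 = i_1 * 4 + 8 defines a general
   iv (giv) with base 8 and step 4; find_givs_in_stmt_scev would fill IV
   with exactly those parameters.  */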
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  data->iv_uses.safe_push (use);

  return use;
}
/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* Sub use list is maintained in offset ascending order.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;

      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
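/* Illustration (not part of the pass): for a condition "if (i_1 < n_2)"
   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   CONTROL_VAR points at i_1, BOUND at n_2, and true is returned; for
   "if (a_3 < b_4)" where neither side is a recognized iv, both operands
   are returned as-is and the result is false.  */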
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
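/* Worked example (illustrative): with TOP = n * 6 + n * 2 and BOT = n,
   the PLUS_EXPR case recurses into both MULT_EXPR operands, yielding
   p0 = 6 and p1 = 2, so *MUL becomes 8.  With TOP = 7 and BOT = 2 the
   INTEGER_CST case computes 7 = 3 * 2 + 1; the nonzero remainder makes
   the function return false.  */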
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that; otherwise
   it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub use for address type uses, that is, with base
     object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}
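/* Illustration (not part of the pass): the accesses a[i] and a[i + 1]
   inside one loop share the stripped base &a[0] and the same step, so
   the second is recorded as a sub use of the first with addr_offset
   equal to sizeof (a[0]); keeping them grouped lets later code cost
   them against a single candidate with different constant offsets.  */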
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
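/* Illustration (not part of the pass): for a store "*(p_1 + 8) = 0"
   where p_1 is an iv with step 4, the walk above substitutes the iv
   base into the address, accumulates the combined step, and records
   the whole address as a USE_ADDRESS via record_group_use; if anything
   is volatile, a bitfield, or otherwise unsuitable, only the indices
   are recorded as plain uses on the "fail" path.  */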
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
2175 /* Finds interesting uses of induction variables outside of loops
2176 on loop exit edge EXIT. */
2178 static void
2179 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2181 gphi *phi;
2182 gphi_iterator psi;
2183 tree def;
2185 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2187 phi = psi.phi ();
2188 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2189 if (!virtual_operand_p (def))
2190 find_interesting_uses_op (data, def);
2194 /* Finds uses of the induction variables that are interesting. */
2196 static void
2197 find_interesting_uses (struct ivopts_data *data)
2199 basic_block bb;
2200 gimple_stmt_iterator bsi;
2201 basic_block *body = get_loop_body (data->current_loop);
2202 unsigned i;
2203 struct version_info *info;
2204 edge e;
2206 if (dump_file && (dump_flags & TDF_DETAILS))
2207 fprintf (dump_file, "Uses:\n\n");
2209 for (i = 0; i < data->current_loop->num_nodes; i++)
2211 edge_iterator ei;
2212 bb = body[i];
2214 FOR_EACH_EDGE (e, ei, bb->succs)
2215 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2216 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2217 find_interesting_uses_outside (data, e);
2219 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2220 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2221 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2222 if (!is_gimple_debug (gsi_stmt (bsi)))
2223 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2226 if (dump_file && (dump_flags & TDF_DETAILS))
2228 bitmap_iterator bi;
2230 fprintf (dump_file, "\n");
2232 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2234 info = ver_info (data, i);
2235 if (info->inv_id)
2237 fprintf (dump_file, " ");
2238 print_generic_expr (dump_file, info->name, TDF_SLIM);
2239 fprintf (dump_file, " is invariant (%d)%s\n",
2240 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2244 fprintf (dump_file, "\n");
2247 free (body);
2250 /* Compute the maximum offset of the [base + offset] addressing mode
2251 for the memory reference represented by USE. */
2253 static HOST_WIDE_INT
2254 compute_max_addr_offset (struct iv_use *use)
2256 int width;
2257 rtx reg, addr;
2258 HOST_WIDE_INT i, off;
2259 unsigned list_index, num;
2260 addr_space_t as;
2261 machine_mode mem_mode, addr_mode;
2262 static vec<HOST_WIDE_INT> max_offset_list;
2264 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2265 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2267 num = max_offset_list.length ();
2268 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2269 if (list_index >= num)
2271 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2272 for (; num < max_offset_list.length (); num++)
2273 max_offset_list[num] = -1;
2276 off = max_offset_list[list_index];
2277 if (off != -1)
2278 return off;
2280 addr_mode = targetm.addr_space.address_mode (as);
2281 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2282 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2284 width = GET_MODE_BITSIZE (addr_mode) - 1;
2285 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2286 width = HOST_BITS_PER_WIDE_INT - 1;
2288 for (i = width; i > 0; i--)
2290 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2291 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2292 if (memory_address_addr_space_p (mem_mode, addr, as))
2293 break;
2295 /* For some strict-alignment targets, the offset must be naturally
2296 aligned. Try an aligned offset if mem_mode is not QImode. */
2297 off = ((unsigned HOST_WIDE_INT) 1 << i);
2298 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2300 off -= GET_MODE_SIZE (mem_mode);
2301 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2302 if (memory_address_addr_space_p (mem_mode, addr, as))
2303 break;
2306 if (i == 0)
2307 off = 0;
2309 max_offset_list[list_index] = off;
2310 return off;
2313 /* Check if all small groups should be split. Return true if and
2314 only if:
2316 1) At least one group contains two uses with different offsets.
2317 2) No group contains more than two uses with different offsets.
2319 Return false otherwise. We want to split such groups because:
2321 1) Small groups don't have much benefit and may interfere with
2322 general candidate selection.
2323 2) The problem size with only small groups is usually small and
2324 the general algorithm can handle it well.
2326 TODO -- The above claim may not hold when auto increment is supported. */
2328 static bool
2329 split_all_small_groups (struct ivopts_data *data)
2331 bool split_p = false;
2332 unsigned int i, n, distinct;
2333 struct iv_use *pre, *use;
2335 n = n_iv_uses (data);
2336 for (i = 0; i < n; i++)
2338 use = iv_use (data, i);
2339 if (!use->next)
2340 continue;
2342 distinct = 1;
2343 gcc_assert (use->type == USE_ADDRESS);
2344 for (pre = use, use = use->next; use; pre = use, use = use->next)
2346 if (pre->addr_offset != use->addr_offset)
2347 distinct++;
2349 if (distinct > 2)
2350 return false;
2352 if (distinct == 2)
2353 split_p = true;
2356 return split_p;
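/* Worked example (a sketch with made-up offsets): with groups
   G1 = { base, base + 4 } and G2 = { base + 8 }, every group has at most
   two distinct offsets and G1 has exactly two, so we return true and all
   groups get dissolved into singleton uses.  If G1 instead were
   { base, base + 4, base + 8 }, it would have three distinct offsets and
   we would return false, keeping the groups intact.  */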
2359 /* For each group of address type uses, this function further groups
2360 these uses according to the maximum offset supported by the target's
2361 [base + offset] addressing mode. */
2363 static void
2364 group_address_uses (struct ivopts_data *data)
2366 HOST_WIDE_INT max_offset = -1;
2367 unsigned int i, n, sub_id;
2368 struct iv_use *pre, *use;
2369 unsigned HOST_WIDE_INT addr_offset_first;
2371 /* Reset max offset to split all small groups. */
2372 if (split_all_small_groups (data))
2373 max_offset = 0;
2375 n = n_iv_uses (data);
2376 for (i = 0; i < n; i++)
2378 use = iv_use (data, i);
2379 if (!use->next)
2380 continue;
2382 gcc_assert (use->type == USE_ADDRESS);
2383 if (max_offset != 0)
2384 max_offset = compute_max_addr_offset (use);
2386 while (use)
2388 sub_id = 0;
2389 addr_offset_first = use->addr_offset;
2390 /* Only uses whose offset relative to the first use fits in the
2391 offset part can be grouped together. */
2392 for (pre = use, use = use->next;
2393 use && (use->addr_offset - addr_offset_first
2394 <= (unsigned HOST_WIDE_INT) max_offset);
2395 pre = use, use = use->next)
2397 use->id = pre->id;
2398 use->sub_id = ++sub_id;
2401 /* Break the list and create new group. */
2402 if (use)
2404 pre->next = NULL;
2405 use->id = n_iv_uses (data);
2406 use->related_cands = BITMAP_ALLOC (NULL);
2407 data->iv_uses.safe_push (use);
2412 if (dump_file && (dump_flags & TDF_DETAILS))
2413 dump_uses (dump_file, data);
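/* Grouping sketch (hypothetical numbers): assume compute_max_addr_offset
   returns 255 and the chained uses have offsets 0, 16 and 4096 from the
   same iv.  The first two satisfy offset - 0 <= 255 and share an id
   (sub_ids 0 and 1); the use at offset 4096 breaks the chain and becomes
   the head of a new group with a fresh id from n_iv_uses.  */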
2416 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2417 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2418 we are at the top-level of the processed address. */
2420 static tree
2421 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2422 HOST_WIDE_INT *offset)
2424 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2425 enum tree_code code;
2426 tree type, orig_type = TREE_TYPE (expr);
2427 HOST_WIDE_INT off0, off1, st;
2428 tree orig_expr = expr;
2430 STRIP_NOPS (expr);
2432 type = TREE_TYPE (expr);
2433 code = TREE_CODE (expr);
2434 *offset = 0;
2436 switch (code)
2438 case INTEGER_CST:
2439 if (!cst_and_fits_in_hwi (expr)
2440 || integer_zerop (expr))
2441 return orig_expr;
2443 *offset = int_cst_value (expr);
2444 return build_int_cst (orig_type, 0);
2446 case POINTER_PLUS_EXPR:
2447 case PLUS_EXPR:
2448 case MINUS_EXPR:
2449 op0 = TREE_OPERAND (expr, 0);
2450 op1 = TREE_OPERAND (expr, 1);
2452 op0 = strip_offset_1 (op0, false, false, &off0);
2453 op1 = strip_offset_1 (op1, false, false, &off1);
2455 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2456 if (op0 == TREE_OPERAND (expr, 0)
2457 && op1 == TREE_OPERAND (expr, 1))
2458 return orig_expr;
2460 if (integer_zerop (op1))
2461 expr = op0;
2462 else if (integer_zerop (op0))
2464 if (code == MINUS_EXPR)
2465 expr = fold_build1 (NEGATE_EXPR, type, op1);
2466 else
2467 expr = op1;
2469 else
2470 expr = fold_build2 (code, type, op0, op1);
2472 return fold_convert (orig_type, expr);
2474 case MULT_EXPR:
2475 op1 = TREE_OPERAND (expr, 1);
2476 if (!cst_and_fits_in_hwi (op1))
2477 return orig_expr;
2479 op0 = TREE_OPERAND (expr, 0);
2480 op0 = strip_offset_1 (op0, false, false, &off0);
2481 if (op0 == TREE_OPERAND (expr, 0))
2482 return orig_expr;
2484 *offset = off0 * int_cst_value (op1);
2485 if (integer_zerop (op0))
2486 expr = op0;
2487 else
2488 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2490 return fold_convert (orig_type, expr);
2492 case ARRAY_REF:
2493 case ARRAY_RANGE_REF:
2494 if (!inside_addr)
2495 return orig_expr;
2497 step = array_ref_element_size (expr);
2498 if (!cst_and_fits_in_hwi (step))
2499 break;
2501 st = int_cst_value (step);
2502 op1 = TREE_OPERAND (expr, 1);
2503 op1 = strip_offset_1 (op1, false, false, &off1);
2504 *offset = off1 * st;
2506 if (top_compref
2507 && integer_zerop (op1))
2509 /* Strip the component reference completely. */
2510 op0 = TREE_OPERAND (expr, 0);
2511 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2512 *offset += off0;
2513 return op0;
2515 break;
2517 case COMPONENT_REF:
2519 tree field;
2521 if (!inside_addr)
2522 return orig_expr;
2524 tmp = component_ref_field_offset (expr);
2525 field = TREE_OPERAND (expr, 1);
2526 if (top_compref
2527 && cst_and_fits_in_hwi (tmp)
2528 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2530 HOST_WIDE_INT boffset, abs_off;
2532 /* Strip the component reference completely. */
2533 op0 = TREE_OPERAND (expr, 0);
2534 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2535 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2536 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2537 if (boffset < 0)
2538 abs_off = -abs_off;
2540 *offset = off0 + int_cst_value (tmp) + abs_off;
2541 return op0;
2544 break;
2546 case ADDR_EXPR:
2547 op0 = TREE_OPERAND (expr, 0);
2548 op0 = strip_offset_1 (op0, true, true, &off0);
2549 *offset += off0;
2551 if (op0 == TREE_OPERAND (expr, 0))
2552 return orig_expr;
2554 expr = build_fold_addr_expr (op0);
2555 return fold_convert (orig_type, expr);
2557 case MEM_REF:
2558 /* ??? Offset operand? */
2559 inside_addr = false;
2560 break;
2562 default:
2563 return orig_expr;
2566 /* Default handling of expressions for which we want to recurse into
2567 the first operand. */
2568 op0 = TREE_OPERAND (expr, 0);
2569 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2570 *offset += off0;
2572 if (op0 == TREE_OPERAND (expr, 0)
2573 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2574 return orig_expr;
2576 expr = copy_node (expr);
2577 TREE_OPERAND (expr, 0) = op0;
2578 if (op1)
2579 TREE_OPERAND (expr, 1) = op1;
2581 /* Inside address, we might strip the top level component references,
2582 thus changing type of the expression. Handling of ADDR_EXPR
2583 will fix that. */
2584 expr = fold_convert (orig_type, expr);
2586 return expr;
2589 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2591 static tree
2592 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2594 HOST_WIDE_INT off;
2595 tree core = strip_offset_1 (expr, false, false, &off);
2596 *offset = off;
2597 return core;
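/* Example (hypothetical expressions): for EXPR = &a[i + 4] with 4-byte
   elements, strip_offset returns &a[i] and sets *OFFSET to 16; for
   EXPR = p + 10 with integer p it returns p and sets *OFFSET to 10.
   The stripped core is what add_iv_value_candidates later uses to
   propose an offset-free candidate.  */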
2600 /* Returns a variant of TYPE that can be used as a base for different uses.
2601 We return an unsigned type with the same precision, which avoids problems
2602 with overflow. */
2604 static tree
2605 generic_type_for (tree type)
2607 if (POINTER_TYPE_P (type))
2608 return unsigned_type_for (type);
2610 if (TYPE_UNSIGNED (type))
2611 return type;
2613 return unsigned_type_for (type);
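/* For instance, generic_type_for (int) yields unsigned int, and for a
   pointer type it yields an unsigned integer type of pointer precision,
   so candidate arithmetic wraps instead of invoking undefined overflow
   behavior.  */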
2616 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2617 the bitmap in which we should store them. */
2619 static struct ivopts_data *fd_ivopts_data;
2620 static tree
2621 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2623 bitmap *depends_on = (bitmap *) data;
2624 struct version_info *info;
2626 if (TREE_CODE (*expr_p) != SSA_NAME)
2627 return NULL_TREE;
2628 info = name_info (fd_ivopts_data, *expr_p);
2630 if (!info->inv_id || info->has_nonlin_use)
2631 return NULL_TREE;
2633 if (!*depends_on)
2634 *depends_on = BITMAP_ALLOC (NULL);
2635 bitmap_set_bit (*depends_on, info->inv_id);
2637 return NULL_TREE;
2640 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2641 position to POS. If USE is not NULL, the candidate is set as related to
2642 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2643 replacement of the final value of the iv by a direct computation. */
2645 static struct iv_cand *
2646 add_candidate_1 (struct ivopts_data *data,
2647 tree base, tree step, bool important, enum iv_position pos,
2648 struct iv_use *use, gimple incremented_at)
2650 unsigned i;
2651 struct iv_cand *cand = NULL;
2652 tree type, orig_type;
2654 /* For non-original variables, make sure their values are computed in a type
2655 that does not invoke undefined behavior on overflows (since in general,
2656 we cannot prove that these induction variables are non-wrapping). */
2657 if (pos != IP_ORIGINAL)
2659 orig_type = TREE_TYPE (base);
2660 type = generic_type_for (orig_type);
2661 if (type != orig_type)
2663 base = fold_convert (type, base);
2664 step = fold_convert (type, step);
2668 for (i = 0; i < n_iv_cands (data); i++)
2670 cand = iv_cand (data, i);
2672 if (cand->pos != pos)
2673 continue;
2675 if (cand->incremented_at != incremented_at
2676 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2677 && cand->ainc_use != use))
2678 continue;
2680 if (!cand->iv)
2682 if (!base && !step)
2683 break;
2685 continue;
2688 if (!base && !step)
2689 continue;
2691 if (operand_equal_p (base, cand->iv->base, 0)
2692 && operand_equal_p (step, cand->iv->step, 0)
2693 && (TYPE_PRECISION (TREE_TYPE (base))
2694 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2695 break;
2698 if (i == n_iv_cands (data))
2700 cand = XCNEW (struct iv_cand);
2701 cand->id = i;
2703 if (!base && !step)
2704 cand->iv = NULL;
2705 else
2706 cand->iv = alloc_iv (base, step);
2708 cand->pos = pos;
2709 if (pos != IP_ORIGINAL && cand->iv)
2711 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2712 cand->var_after = cand->var_before;
2714 cand->important = important;
2715 cand->incremented_at = incremented_at;
2716 data->iv_candidates.safe_push (cand);
2718 if (step
2719 && TREE_CODE (step) != INTEGER_CST)
2721 fd_ivopts_data = data;
2722 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2725 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2726 cand->ainc_use = use;
2727 else
2728 cand->ainc_use = NULL;
2730 if (dump_file && (dump_flags & TDF_DETAILS))
2731 dump_cand (dump_file, cand);
2734 if (important && !cand->important)
2736 cand->important = true;
2737 if (dump_file && (dump_flags & TDF_DETAILS))
2738 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2741 if (use)
2743 bitmap_set_bit (use->related_cands, i);
2744 if (dump_file && (dump_flags & TDF_DETAILS))
2745 fprintf (dump_file, "Candidate %d is related to use %d\n",
2746 cand->id, use->id);
2749 return cand;
2752 /* Returns true if incrementing the induction variable at the end of the LOOP
2753 is allowed.
2755 The purpose is to avoid splitting the latch edge with a biv increment,
2756 thus creating a jump, possibly confusing other optimization passes and
2757 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2758 is not available (so we do not have a better alternative), or if the latch
2759 edge is already nonempty. */
2761 static bool
2762 allow_ip_end_pos_p (struct loop *loop)
2764 if (!ip_normal_pos (loop))
2765 return true;
2767 if (!empty_block_p (ip_end_pos (loop)))
2768 return true;
2770 return false;
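/* Sketch of the concern (a simplified picture): with an empty latch,
   placing the increment at IP_END would turn the fall-through edge

     body -> latch -> header

   into a block containing only "i += step" plus a jump, whereas
   IP_NORMAL places the increment before the exit test.  Hence IP_END is
   allowed only when no IP_NORMAL position exists or the latch already
   holds code anyway.  */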
2773 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2774 Important field is set to IMPORTANT. */
2776 static void
2777 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2778 bool important, struct iv_use *use)
2780 basic_block use_bb = gimple_bb (use->stmt);
2781 machine_mode mem_mode;
2782 unsigned HOST_WIDE_INT cstepi;
2784 /* If we insert the increment in any position other than the standard
2785 ones, we must ensure that it is incremented once per iteration.
2786 It must not be in an inner nested loop, or one side of an if
2787 statement. */
2788 if (use_bb->loop_father != data->current_loop
2789 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2790 || stmt_could_throw_p (use->stmt)
2791 || !cst_and_fits_in_hwi (step))
2792 return;
2794 cstepi = int_cst_value (step);
2796 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2797 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2798 || USE_STORE_PRE_INCREMENT (mem_mode))
2799 && GET_MODE_SIZE (mem_mode) == cstepi)
2800 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2801 || USE_STORE_PRE_DECREMENT (mem_mode))
2802 && GET_MODE_SIZE (mem_mode) == -cstepi))
2804 enum tree_code code = MINUS_EXPR;
2805 tree new_base;
2806 tree new_step = step;
2808 if (POINTER_TYPE_P (TREE_TYPE (base)))
2810 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2811 code = POINTER_PLUS_EXPR;
2813 else
2814 new_step = fold_convert (TREE_TYPE (base), new_step);
2815 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2816 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2817 use->stmt);
2819 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2820 || USE_STORE_POST_INCREMENT (mem_mode))
2821 && GET_MODE_SIZE (mem_mode) == cstepi)
2822 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2823 || USE_STORE_POST_DECREMENT (mem_mode))
2824 && GET_MODE_SIZE (mem_mode) == -cstepi))
2826 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2827 use->stmt);
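/* Autoinc sketch (target-dependent, hypothetical values): on a machine
   with a post-increment load, a use *p with candidate
   { base = p0, step = 4 } and GET_MODE_SIZE (mem_mode) == 4 gets an
   IP_AFTER_USE candidate, so the access and the increment can later be
   combined by auto-inc-dec into something like "ld r1, [r0], #4".  */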
2831 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2832 position to POS. If USE is not NULL, the candidate is set as related to
2833 it. The candidate computation is scheduled on all available positions. */
2835 static void
2836 add_candidate (struct ivopts_data *data,
2837 tree base, tree step, bool important, struct iv_use *use)
2839 gcc_assert (use == NULL || use->sub_id == 0);
2841 if (ip_normal_pos (data->current_loop))
2842 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2843 if (ip_end_pos (data->current_loop)
2844 && allow_ip_end_pos_p (data->current_loop))
2845 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2847 if (use != NULL && use->type == USE_ADDRESS)
2848 add_autoinc_candidates (data, base, step, important, use);
2851 /* Adds standard iv candidates. */
2853 static void
2854 add_standard_iv_candidates (struct ivopts_data *data)
2856 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2858 /* The same for the long type if it is still fast enough. */
2859 if (TYPE_PRECISION
2860 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2861 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2862 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2863 build_int_cst (long_integer_type_node, 1), true, NULL);
2865 /* And likewise for the long long type if it is still fast enough. */
2866 if (TYPE_PRECISION
2867 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2868 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2869 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2870 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2874 /* Adds candidates based on the old induction variable IV. */
2876 static void
2877 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2879 gimple phi;
2880 tree def;
2881 struct iv_cand *cand;
2883 add_candidate (data, iv->base, iv->step, true, NULL);
2885 /* The same, but with initial value zero. */
2886 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2887 add_candidate (data, size_int (0), iv->step, true, NULL);
2888 else
2889 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2890 iv->step, true, NULL);
2892 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2893 if (gimple_code (phi) == GIMPLE_PHI)
2895 /* Additionally record the possibility of leaving the original iv
2896 untouched. */
2897 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2898 /* Don't add candidate if it's from another PHI node because
2899 it's an affine iv appearing in the form of PEELED_CHREC. */
2900 phi = SSA_NAME_DEF_STMT (def);
2901 if (gimple_code (phi) != GIMPLE_PHI)
2903 cand = add_candidate_1 (data,
2904 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2905 SSA_NAME_DEF_STMT (def));
2906 cand->var_before = iv->ssa_name;
2907 cand->var_after = def;
2909 else
2910 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2914 /* Adds candidates based on the old induction variables. */
2916 static void
2917 add_old_ivs_candidates (struct ivopts_data *data)
2919 unsigned i;
2920 struct iv *iv;
2921 bitmap_iterator bi;
2923 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2925 iv = ver_info (data, i)->iv;
2926 if (iv && iv->biv_p && !integer_zerop (iv->step))
2927 add_old_iv_candidates (data, iv);
2931 /* Adds candidates based on the value of the induction variable IV and USE. */
2933 static void
2934 add_iv_value_candidates (struct ivopts_data *data,
2935 struct iv *iv, struct iv_use *use)
2937 unsigned HOST_WIDE_INT offset;
2938 tree base;
2939 tree basetype;
2941 add_candidate (data, iv->base, iv->step, false, use);
2943 /* The same, but with initial value zero. Make such a variable important,
2944 since it is generic enough so that possibly many uses may be based
2945 on it. */
2946 basetype = TREE_TYPE (iv->base);
2947 if (POINTER_TYPE_P (basetype))
2948 basetype = sizetype;
2949 add_candidate (data, build_int_cst (basetype, 0),
2950 iv->step, true, use);
2952 /* Third, try removing the constant offset. Make sure to even
2953 add a candidate for &a[0] vs. (T *)&a. */
2954 base = strip_offset (iv->base, &offset);
2955 if (offset
2956 || base != iv->base)
2957 add_candidate (data, base, iv->step, false, use);
2960 /* Adds candidates based on the uses. */
2962 static void
2963 add_derived_ivs_candidates (struct ivopts_data *data)
2965 unsigned i;
2967 for (i = 0; i < n_iv_uses (data); i++)
2969 struct iv_use *use = iv_use (data, i);
2971 if (!use)
2972 continue;
2974 switch (use->type)
2976 case USE_NONLINEAR_EXPR:
2977 case USE_COMPARE:
2978 case USE_ADDRESS:
2979 /* Just add the ivs based on the value of the iv used here. */
2980 add_iv_value_candidates (data, use->iv, use);
2981 break;
2983 default:
2984 gcc_unreachable ();
2989 /* Record important candidates and add them to related_cands bitmaps
2990 if needed. */
2992 static void
2993 record_important_candidates (struct ivopts_data *data)
2995 unsigned i;
2996 struct iv_use *use;
2998 for (i = 0; i < n_iv_cands (data); i++)
3000 struct iv_cand *cand = iv_cand (data, i);
3002 if (cand->important)
3003 bitmap_set_bit (data->important_candidates, i);
3006 data->consider_all_candidates = (n_iv_cands (data)
3007 <= CONSIDER_ALL_CANDIDATES_BOUND);
3009 if (data->consider_all_candidates)
3011 /* We will not need "related_cands" bitmaps in this case,
3012 so release them to decrease peak memory consumption. */
3013 for (i = 0; i < n_iv_uses (data); i++)
3015 use = iv_use (data, i);
3016 BITMAP_FREE (use->related_cands);
3019 else
3021 /* Add important candidates to the related_cands bitmaps. */
3022 for (i = 0; i < n_iv_uses (data); i++)
3023 bitmap_ior_into (iv_use (data, i)->related_cands,
3024 data->important_candidates);
3028 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3029 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3030 we allocate a simple list to every use. */
3032 static void
3033 alloc_use_cost_map (struct ivopts_data *data)
3035 unsigned i, size, s;
3037 for (i = 0; i < n_iv_uses (data); i++)
3039 struct iv_use *use = iv_use (data, i);
3041 if (data->consider_all_candidates)
3042 size = n_iv_cands (data);
3043 else
3045 s = bitmap_count_bits (use->related_cands);
3047 /* Round up to a power of two, so that taking the modulo by it is fast. */
3048 size = s ? (1 << ceil_log2 (s)) : 1;
3051 use->n_map_members = size;
3052 use->cost_map = XCNEWVEC (struct cost_pair, size);
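/* Sizing sketch (hypothetical count): with 5 related candidates,
   s = 5 rounds up to size = 8, so the open-addressed probing in
   set_use_iv_cost and get_use_iv_cost can reduce candidate ids with
   "id & (size - 1)" instead of a division.  */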
3056 /* Returns a description of the computation cost of an expression whose
3057 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3059 static comp_cost
3060 new_cost (unsigned runtime, unsigned complexity)
3062 comp_cost cost;
3064 cost.cost = runtime;
3065 cost.complexity = complexity;
3067 return cost;
3070 /* Returns true if COST is infinite. */
3072 static bool
3073 infinite_cost_p (comp_cost cost)
3075 return cost.cost == INFTY;
3078 /* Adds costs COST1 and COST2. */
3080 static comp_cost
3081 add_costs (comp_cost cost1, comp_cost cost2)
3083 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3084 return infinite_cost;
3086 cost1.cost += cost2.cost;
3087 cost1.complexity += cost2.complexity;
3089 return cost1;
3091 /* Subtracts cost COST2 from COST1. */
3093 static comp_cost
3094 sub_costs (comp_cost cost1, comp_cost cost2)
3096 cost1.cost -= cost2.cost;
3097 cost1.complexity -= cost2.complexity;
3099 return cost1;
3102 /* Returns a negative number if COST1 < COST2, a positive number if
3103 COST1 > COST2, and 0 if COST1 = COST2. */
3105 static int
3106 compare_costs (comp_cost cost1, comp_cost cost2)
3108 if (cost1.cost == cost2.cost)
3109 return cost1.complexity - cost2.complexity;
3111 return cost1.cost - cost2.cost;
3114 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
3115 on invariants DEPENDS_ON and that the value used in expressing it
3116 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3118 static void
3119 set_use_iv_cost (struct ivopts_data *data,
3120 struct iv_use *use, struct iv_cand *cand,
3121 comp_cost cost, bitmap depends_on, tree value,
3122 enum tree_code comp, int inv_expr_id)
3124 unsigned i, s;
3126 if (infinite_cost_p (cost))
3128 BITMAP_FREE (depends_on);
3129 return;
3132 if (data->consider_all_candidates)
3134 use->cost_map[cand->id].cand = cand;
3135 use->cost_map[cand->id].cost = cost;
3136 use->cost_map[cand->id].depends_on = depends_on;
3137 use->cost_map[cand->id].value = value;
3138 use->cost_map[cand->id].comp = comp;
3139 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3140 return;
3143 /* n_map_members is a power of two, so this computes modulo. */
3144 s = cand->id & (use->n_map_members - 1);
3145 for (i = s; i < use->n_map_members; i++)
3146 if (!use->cost_map[i].cand)
3147 goto found;
3148 for (i = 0; i < s; i++)
3149 if (!use->cost_map[i].cand)
3150 goto found;
3152 gcc_unreachable ();
3154 found:
3155 use->cost_map[i].cand = cand;
3156 use->cost_map[i].cost = cost;
3157 use->cost_map[i].depends_on = depends_on;
3158 use->cost_map[i].value = value;
3159 use->cost_map[i].comp = comp;
3160 use->cost_map[i].inv_expr_id = inv_expr_id;
3163 /* Gets cost of (USE, CANDIDATE) pair. */
3165 static struct cost_pair *
3166 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3167 struct iv_cand *cand)
3169 unsigned i, s;
3170 struct cost_pair *ret;
3172 if (!cand)
3173 return NULL;
3175 if (data->consider_all_candidates)
3177 ret = use->cost_map + cand->id;
3178 if (!ret->cand)
3179 return NULL;
3181 return ret;
3184 /* n_map_members is a power of two, so this computes modulo. */
3185 s = cand->id & (use->n_map_members - 1);
3186 for (i = s; i < use->n_map_members; i++)
3187 if (use->cost_map[i].cand == cand)
3188 return use->cost_map + i;
3189 else if (use->cost_map[i].cand == NULL)
3190 return NULL;
3191 for (i = 0; i < s; i++)
3192 if (use->cost_map[i].cand == cand)
3193 return use->cost_map + i;
3194 else if (use->cost_map[i].cand == NULL)
3195 return NULL;
3197 return NULL;
3200 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3201 static rtx
3202 produce_memory_decl_rtl (tree obj, int *regno)
3204 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3205 machine_mode address_mode = targetm.addr_space.address_mode (as);
3206 rtx x;
3208 gcc_assert (obj);
3209 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3211 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3212 x = gen_rtx_SYMBOL_REF (address_mode, name);
3213 SET_SYMBOL_REF_DECL (x, obj);
3214 x = gen_rtx_MEM (DECL_MODE (obj), x);
3215 set_mem_addr_space (x, as);
3216 targetm.encode_section_info (obj, x, true);
3218 else
3220 x = gen_raw_REG (address_mode, (*regno)++);
3221 x = gen_rtx_MEM (DECL_MODE (obj), x);
3222 set_mem_addr_space (x, as);
3225 return x;
3228 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3229 walk_tree. DATA points to the next fake register number to use. */
3231 static tree
3232 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3234 tree obj = NULL_TREE;
3235 rtx x = NULL_RTX;
3236 int *regno = (int *) data;
3238 switch (TREE_CODE (*expr_p))
3240 case ADDR_EXPR:
3241 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3242 handled_component_p (*expr_p);
3243 expr_p = &TREE_OPERAND (*expr_p, 0))
3244 continue;
3245 obj = *expr_p;
3246 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3247 x = produce_memory_decl_rtl (obj, regno);
3248 break;
3250 case SSA_NAME:
3251 *ws = 0;
3252 obj = SSA_NAME_VAR (*expr_p);
3253 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3254 if (!obj)
3255 return NULL_TREE;
3256 if (!DECL_RTL_SET_P (obj))
3257 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3258 break;
3260 case VAR_DECL:
3261 case PARM_DECL:
3262 case RESULT_DECL:
3263 *ws = 0;
3264 obj = *expr_p;
3266 if (DECL_RTL_SET_P (obj))
3267 break;
3269 if (DECL_MODE (obj) == BLKmode)
3270 x = produce_memory_decl_rtl (obj, regno);
3271 else
3272 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3274 break;
3276 default:
3277 break;
3280 if (x)
3282 decl_rtl_to_reset.safe_push (obj);
3283 SET_DECL_RTL (obj, x);
3286 return NULL_TREE;
3289 /* Determines cost of the computation of EXPR. */
3291 static unsigned
3292 computation_cost (tree expr, bool speed)
3294 rtx_insn *seq;
3295 rtx rslt;
3296 tree type = TREE_TYPE (expr);
3297 unsigned cost;
3298 /* Avoid using hard regs in ways which may be unsupported. */
3299 int regno = LAST_VIRTUAL_REGISTER + 1;
3300 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3301 enum node_frequency real_frequency = node->frequency;
3303 node->frequency = NODE_FREQUENCY_NORMAL;
3304 crtl->maybe_hot_insn_p = speed;
3305 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3306 start_sequence ();
3307 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3308 seq = get_insns ();
3309 end_sequence ();
3310 default_rtl_profile ();
3311 node->frequency = real_frequency;
3313 cost = seq_cost (seq, speed);
3314 if (MEM_P (rslt))
3315 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3316 TYPE_ADDR_SPACE (type), speed);
3317 else if (!REG_P (rslt))
3318 cost += set_src_cost (rslt, speed);
3320 return cost;
3323 /* Returns the variable containing the value of candidate CAND at statement STMT. */
3325 static tree
3326 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3328 if (stmt_after_increment (loop, cand, stmt))
3329 return cand->var_after;
3330 else
3331 return cand->var_before;
3334 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3335 same precision that is at least as wide as the precision of TYPE, stores
3336 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3337 type of A and B. */
3339 static tree
3340 determine_common_wider_type (tree *a, tree *b)
3342 tree wider_type = NULL;
3343 tree suba, subb;
3344 tree atype = TREE_TYPE (*a);
3346 if (CONVERT_EXPR_P (*a))
3348 suba = TREE_OPERAND (*a, 0);
3349 wider_type = TREE_TYPE (suba);
3350 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3351 return atype;
3353 else
3354 return atype;
3356 if (CONVERT_EXPR_P (*b))
3358 subb = TREE_OPERAND (*b, 0);
3359 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3360 return atype;
3362 else
3363 return atype;
3365 *a = suba;
3366 *b = subb;
3367 return wider_type;
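/* Example (a sketch with hypothetical types): if *A is (unsigned int) la
   and *B is (unsigned int) lb with "long" la and lb on a target where
   long is 64 bits, both conversions are stripped, *A and *B become la
   and lb, and "long" is returned as the wider type in which the
   difference is then folded.  */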
3370 /* Determines the expression by which USE is expressed from induction variable
3371 CAND at statement AT in LOOP. The expression is stored in a decomposed
3372 form into AFF. Returns false if USE cannot be expressed using CAND. */
3374 static bool
3375 get_computation_aff (struct loop *loop,
3376 struct iv_use *use, struct iv_cand *cand, gimple at,
3377 struct aff_tree *aff)
3379 tree ubase = use->iv->base;
3380 tree ustep = use->iv->step;
3381 tree cbase = cand->iv->base;
3382 tree cstep = cand->iv->step, cstep_common;
3383 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3384 tree common_type, var;
3385 tree uutype;
3386 aff_tree cbase_aff, var_aff;
3387 widest_int rat;
3389 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3391 /* We do not have enough precision to express the values of the use. */
3392 return false;
3395 var = var_at_stmt (loop, cand, at);
3396 uutype = unsigned_type_for (utype);
3398 /* If the conversion is not a noop, perform it. */
3399 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3401 cstep = fold_convert (uutype, cstep);
3402 cbase = fold_convert (uutype, cbase);
3403 var = fold_convert (uutype, var);
3406 if (!constant_multiple_of (ustep, cstep, &rat))
3407 return false;
3409 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3410 type, we achieve better folding by computing their difference in this
3411 wider type, and casting the result to UUTYPE. We do not need to worry about
3412 overflows, as all the arithmetic will in the end be performed in UUTYPE
3413 anyway. */
3414 common_type = determine_common_wider_type (&ubase, &cbase);
3416 /* use = ubase - ratio * cbase + ratio * var. */
3417 tree_to_aff_combination (ubase, common_type, aff);
3418 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3419 tree_to_aff_combination (var, uutype, &var_aff);
3421 /* We need to shift the value if we are after the increment. */
3422 if (stmt_after_increment (loop, cand, at))
3424 aff_tree cstep_aff;
3426 if (common_type != uutype)
3427 cstep_common = fold_convert (common_type, cstep);
3428 else
3429 cstep_common = cstep;
3431 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3432 aff_combination_add (&cbase_aff, &cstep_aff);
3435 aff_combination_scale (&cbase_aff, -rat);
3436 aff_combination_add (aff, &cbase_aff);
3437 if (common_type != uutype)
3438 aff_combination_convert (aff, uutype);
3440 aff_combination_scale (&var_aff, rat);
3441 aff_combination_add (aff, &var_aff);
3443 return true;
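/* Worked example (hypothetical ivs): for use iv { base = ub, step = 4 }
   and candidate iv { base = cb, step = 1 }, constant_multiple_of gives
   rat = 4 and the affine form built here is

     use = ub - 4 * cb + 4 * var

   which reproduces the use value at every iteration (shifted by one
   candidate step when AT is after the candidate's increment).  */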
3446 /* Return the type of USE. */
3448 static tree
3449 get_use_type (struct iv_use *use)
3451 tree base_type = TREE_TYPE (use->iv->base);
3452 tree type;
3454 if (use->type == USE_ADDRESS)
3456 /* The base_type may be a void pointer. Create a pointer type based on
3457 the mem_ref instead. */
3458 type = build_pointer_type (TREE_TYPE (*use->op_p));
3459 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3460 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3462 else
3463 type = base_type;
3465 return type;
3468 /* Determines the expression by which USE is expressed from induction variable
3469 CAND at statement AT in LOOP. The computation is unshared. */
3471 static tree
3472 get_computation_at (struct loop *loop,
3473 struct iv_use *use, struct iv_cand *cand, gimple at)
3475 aff_tree aff;
3476 tree type = get_use_type (use);
3478 if (!get_computation_aff (loop, use, cand, at, &aff))
3479 return NULL_TREE;
3480 unshare_aff_combination (&aff);
3481 return fold_convert (type, aff_combination_to_tree (&aff));
3484 /* Determines the expression by which USE is expressed from induction variable
3485 CAND in LOOP. The computation is unshared. */
3487 static tree
3488 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3490 return get_computation_at (loop, use, cand, use->stmt);
3493 /* Adjust the cost COST for being in loop setup rather than loop body.
3494 If we're optimizing for space, the loop setup overhead is constant;
3495 if we're optimizing for speed, amortize it over the per-iteration cost. */
3496 static unsigned
3497 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3499 if (cost == INFTY)
3500 return cost;
3501 else if (optimize_loop_for_speed_p (data->current_loop))
3502 return cost / avg_loop_niter (data->current_loop);
3503 else
3504 return cost;
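/* E.g. (made-up numbers) a setup cost of 10 in a loop with
   avg_loop_niter == 5 is charged as 2 when optimizing for speed, since
   the one-time setup amortizes over the iterations; when optimizing for
   size it stays 10.  */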
3507 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3508 validity for a memory reference accessing memory of mode MODE in
3509 address space AS. */
3512 bool
3513 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3514 addr_space_t as)
3516 #define MAX_RATIO 128
3517 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3518 static vec<sbitmap> valid_mult_list;
3519 sbitmap valid_mult;
3521 if (data_index >= valid_mult_list.length ())
3522 valid_mult_list.safe_grow_cleared (data_index + 1);
3524 valid_mult = valid_mult_list[data_index];
3525 if (!valid_mult)
3527 machine_mode address_mode = targetm.addr_space.address_mode (as);
3528 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3529 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3530 rtx addr, scaled;
3531 HOST_WIDE_INT i;
3533 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3534 bitmap_clear (valid_mult);
3535 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3536 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3537 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3539 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3540 if (memory_address_addr_space_p (mode, addr, as)
3541 || memory_address_addr_space_p (mode, scaled, as))
3542 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3545 if (dump_file && (dump_flags & TDF_DETAILS))
3547 fprintf (dump_file, " allowed multipliers:");
3548 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3549 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3550 fprintf (dump_file, " %d", (int) i);
3551 fprintf (dump_file, "\n");
3552 fprintf (dump_file, "\n");
3555 valid_mult_list[data_index] = valid_mult;
3558 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3559 return false;
3561 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
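/* On an x86-like target, for example, the probe above would typically
   find ratios 1, 2, 4 and 8 valid (the scale field of
   base + index * scale), so a ratio of 8 is accepted while a ratio of 3
   is rejected and must be paid for with an explicit multiply (see
   get_address_cost below).  */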
3564 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3565 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3566 variable is omitted. Compute the cost for a memory reference that accesses
3567 a memory location of mode MEM_MODE in address space AS.
3569 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3570 size of MEM_MODE / RATIO) is available. To make this determination, we
3571 look at the size of the increment to be made, which is given in CSTEP.
3572 CSTEP may be zero if the step is unknown.
3573 STMT_AFTER_INC is true iff the statement we're looking at is after the
3574 increment of the original biv.
3576 TODO -- there must be some better way. This all is quite crude. */
3578 enum ainc_type
3580 AINC_PRE_INC, /* Pre increment. */
3581 AINC_PRE_DEC, /* Pre decrement. */
3582 AINC_POST_INC, /* Post increment. */
3583 AINC_POST_DEC, /* Post decrement. */
3584 AINC_NONE /* Also the number of auto increment types. */
3587 typedef struct address_cost_data_s
3589 HOST_WIDE_INT min_offset, max_offset;
3590 unsigned costs[2][2][2][2];
3591 unsigned ainc_costs[AINC_NONE];
3592 } *address_cost_data;
3595 static comp_cost
3596 get_address_cost (bool symbol_present, bool var_present,
3597 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3598 HOST_WIDE_INT cstep, machine_mode mem_mode,
3599 addr_space_t as, bool speed,
3600 bool stmt_after_inc, bool *may_autoinc)
3602 machine_mode address_mode = targetm.addr_space.address_mode (as);
3603 static vec<address_cost_data> address_cost_data_list;
3604 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3605 address_cost_data data;
3606 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3607 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3608 unsigned cost, acost, complexity;
3609 enum ainc_type autoinc_type;
3610 bool offset_p, ratio_p, autoinc;
3611 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3612 unsigned HOST_WIDE_INT mask;
3613 unsigned bits;
3615 if (data_index >= address_cost_data_list.length ())
3616 address_cost_data_list.safe_grow_cleared (data_index + 1);
3618 data = address_cost_data_list[data_index];
3619 if (!data)
3621 HOST_WIDE_INT i;
3622 HOST_WIDE_INT rat, off = 0;
3623 int old_cse_not_expected, width;
3624 unsigned sym_p, var_p, off_p, rat_p, add_c;
3625 rtx_insn *seq;
3626 rtx addr, base;
3627 rtx reg0, reg1;
3629 data = (address_cost_data) xcalloc (1, sizeof (*data));
3631 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3633 width = GET_MODE_BITSIZE (address_mode) - 1;
3634 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3635 width = HOST_BITS_PER_WIDE_INT - 1;
3636 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3638 for (i = width; i >= 0; i--)
3640 off = -((unsigned HOST_WIDE_INT) 1 << i);
3641 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3642 if (memory_address_addr_space_p (mem_mode, addr, as))
3643 break;
3645 data->min_offset = (i == -1? 0 : off);
3647 for (i = width; i >= 0; i--)
3649 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3650 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3651 if (memory_address_addr_space_p (mem_mode, addr, as))
3652 break;
3653 /* For some strict-alignment targets, the offset must be naturally
3654 aligned. Try an aligned offset if mem_mode is not QImode. */
3655 off = mem_mode != QImode
3656 ? ((unsigned HOST_WIDE_INT) 1 << i)
3657 - GET_MODE_SIZE (mem_mode)
3658 : 0;
3659 if (off > 0)
3661 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3662 if (memory_address_addr_space_p (mem_mode, addr, as))
3663 break;
3666 if (i == -1)
3667 off = 0;
3668 data->max_offset = off;
3670 if (dump_file && (dump_flags & TDF_DETAILS))
3672 fprintf (dump_file, "get_address_cost:\n");
3673 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3674 GET_MODE_NAME (mem_mode),
3675 data->min_offset);
3676 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3677 GET_MODE_NAME (mem_mode),
3678 data->max_offset);
3681 rat = 1;
3682 for (i = 2; i <= MAX_RATIO; i++)
3683 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3685 rat = i;
3686 break;
3689 /* Compute the cost of various addressing modes. */
3690 acost = 0;
3691 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3692 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3694 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3695 || USE_STORE_PRE_DECREMENT (mem_mode))
3697 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3698 has_predec[mem_mode]
3699 = memory_address_addr_space_p (mem_mode, addr, as);
3701 if (has_predec[mem_mode])
3702 data->ainc_costs[AINC_PRE_DEC]
3703 = address_cost (addr, mem_mode, as, speed);
3705 if (USE_LOAD_POST_DECREMENT (mem_mode)
3706 || USE_STORE_POST_DECREMENT (mem_mode))
3708 addr = gen_rtx_POST_DEC (address_mode, reg0);
3709 has_postdec[mem_mode]
3710 = memory_address_addr_space_p (mem_mode, addr, as);
3712 if (has_postdec[mem_mode])
3713 data->ainc_costs[AINC_POST_DEC]
3714 = address_cost (addr, mem_mode, as, speed);
3716 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3717 || USE_STORE_PRE_INCREMENT (mem_mode))
3719 addr = gen_rtx_PRE_INC (address_mode, reg0);
3720 has_preinc[mem_mode]
3721 = memory_address_addr_space_p (mem_mode, addr, as);
3723 if (has_preinc[mem_mode])
3724 data->ainc_costs[AINC_PRE_INC]
3725 = address_cost (addr, mem_mode, as, speed);
3727 if (USE_LOAD_POST_INCREMENT (mem_mode)
3728 || USE_STORE_POST_INCREMENT (mem_mode))
3730 addr = gen_rtx_POST_INC (address_mode, reg0);
3731 has_postinc[mem_mode]
3732 = memory_address_addr_space_p (mem_mode, addr, as);
3734 if (has_postinc[mem_mode])
3735 data->ainc_costs[AINC_POST_INC]
3736 = address_cost (addr, mem_mode, as, speed);
3738 for (i = 0; i < 16; i++)
3740 sym_p = i & 1;
3741 var_p = (i >> 1) & 1;
3742 off_p = (i >> 2) & 1;
3743 rat_p = (i >> 3) & 1;
3745 addr = reg0;
3746 if (rat_p)
3747 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3748 gen_int_mode (rat, address_mode));
3750 if (var_p)
3751 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3753 if (sym_p)
3755 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3756 /* ??? We can run into trouble with some backends by presenting
3757 it with symbols which haven't been properly passed through
3758 targetm.encode_section_info. By setting the local bit, we
3759 enhance the probability of things working. */
3760 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3762 if (off_p)
3763 base = gen_rtx_fmt_e (CONST, address_mode,
3764 gen_rtx_fmt_ee
3765 (PLUS, address_mode, base,
3766 gen_int_mode (off, address_mode)));
3768 else if (off_p)
3769 base = gen_int_mode (off, address_mode);
3770 else
3771 base = NULL_RTX;
3773 if (base)
3774 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3776 start_sequence ();
3777 /* To avoid splitting addressing modes, pretend that no cse will
3778 follow. */
3779 old_cse_not_expected = cse_not_expected;
3780 cse_not_expected = true;
3781 addr = memory_address_addr_space (mem_mode, addr, as);
3782 cse_not_expected = old_cse_not_expected;
3783 seq = get_insns ();
3784 end_sequence ();
3786 acost = seq_cost (seq, speed);
3787 acost += address_cost (addr, mem_mode, as, speed);
3789 if (!acost)
3790 acost = 1;
3791 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3794 /* On some targets, it is quite expensive to load a symbol into a register,
3795 which makes addresses that contain symbols look much more expensive.
3796 However, the symbol will have to be loaded in any case before the
3797 loop (and quite likely we have it in register already), so it does not
3798 make much sense to penalize them too heavily. So make some final
3799 tweaks for the SYMBOL_PRESENT modes:
3801 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3802 var is cheaper, use this mode with small penalty.
3803 If VAR_PRESENT is true, try whether the mode with
3804 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3805 if this is the case, use it. */
3806 add_c = add_cost (speed, address_mode);
3807 for (i = 0; i < 8; i++)
3809 var_p = i & 1;
3810 off_p = (i >> 1) & 1;
3811 rat_p = (i >> 2) & 1;
3813 acost = data->costs[0][1][off_p][rat_p] + 1;
3814 if (var_p)
3815 acost += add_c;
3817 if (acost < data->costs[1][var_p][off_p][rat_p])
3818 data->costs[1][var_p][off_p][rat_p] = acost;
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3823 fprintf (dump_file, "Address costs:\n");
3825 for (i = 0; i < 16; i++)
3827 sym_p = i & 1;
3828 var_p = (i >> 1) & 1;
3829 off_p = (i >> 2) & 1;
3830 rat_p = (i >> 3) & 1;
3832 fprintf (dump_file, " ");
3833 if (sym_p)
3834 fprintf (dump_file, "sym + ");
3835 if (var_p)
3836 fprintf (dump_file, "var + ");
3837 if (off_p)
3838 fprintf (dump_file, "cst + ");
3839 if (rat_p)
3840 fprintf (dump_file, "rat * ");
3842 acost = data->costs[sym_p][var_p][off_p][rat_p];
3843 fprintf (dump_file, "index costs %d\n", acost);
3845 if (has_predec[mem_mode] || has_postdec[mem_mode]
3846 || has_preinc[mem_mode] || has_postinc[mem_mode])
3847 fprintf (dump_file, " May include autoinc/dec\n");
3848 fprintf (dump_file, "\n");
3851 address_cost_data_list[data_index] = data;
3854 bits = GET_MODE_BITSIZE (address_mode);
3855 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3856 offset &= mask;
3857 if ((offset >> (bits - 1) & 1))
3858 offset |= ~mask;
3859 s_offset = offset;
3861 autoinc = false;
3862 autoinc_type = AINC_NONE;
3863 msize = GET_MODE_SIZE (mem_mode);
3864 autoinc_offset = offset;
3865 if (stmt_after_inc)
3866 autoinc_offset += ratio * cstep;
3867 if (symbol_present || var_present || ratio != 1)
3868 autoinc = false;
3869 else
3871 if (has_postinc[mem_mode] && autoinc_offset == 0
3872 && msize == cstep)
3873 autoinc_type = AINC_POST_INC;
3874 else if (has_postdec[mem_mode] && autoinc_offset == 0
3875 && msize == -cstep)
3876 autoinc_type = AINC_POST_DEC;
3877 else if (has_preinc[mem_mode] && autoinc_offset == msize
3878 && msize == cstep)
3879 autoinc_type = AINC_PRE_INC;
3880 else if (has_predec[mem_mode] && autoinc_offset == -msize
3881 && msize == -cstep)
3882 autoinc_type = AINC_PRE_DEC;
3884 if (autoinc_type != AINC_NONE)
3885 autoinc = true;
3888 cost = 0;
3889 offset_p = (s_offset != 0
3890 && data->min_offset <= s_offset
3891 && s_offset <= data->max_offset);
3892 ratio_p = (ratio != 1
3893 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3895 if (ratio != 1 && !ratio_p)
3896 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3898 if (s_offset && !offset_p && !symbol_present)
3899 cost += add_cost (speed, address_mode);
3901 if (may_autoinc)
3902 *may_autoinc = autoinc;
3903 if (autoinc)
3904 acost = data->ainc_costs[autoinc_type];
3905 else
3906 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3907 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3908 return new_cost (cost + acost, complexity);
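/* Decomposition sketch (hypothetical address): &sym + x + 16 + 4 * i is
   costed with symbol_present = true, var_present = true, offset = 16 and
   ratio = 4; assuming 16 lies in [min_offset, max_offset] and 4 is an
   allowed multiplier, the precomputed data->costs[1][1][1][1] entry gives
   the target cost and the complexity is 4.  When the address reduces to
   an autoincrement form, data->ainc_costs is used instead.  */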
3911 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3912 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3913 calculating the operands of EXPR. Returns true if successful, and returns
3914 the cost in COST. */
3916 static bool
3917 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3918 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3920 comp_cost res;
3921 tree op1 = TREE_OPERAND (expr, 1);
3922 tree cst = TREE_OPERAND (mult, 1);
3923 tree multop = TREE_OPERAND (mult, 0);
3924 int m = exact_log2 (int_cst_value (cst));
3925 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3926 int as_cost, sa_cost;
3927 bool mult_in_op1;
3929 if (!(m >= 0 && m < maxm))
3930 return false;
3932 mult_in_op1 = operand_equal_p (op1, mult, 0);
3934 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3936 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3937 use that in preference to a shift insn followed by an add insn. */
3938 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3939 ? shiftadd_cost (speed, mode, m)
3940 : (mult_in_op1
3941 ? shiftsub1_cost (speed, mode, m)
3942 : shiftsub0_cost (speed, mode, m)));
3944 res = new_cost (MIN (as_cost, sa_cost), 0);
3945 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3947 STRIP_NOPS (multop);
3948 if (!is_gimple_val (multop))
3949 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3951 *cost = res;
3952 return true;
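/* Shiftadd sketch (hypothetical expression): for EXPR = a + b * 8, MULT
   is b * 8 and m = exact_log2 (8) = 3, so the choice is between
   add_cost + shift_cost (build b << 3, then add) and shiftadd_cost (a
   single shift-and-add, e.g. an x86 "lea"); the cheaper of the two is
   charged, plus the cost of computing the other operand.  */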
3955 /* Estimates cost of forcing expression EXPR into a variable. */
3957 static comp_cost
3958 force_expr_to_var_cost (tree expr, bool speed)
3960 static bool costs_initialized = false;
3961 static unsigned integer_cost [2];
3962 static unsigned symbol_cost [2];
3963 static unsigned address_cost [2];
3964 tree op0, op1;
3965 comp_cost cost0, cost1, cost;
3966 machine_mode mode;
3968 if (!costs_initialized)
3970 tree type = build_pointer_type (integer_type_node);
3971 tree var, addr;
3972 rtx x;
3973 int i;
3975 var = create_tmp_var_raw (integer_type_node, "test_var");
3976 TREE_STATIC (var) = 1;
3977 x = produce_memory_decl_rtl (var, NULL);
3978 SET_DECL_RTL (var, x);
3980 addr = build1 (ADDR_EXPR, type, var);
3983 for (i = 0; i < 2; i++)
3985 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3986 2000), i);
3988 symbol_cost[i] = computation_cost (addr, i) + 1;
3990 address_cost[i]
3991 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3992 if (dump_file && (dump_flags & TDF_DETAILS))
3994 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3995 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3996 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3997 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3998 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3999 fprintf (dump_file, "\n");
4003 costs_initialized = true;
4006 STRIP_NOPS (expr);
4008 if (SSA_VAR_P (expr))
4009 return no_cost;
4011 if (is_gimple_min_invariant (expr))
4013 if (TREE_CODE (expr) == INTEGER_CST)
4014 return new_cost (integer_cost [speed], 0);
4016 if (TREE_CODE (expr) == ADDR_EXPR)
4018 tree obj = TREE_OPERAND (expr, 0);
4020 if (TREE_CODE (obj) == VAR_DECL
4021 || TREE_CODE (obj) == PARM_DECL
4022 || TREE_CODE (obj) == RESULT_DECL)
4023 return new_cost (symbol_cost [speed], 0);
4026 return new_cost (address_cost [speed], 0);
4029 switch (TREE_CODE (expr))
4031 case POINTER_PLUS_EXPR:
4032 case PLUS_EXPR:
4033 case MINUS_EXPR:
4034 case MULT_EXPR:
4035 op0 = TREE_OPERAND (expr, 0);
4036 op1 = TREE_OPERAND (expr, 1);
4037 STRIP_NOPS (op0);
4038 STRIP_NOPS (op1);
4039 break;
4041 CASE_CONVERT:
4042 case NEGATE_EXPR:
4043 op0 = TREE_OPERAND (expr, 0);
4044 STRIP_NOPS (op0);
4045 op1 = NULL_TREE;
4046 break;
4048 default:
4049 /* Just an arbitrary value, FIXME. */
4050 return new_cost (target_spill_cost[speed], 0);
4053 if (op0 == NULL_TREE
4054 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4055 cost0 = no_cost;
4056 else
4057 cost0 = force_expr_to_var_cost (op0, speed);
4059 if (op1 == NULL_TREE
4060 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4061 cost1 = no_cost;
4062 else
4063 cost1 = force_expr_to_var_cost (op1, speed);
4065 mode = TYPE_MODE (TREE_TYPE (expr));
4066 switch (TREE_CODE (expr))
4068 case POINTER_PLUS_EXPR:
4069 case PLUS_EXPR:
4070 case MINUS_EXPR:
4071 case NEGATE_EXPR:
4072 cost = new_cost (add_cost (speed, mode), 0);
4073 if (TREE_CODE (expr) != NEGATE_EXPR)
4075 tree mult = NULL_TREE;
4076 comp_cost sa_cost;
4077 if (TREE_CODE (op1) == MULT_EXPR)
4078 mult = op1;
4079 else if (TREE_CODE (op0) == MULT_EXPR)
4080 mult = op0;
4082 if (mult != NULL_TREE
4083 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4084 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4085 speed, &sa_cost))
4086 return sa_cost;
4088 break;
4090 CASE_CONVERT:
4092 tree inner_mode, outer_mode;
4093 outer_mode = TREE_TYPE (expr);
4094 inner_mode = TREE_TYPE (op0);
4095 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4096 TYPE_MODE (inner_mode), speed), 0);
4098 break;
4100 case MULT_EXPR:
4101 if (cst_and_fits_in_hwi (op0))
4102 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4103 mode, speed), 0);
4104 else if (cst_and_fits_in_hwi (op1))
4105 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4106 mode, speed), 0);
4107 else
4108 return new_cost (target_spill_cost [speed], 0);
4109 break;
4111 default:
4112 gcc_unreachable ();
4115 cost = add_costs (cost, cost0);
4116 cost = add_costs (cost, cost1);
4118 /* Bound the cost by target_spill_cost. The parts of complicated
4119 computations often are either loop invariant or at least can
4120 be shared between several iv uses, so letting this grow without
4121 limits would not give reasonable results. */
4122 if (cost.cost > (int) target_spill_cost [speed])
4123 cost.cost = target_spill_cost [speed];
4125 return cost;
4128 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4129 invariants the computation depends on. */
4131 static comp_cost
4132 force_var_cost (struct ivopts_data *data,
4133 tree expr, bitmap *depends_on)
4135 if (depends_on)
4137 fd_ivopts_data = data;
4138 walk_tree (&expr, find_depends, depends_on, NULL);
4141 return force_expr_to_var_cost (expr, data->speed);
4144 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4145 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4146 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4147 invariants the computation depends on. */
4149 static comp_cost
4150 split_address_cost (struct ivopts_data *data,
4151 tree addr, bool *symbol_present, bool *var_present,
4152 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4154 tree core;
4155 HOST_WIDE_INT bitsize;
4156 HOST_WIDE_INT bitpos;
4157 tree toffset;
4158 machine_mode mode;
4159 int unsignedp, volatilep;
4161 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4162 &unsignedp, &volatilep, false);
4164 if (toffset != 0
4165 || bitpos % BITS_PER_UNIT != 0
4166 || TREE_CODE (core) != VAR_DECL)
4168 *symbol_present = false;
4169 *var_present = true;
4170 fd_ivopts_data = data;
4171 walk_tree (&addr, find_depends, depends_on, NULL);
4172 return new_cost (target_spill_cost[data->speed], 0);
4175 *offset += bitpos / BITS_PER_UNIT;
4176 if (TREE_STATIC (core)
4177 || DECL_EXTERNAL (core))
4179 *symbol_present = true;
4180 *var_present = false;
4181 return no_cost;
4184 *symbol_present = false;
4185 *var_present = true;
4186 return no_cost;
4189 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
4190 var + symbol + offset. The value of offset is added to OFFSET;
4191 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4192 part is missing. DEPENDS_ON is a set of the invariants the computation
4193 depends on. */
4195 static comp_cost
4196 ptr_difference_cost (struct ivopts_data *data,
4197 tree e1, tree e2, bool *symbol_present, bool *var_present,
4198 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4200 HOST_WIDE_INT diff = 0;
4201 aff_tree aff_e1, aff_e2;
4202 tree type;
4204 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4206 if (ptr_difference_const (e1, e2, &diff))
4208 *offset += diff;
4209 *symbol_present = false;
4210 *var_present = false;
4211 return no_cost;
4214 if (integer_zerop (e2))
4215 return split_address_cost (data, TREE_OPERAND (e1, 0),
4216 symbol_present, var_present, offset, depends_on);
4218 *symbol_present = false;
4219 *var_present = true;
4221 type = signed_type_for (TREE_TYPE (e1));
4222 tree_to_aff_combination (e1, type, &aff_e1);
4223 tree_to_aff_combination (e2, type, &aff_e2);
4224 aff_combination_scale (&aff_e2, -1);
4225 aff_combination_add (&aff_e1, &aff_e2);
4227 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
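/* A flat sketch of the aff_tree manipulation above (hypothetical
   fixed-size model; the real aff_tree lives in tree-affine.h): an affine
   value is a constant offset plus coefficients over a few terms, and
   E1 - E2 is formed by scaling E2 by -1 and adding, exactly the
   aff_combination_scale / aff_combination_add sequence used above.  */

#define AFF_SKETCH_NTERMS 4

struct aff_sketch
{
  long offset;
  long coef[AFF_SKETCH_NTERMS];
};

static void
aff_sketch_scale (struct aff_sketch *a, long scale)
{
  int i;
  a->offset *= scale;
  for (i = 0; i < AFF_SKETCH_NTERMS; i++)
    a->coef[i] *= scale;
}

static void
aff_sketch_add (struct aff_sketch *a, const struct aff_sketch *b)
{
  int i;
  a->offset += b->offset;
  for (i = 0; i < AFF_SKETCH_NTERMS; i++)
    a->coef[i] += b->coef[i];
}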
4230 /* Estimates the cost of expressing the difference E1 - E2 as
4231 var + symbol + offset. The value of offset is added to OFFSET;
4232 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4233 part is missing. DEPENDS_ON is a set of the invariants the computation
4234 depends on. */
4236 static comp_cost
4237 difference_cost (struct ivopts_data *data,
4238 tree e1, tree e2, bool *symbol_present, bool *var_present,
4239 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4241 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4242 unsigned HOST_WIDE_INT off1, off2;
4243 aff_tree aff_e1, aff_e2;
4244 tree type;
4246 e1 = strip_offset (e1, &off1);
4247 e2 = strip_offset (e2, &off2);
4248 *offset += off1 - off2;
4250 STRIP_NOPS (e1);
4251 STRIP_NOPS (e2);
4253 if (TREE_CODE (e1) == ADDR_EXPR)
4254 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4255 offset, depends_on);
4256 *symbol_present = false;
4258 if (operand_equal_p (e1, e2, 0))
4260 *var_present = false;
4261 return no_cost;
4264 *var_present = true;
4266 if (integer_zerop (e2))
4267 return force_var_cost (data, e1, depends_on);
4269 if (integer_zerop (e1))
4271 comp_cost cost = force_var_cost (data, e2, depends_on);
4272 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4273 return cost;
4276 type = signed_type_for (TREE_TYPE (e1));
4277 tree_to_aff_combination (e1, type, &aff_e1);
4278 tree_to_aff_combination (e2, type, &aff_e2);
4279 aff_combination_scale (&aff_e2, -1);
4280 aff_combination_add (&aff_e1, &aff_e2);
4282 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4285 /* Returns true if AFF1 and AFF2 are identical. */
4287 static bool
4288 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4290 unsigned i;
4292 if (aff1->n != aff2->n)
4293 return false;
4295 for (i = 0; i < aff1->n; i++)
4297 if (aff1->elts[i].coef != aff2->elts[i].coef)
4298 return false;
4300 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4301 return false;
4303 return true;
4306 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4308 static int
4309 get_expr_id (struct ivopts_data *data, tree expr)
4311 struct iv_inv_expr_ent ent;
4312 struct iv_inv_expr_ent **slot;
4314 ent.expr = expr;
4315 ent.hash = iterative_hash_expr (expr, 0);
4316 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4317 if (*slot)
4318 return (*slot)->id;
4320 *slot = XNEW (struct iv_inv_expr_ent);
4321 (*slot)->expr = expr;
4322 (*slot)->hash = ent.hash;
4323 (*slot)->id = data->inv_expr_id++;
4324 return (*slot)->id;
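/* The id assignment above follows the usual hash-consing pattern.  A
   minimal sketch with a linear table instead of the hash table
   (hypothetical names; pointer identity stands in for the structural
   hash/equality used by inv_expr_tab, and the fixed capacity is not
   checked):  */

#define EXPR_TAB_SKETCH_SIZE 64

static const void *expr_tab_sketch[EXPR_TAB_SKETCH_SIZE];
static int expr_tab_sketch_n;

static int
get_expr_id_sketch (const void *expr)
{
  int i;
  for (i = 0; i < expr_tab_sketch_n; i++)
    if (expr_tab_sketch[i] == expr)
      return i;			/* known expression: reuse its id */
  expr_tab_sketch[expr_tab_sketch_n] = expr;
  return expr_tab_sketch_n++;	/* new expression: next free id */
}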
4327 /* Returns the pseudo expr id if the expression UBASE - RATIO * CBASE
4328 requires a new compiler-generated temporary. Returns -1 otherwise.
4329 ADDRESS_P is a flag indicating if the expression is for address
4330 computation. */
4332 static int
4333 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4334 tree cbase, HOST_WIDE_INT ratio,
4335 bool address_p)
4337 aff_tree ubase_aff, cbase_aff;
4338 tree expr, ub, cb;
4340 STRIP_NOPS (ubase);
4341 STRIP_NOPS (cbase);
4342 ub = ubase;
4343 cb = cbase;
4345 if ((TREE_CODE (ubase) == INTEGER_CST)
4346 && (TREE_CODE (cbase) == INTEGER_CST))
4347 return -1;
4349 /* Strips the constant part. */
4350 if (TREE_CODE (ubase) == PLUS_EXPR
4351 || TREE_CODE (ubase) == MINUS_EXPR
4352 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4354 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4355 ubase = TREE_OPERAND (ubase, 0);
4358 /* Strips the constant part. */
4359 if (TREE_CODE (cbase) == PLUS_EXPR
4360 || TREE_CODE (cbase) == MINUS_EXPR
4361 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4363 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4364 cbase = TREE_OPERAND (cbase, 0);
4367 if (address_p)
4369 if (((TREE_CODE (ubase) == SSA_NAME)
4370 || (TREE_CODE (ubase) == ADDR_EXPR
4371 && is_gimple_min_invariant (ubase)))
4372 && (TREE_CODE (cbase) == INTEGER_CST))
4373 return -1;
4375 if (((TREE_CODE (cbase) == SSA_NAME)
4376 || (TREE_CODE (cbase) == ADDR_EXPR
4377 && is_gimple_min_invariant (cbase)))
4378 && (TREE_CODE (ubase) == INTEGER_CST))
4379 return -1;
4382 if (ratio == 1)
4384 if (operand_equal_p (ubase, cbase, 0))
4385 return -1;
4387 if (TREE_CODE (ubase) == ADDR_EXPR
4388 && TREE_CODE (cbase) == ADDR_EXPR)
4390 tree usym, csym;
4392 usym = TREE_OPERAND (ubase, 0);
4393 csym = TREE_OPERAND (cbase, 0);
4394 if (TREE_CODE (usym) == ARRAY_REF)
4396 tree ind = TREE_OPERAND (usym, 1);
4397 if (TREE_CODE (ind) == INTEGER_CST
4398 && tree_fits_shwi_p (ind)
4399 && tree_to_shwi (ind) == 0)
4400 usym = TREE_OPERAND (usym, 0);
4402 if (TREE_CODE (csym) == ARRAY_REF)
4404 tree ind = TREE_OPERAND (csym, 1);
4405 if (TREE_CODE (ind) == INTEGER_CST
4406 && tree_fits_shwi_p (ind)
4407 && tree_to_shwi (ind) == 0)
4408 csym = TREE_OPERAND (csym, 0);
4410 if (operand_equal_p (usym, csym, 0))
4411 return -1;
4413 /* Now do a more complex comparison.  */
4414 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4415 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4416 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4417 return -1;
4420 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4421 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4423 aff_combination_scale (&cbase_aff, -1 * ratio);
4424 aff_combination_add (&ubase_aff, &cbase_aff);
4425 expr = aff_combination_to_tree (&ubase_aff);
4426 return get_expr_id (data, expr);
4431 /* Determines the cost of the computation by which USE is expressed
4432 from induction variable CAND. If ADDRESS_P is true, we just need
4433 to create an address from it, otherwise we want to get it into a
4434 register. A set of invariants we depend on is stored in
4435 DEPENDS_ON. AT is the statement at which the value is computed.
4436 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4437 addressing is likely. */
4439 static comp_cost
4440 get_computation_cost_at (struct ivopts_data *data,
4441 struct iv_use *use, struct iv_cand *cand,
4442 bool address_p, bitmap *depends_on, gimple at,
4443 bool *can_autoinc,
4444 int *inv_expr_id)
4446 tree ubase = use->iv->base, ustep = use->iv->step;
4447 tree cbase, cstep;
4448 tree utype = TREE_TYPE (ubase), ctype;
4449 unsigned HOST_WIDE_INT cstepi, offset = 0;
4450 HOST_WIDE_INT ratio, aratio;
4451 bool var_present, symbol_present, stmt_is_after_inc;
4452 comp_cost cost;
4453 widest_int rat;
4454 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4455 machine_mode mem_mode = (address_p
4456 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4457 : VOIDmode);
4459 *depends_on = NULL;
4461 /* Only consider real candidates. */
4462 if (!cand->iv)
4463 return infinite_cost;
4465 cbase = cand->iv->base;
4466 cstep = cand->iv->step;
4467 ctype = TREE_TYPE (cbase);
4469 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4471 /* We do not have a precision to express the values of use. */
4472 return infinite_cost;
4475 if (address_p
4476 || (use->iv->base_object
4477 && cand->iv->base_object
4478 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4479 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4481 /* Do not try to express address of an object with computation based
4482 on address of a different object. This may cause problems in rtl
4483 level alias analysis (that does not expect this to be happening,
4484 as this is illegal in C), and would be unlikely to be useful
4485 anyway. */
4486 if (use->iv->base_object
4487 && cand->iv->base_object
4488 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4489 return infinite_cost;
4492 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4494 /* TODO -- add direct handling of this case. */
4495 goto fallback;
4498 /* CSTEPI is removed from the offset in case the statement is after the
4499 increment. If the step is not constant, we use zero instead.
4500 This is a bit imprecise (there is the extra addition), but
4501 redundancy elimination is likely to transform the code so that
4502 it uses the value of the variable before the increment anyway,
4503 so this is not all that unrealistic. */
4504 if (cst_and_fits_in_hwi (cstep))
4505 cstepi = int_cst_value (cstep);
4506 else
4507 cstepi = 0;
4509 if (!constant_multiple_of (ustep, cstep, &rat))
4510 return infinite_cost;
4512 if (wi::fits_shwi_p (rat))
4513 ratio = rat.to_shwi ();
4514 else
4515 return infinite_cost;
4517 STRIP_NOPS (cbase);
4518 ctype = TREE_TYPE (cbase);
4520 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4522 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4523 or ratio == 1, it is better to handle this like
4525 ubase - ratio * cbase + ratio * var
4527 (this also holds in the case ratio == -1, TODO). */
4529 if (cst_and_fits_in_hwi (cbase))
4531 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4532 cost = difference_cost (data,
4533 ubase, build_int_cst (utype, 0),
4534 &symbol_present, &var_present, &offset,
4535 depends_on);
4536 cost.cost /= avg_loop_niter (data->current_loop);
4538 else if (ratio == 1)
4540 tree real_cbase = cbase;
4542 /* Check to see if any adjustment is needed. */
4543 if (cstepi == 0 && stmt_is_after_inc)
4545 aff_tree real_cbase_aff;
4546 aff_tree cstep_aff;
4548 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4549 &real_cbase_aff);
4550 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4552 aff_combination_add (&real_cbase_aff, &cstep_aff);
4553 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4556 cost = difference_cost (data,
4557 ubase, real_cbase,
4558 &symbol_present, &var_present, &offset,
4559 depends_on);
4560 cost.cost /= avg_loop_niter (data->current_loop);
4562 else if (address_p
4563 && !POINTER_TYPE_P (ctype)
4564 && multiplier_allowed_in_address_p
4565 (ratio, mem_mode,
4566 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4568 cbase
4569 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4570 cost = difference_cost (data,
4571 ubase, cbase,
4572 &symbol_present, &var_present, &offset,
4573 depends_on);
4574 cost.cost /= avg_loop_niter (data->current_loop);
4576 else
4578 cost = force_var_cost (data, cbase, depends_on);
4579 cost = add_costs (cost,
4580 difference_cost (data,
4581 ubase, build_int_cst (utype, 0),
4582 &symbol_present, &var_present,
4583 &offset, depends_on));
4584 cost.cost /= avg_loop_niter (data->current_loop);
4585 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4588 /* The set of invariants depended on by the sub use has already been
4589 computed for the first use in the group. */
4590 if (use->sub_id)
4592 cost.cost = 0;
4593 if (depends_on && *depends_on)
4594 bitmap_clear (*depends_on);
4596 else if (inv_expr_id)
4598 *inv_expr_id =
4599 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4600 /* Clear depends on. */
4601 if (*inv_expr_id != -1 && depends_on && *depends_on)
4602 bitmap_clear (*depends_on);
4605 /* If we are after the increment, the value of the candidate is higher by
4606 one iteration. */
4607 if (stmt_is_after_inc)
4608 offset -= ratio * cstepi;
4610 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4611 (symbol/var1/const parts may be omitted). If we are looking for an
4612 address, find the cost of addressing this. */
4613 if (address_p)
4614 return add_costs (cost,
4615 get_address_cost (symbol_present, var_present,
4616 offset, ratio, cstepi,
4617 mem_mode,
4618 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4619 speed, stmt_is_after_inc,
4620 can_autoinc));
4622 /* Otherwise estimate the costs for computing the expression. */
4623 if (!symbol_present && !var_present && !offset)
4625 if (ratio != 1)
4626 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4627 return cost;
4630 /* Symbol + offset should be compile-time computable so consider that they
4631 are added once to the variable, if present. */
4632 if (var_present && (symbol_present || offset))
4633 cost.cost += adjust_setup_cost (data,
4634 add_cost (speed, TYPE_MODE (ctype)));
4636 /* Having offset does not affect runtime cost in case it is added to
4637 symbol, but it increases complexity. */
4638 if (offset)
4639 cost.complexity++;
4641 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4643 aratio = ratio > 0 ? ratio : -ratio;
4644 if (aratio != 1)
4645 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4646 return cost;
4648 fallback:
4649 if (can_autoinc)
4650 *can_autoinc = false;
4653 /* Just get the expression, expand it and measure the cost. */
4654 tree comp = get_computation_at (data->current_loop, use, cand, at);
4656 if (!comp)
4657 return infinite_cost;
4659 if (address_p)
4660 comp = build_simple_mem_ref (comp);
4662 return new_cost (computation_cost (comp, speed), 0);
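/* The rewrite relied on above -- use = ubase + ratio * (var - cbase)
   == (ubase - ratio * cbase) + ratio * var -- checked on plain unsigned
   arithmetic, where it holds even under wraparound (hypothetical
   helper):  */

static int
computation_identity_sketch (unsigned ubase, unsigned cbase,
			     unsigned ratio, unsigned var)
{
  unsigned direct = ubase + ratio * (var - cbase);
  unsigned split = (ubase - ratio * cbase) + ratio * var;
  return direct == split;	/* always 1: unsigned ops are modular */
}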
4666 /* Determines the cost of the computation by which USE is expressed
4667 from induction variable CAND. If ADDRESS_P is true, we just need
4668 to create an address from it, otherwise we want to get it into a
4669 register. A set of invariants we depend on is stored in
4670 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4671 autoinc addressing is likely. */
4673 static comp_cost
4674 get_computation_cost (struct ivopts_data *data,
4675 struct iv_use *use, struct iv_cand *cand,
4676 bool address_p, bitmap *depends_on,
4677 bool *can_autoinc, int *inv_expr_id)
4679 return get_computation_cost_at (data,
4680 use, cand, address_p, depends_on, use->stmt,
4681 can_autoinc, inv_expr_id);
4684 /* Determines cost of basing replacement of USE on CAND in a generic
4685 expression. */
4687 static bool
4688 determine_use_iv_cost_generic (struct ivopts_data *data,
4689 struct iv_use *use, struct iv_cand *cand)
4691 bitmap depends_on;
4692 comp_cost cost;
4693 int inv_expr_id = -1;
4695 /* The simple case first -- if we need to express value of the preserved
4696 original biv, the cost is 0. This also prevents us from counting the
4697 cost of increment twice -- once at this use and once in the cost of
4698 the candidate. */
4699 if (cand->pos == IP_ORIGINAL
4700 && cand->incremented_at == use->stmt)
4702 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4703 ERROR_MARK, -1);
4704 return true;
4707 cost = get_computation_cost (data, use, cand, false, &depends_on,
4708 NULL, &inv_expr_id);
4710 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4711 inv_expr_id);
4713 return !infinite_cost_p (cost);
4716 /* Determines cost of basing replacement of USE on CAND in an address. */
4718 static bool
4719 determine_use_iv_cost_address (struct ivopts_data *data,
4720 struct iv_use *use, struct iv_cand *cand)
4722 bitmap depends_on;
4723 bool can_autoinc;
4724 int inv_expr_id = -1;
4725 struct iv_use *sub_use;
4726 comp_cost sub_cost;
4727 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4728 &can_autoinc, &inv_expr_id);
4730 if (cand->ainc_use == use)
4732 if (can_autoinc)
4733 cost.cost -= cand->cost_step;
4734 /* If we generated the candidate solely for exploiting autoincrement
4735 opportunities, and it turns out it can't be used, set the cost to
4736 infinity to make sure we ignore it. */
4737 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4738 cost = infinite_cost;
4740 for (sub_use = use->next;
4741 sub_use && !infinite_cost_p (cost);
4742 sub_use = sub_use->next)
4744 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4745 &can_autoinc, &inv_expr_id);
4746 cost = add_costs (cost, sub_cost);
4749 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4750 inv_expr_id);
4752 return !infinite_cost_p (cost);
4755 /* Computes value of candidate CAND at position AT in iteration NITER, and
4756 stores it to VAL. */
4758 static void
4759 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4760 aff_tree *val)
4762 aff_tree step, delta, nit;
4763 struct iv *iv = cand->iv;
4764 tree type = TREE_TYPE (iv->base);
4765 /* unsigned_type_for also maps pointer types to an unsigned integer
4766 type of the same precision, so no separate sizetype handling is
4767 needed here. */
4768 tree steptype = unsigned_type_for (type);
4770 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4771 aff_combination_convert (&step, steptype);
4772 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4773 aff_combination_convert (&nit, steptype);
4774 aff_combination_mult (&nit, &step, &delta);
4775 if (stmt_after_increment (loop, cand, at))
4776 aff_combination_add (&delta, &step);
4778 tree_to_aff_combination (iv->base, type, val);
4779 if (!POINTER_TYPE_P (type))
4780 aff_combination_convert (val, steptype);
4781 aff_combination_add (val, &delta);
4784 /* Returns the period of induction variable IV. */
4786 static tree
4787 iv_period (struct iv *iv)
4789 tree step = iv->step, period, type;
4790 tree pow2div;
4792 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4794 type = unsigned_type_for (TREE_TYPE (step));
4795 /* The period of the iv is lcm (step, type_range)/step - 1,
4796 i.e., N*type_range/step - 1. Since the type range is a power
4797 of two, N == step >> num_of_ending_zeros_binary (step),
4798 so the final result is
4800 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4803 pow2div = num_ending_zeros (step);
4805 period = build_low_bits_mask (type,
4806 (TYPE_PRECISION (type)
4807 - tree_to_uhwi (pow2div)));
4809 return period;
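/* The formula above instantiated for an 8-bit unsigned type (a sketch;
   the pass computes this on arbitrary-precision trees).  For
   step = 12 = 0b1100 there are two trailing zero bits, so the period is
   (256 >> 2) - 1 = 63: the first 64 values of the iv are all distinct.
   STEP must be nonzero, mirroring the assert above.  */

static unsigned char
iv_period_sketch_u8 (unsigned char step)
{
  unsigned shift = 0;
  while ((step & 1) == 0)	/* num_ending_zeros (step) */
    {
      step >>= 1;
      shift++;
    }
  return (unsigned char) ((256u >> shift) - 1);
}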
4812 /* Returns the comparison operator used when eliminating the iv USE. */
4814 static enum tree_code
4815 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4817 struct loop *loop = data->current_loop;
4818 basic_block ex_bb;
4819 edge exit;
4821 ex_bb = gimple_bb (use->stmt);
4822 exit = EDGE_SUCC (ex_bb, 0);
4823 if (flow_bb_inside_loop_p (loop, exit->dest))
4824 exit = EDGE_SUCC (ex_bb, 1);
4826 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4829 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4830 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4831 calculation is performed in a non-wrapping type.
4833 TODO: More generally, we could test for the situation that
4834 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4835 This would require knowing the sign of OFFSET. */
4837 static bool
4838 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4840 enum tree_code code;
4841 tree e1, e2;
4842 aff_tree aff_e1, aff_e2, aff_offset;
4844 if (!nowrap_type_p (TREE_TYPE (base)))
4845 return false;
4847 base = expand_simple_operations (base);
4849 if (TREE_CODE (base) == SSA_NAME)
4851 gimple stmt = SSA_NAME_DEF_STMT (base);
4853 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4854 return false;
4856 code = gimple_assign_rhs_code (stmt);
4857 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4858 return false;
4860 e1 = gimple_assign_rhs1 (stmt);
4861 e2 = gimple_assign_rhs2 (stmt);
4863 else
4865 code = TREE_CODE (base);
4866 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4867 return false;
4868 e1 = TREE_OPERAND (base, 0);
4869 e2 = TREE_OPERAND (base, 1);
4872 /* Use affine expansion as a deeper inspection to prove the equality. */
4873 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4874 &aff_e2, &data->name_expansion_cache);
4875 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4876 &aff_offset, &data->name_expansion_cache);
4877 aff_combination_scale (&aff_offset, -1);
4878 switch (code)
4880 case PLUS_EXPR:
4881 aff_combination_add (&aff_e2, &aff_offset);
4882 if (aff_combination_zero_p (&aff_e2))
4883 return true;
4885 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4886 &aff_e1, &data->name_expansion_cache);
4887 aff_combination_add (&aff_e1, &aff_offset);
4888 return aff_combination_zero_p (&aff_e1);
4890 case POINTER_PLUS_EXPR:
4891 aff_combination_add (&aff_e2, &aff_offset);
4892 return aff_combination_zero_p (&aff_e2);
4894 default:
4895 return false;
4899 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4900 comparison with CAND. NITER describes the number of iterations of
4901 the loops. If successful, the comparison in COMP_P is altered accordingly.
4903 We aim to handle the following situation:
4905 sometype *base, *p;
4906 int a, b, i;
4908 i = a;
4909 p = p_0 = base + a;
4911 do
4912 {
4913 bla (*p);
4914 p++;
4915 i++;
4916 }
4917 while (i < b);
4919 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4920 We aim to optimize this to
4922 p = p_0 = base + a;
4923 do
4924 {
4925 bla (*p);
4926 p++;
4927 }
4928 while (p < p_0 - a + b);
4930 This preserves correctness, since the pointer arithmetic does not
4931 overflow. More precisely:
4933 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4934 overflow in computing it or the values of p.
4935 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4936 overflow. To prove this, we use the fact that p_0 = base + a. */
4938 static bool
4939 iv_elimination_compare_lt (struct ivopts_data *data,
4940 struct iv_cand *cand, enum tree_code *comp_p,
4941 struct tree_niter_desc *niter)
4943 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4944 struct aff_tree nit, tmpa, tmpb;
4945 enum tree_code comp;
4946 HOST_WIDE_INT step;
4948 /* We need to know that the candidate induction variable does not overflow.
4949 While more complex analysis may be used to prove this, for now just
4950 check that the variable appears in the original program and that it
4951 is computed in a type that guarantees no overflows. */
4952 cand_type = TREE_TYPE (cand->iv->base);
4953 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4954 return false;
4956 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4957 the calculation of the BOUND could overflow, making the comparison
4958 invalid. */
4959 if (!data->loop_single_exit_p)
4960 return false;
4962 /* We need to be able to decide whether the candidate is increasing or
4963 decreasing in order to choose the right comparison operator. */
4964 if (!cst_and_fits_in_hwi (cand->iv->step))
4965 return false;
4966 step = int_cst_value (cand->iv->step);
4968 /* Check that the number of iterations matches the expected pattern:
4969 a + 1 > b ? 0 : b - a - 1. */
4970 mbz = niter->may_be_zero;
4971 if (TREE_CODE (mbz) == GT_EXPR)
4973 /* Handle a + 1 > b. */
4974 tree op0 = TREE_OPERAND (mbz, 0);
4975 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4977 a = TREE_OPERAND (op0, 0);
4978 b = TREE_OPERAND (mbz, 1);
4980 else
4981 return false;
4983 else if (TREE_CODE (mbz) == LT_EXPR)
4985 tree op1 = TREE_OPERAND (mbz, 1);
4987 /* Handle b < a + 1. */
4988 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4990 a = TREE_OPERAND (op1, 0);
4991 b = TREE_OPERAND (mbz, 0);
4993 else
4994 return false;
4996 else
4997 return false;
4999 /* Expected number of iterations is B - A - 1. Check that it matches
5000 the actual number, i.e., that B - A - NITER = 1. */
5001 tree_to_aff_combination (niter->niter, nit_type, &nit);
5002 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5003 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5004 aff_combination_scale (&nit, -1);
5005 aff_combination_scale (&tmpa, -1);
5006 aff_combination_add (&tmpb, &tmpa);
5007 aff_combination_add (&tmpb, &nit);
5008 if (tmpb.n != 0 || tmpb.offset != 1)
5009 return false;
5011 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5012 overflow. */
5013 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5014 cand->iv->step,
5015 fold_convert (TREE_TYPE (cand->iv->step), a));
5016 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5017 return false;
5019 /* Determine the new comparison operator. */
5020 comp = step < 0 ? GT_EXPR : LT_EXPR;
5021 if (*comp_p == NE_EXPR)
5022 *comp_p = comp;
5023 else if (*comp_p == EQ_EXPR)
5024 *comp_p = invert_tree_comparison (comp, false);
5025 else
5026 gcc_unreachable ();
5028 return true;
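/* The B - A - NITER == 1 consistency check above, replayed on plain
   integers (hypothetical helper): for the pattern in the comment,
   whenever the loop runs at all we have niter = b - a - 1, so the
   difference is exactly 1.  */

static int
niter_pattern_sketch (int a, int b)
{
  int niter;
  if (a + 1 > b)
    return 1;			/* may_be_zero case: the loop is skipped */
  niter = b - a - 1;
  return b - a - niter == 1;	/* always holds for this pattern */
}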
5031 /* Check whether it is possible to express the condition in USE by comparison
5032 of candidate CAND. If so, store the value compared with to BOUND, and the
5033 comparison operator to COMP. */
5035 static bool
5036 may_eliminate_iv (struct ivopts_data *data,
5037 struct iv_use *use, struct iv_cand *cand, tree *bound,
5038 enum tree_code *comp)
5040 basic_block ex_bb;
5041 edge exit;
5042 tree period;
5043 struct loop *loop = data->current_loop;
5044 aff_tree bnd;
5045 struct tree_niter_desc *desc = NULL;
5047 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5048 return false;
5050 /* For now, this works only for exits that dominate the loop latch.
5051 TODO: extend to other conditions inside the loop body. */
5052 ex_bb = gimple_bb (use->stmt);
5053 if (use->stmt != last_stmt (ex_bb)
5054 || gimple_code (use->stmt) != GIMPLE_COND
5055 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5056 return false;
5058 exit = EDGE_SUCC (ex_bb, 0);
5059 if (flow_bb_inside_loop_p (loop, exit->dest))
5060 exit = EDGE_SUCC (ex_bb, 1);
5061 if (flow_bb_inside_loop_p (loop, exit->dest))
5062 return false;
5064 desc = niter_for_exit (data, exit);
5065 if (!desc)
5066 return false;
5068 /* Determine whether we can use the variable to test the exit condition.
5069 This is the case iff the period of the induction variable is greater
5070 than the number of iterations for which the exit condition is true. */
5071 period = iv_period (cand->iv);
5073 /* If the number of iterations is constant, compare against it directly. */
5074 if (TREE_CODE (desc->niter) == INTEGER_CST)
5076 /* See cand_value_at. */
5077 if (stmt_after_increment (loop, cand, use->stmt))
5079 if (!tree_int_cst_lt (desc->niter, period))
5080 return false;
5082 else
5084 if (tree_int_cst_lt (period, desc->niter))
5085 return false;
5089 /* If not, and if this is the only possible exit of the loop, see whether
5090 we can get a conservative estimate on the number of iterations of the
5091 entire loop and compare against that instead. */
5092 else
5094 widest_int period_value, max_niter;
5096 max_niter = desc->max;
5097 if (stmt_after_increment (loop, cand, use->stmt))
5098 max_niter += 1;
5099 period_value = wi::to_widest (period);
5100 if (wi::gtu_p (max_niter, period_value))
5102 /* See if we can take advantage of inferred loop bound information. */
5103 if (data->loop_single_exit_p)
5105 if (!max_loop_iterations (loop, &max_niter))
5106 return false;
5107 /* The loop bound is already adjusted by adding 1. */
5108 if (wi::gtu_p (max_niter, period_value))
5109 return false;
5111 else
5112 return false;
5116 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5118 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5119 aff_combination_to_tree (&bnd));
5120 *comp = iv_elimination_compare (data, use);
5122 /* It is unlikely that computing the number of iterations using division
5123 would be more profitable than keeping the original induction variable. */
5124 if (expression_expensive_p (*bound))
5125 return false;
5127 /* Sometimes, it is possible to handle the situation that the number of
5128 iterations may be zero unless additional assumptions hold, by using <
5129 instead of != in the exit condition.
5131 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5132 base the exit condition on it. However, that is often too
5133 expensive. */
5134 if (!integer_zerop (desc->may_be_zero))
5135 return iv_elimination_compare_lt (data, cand, comp, desc);
5137 return true;
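/* The period test above in miniature (hypothetical helper, constant
   iteration counts only): a value taken after the increment has advanced
   one extra step, so it requires a strictly smaller iteration count;
   see cand_value_at.  */

static int
period_covers_niter_sketch (unsigned long period, unsigned long niter,
			    int after_increment)
{
  return after_increment ? niter < period : niter <= period;
}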
5140 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5141 be copied if it is used in the loop body and DATA->body_includes_call. */
5143 static int
5144 parm_decl_cost (struct ivopts_data *data, tree bound)
5146 tree sbound = bound;
5147 STRIP_NOPS (sbound);
5149 if (TREE_CODE (sbound) == SSA_NAME
5150 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5151 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5152 && data->body_includes_call)
5153 return COSTS_N_INSNS (1);
5155 return 0;
5158 /* Determines cost of basing replacement of USE on CAND in a condition. */
5160 static bool
5161 determine_use_iv_cost_condition (struct ivopts_data *data,
5162 struct iv_use *use, struct iv_cand *cand)
5164 tree bound = NULL_TREE;
5165 struct iv *cmp_iv;
5166 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5167 comp_cost elim_cost, express_cost, cost, bound_cost;
5168 bool ok;
5169 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5170 tree *control_var, *bound_cst;
5171 enum tree_code comp = ERROR_MARK;
5173 /* Only consider real candidates. */
5174 if (!cand->iv)
5176 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5177 ERROR_MARK, -1);
5178 return false;
5181 /* Try iv elimination. */
5182 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5184 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5185 if (elim_cost.cost == 0)
5186 elim_cost.cost = parm_decl_cost (data, bound);
5187 else if (TREE_CODE (bound) == INTEGER_CST)
5188 elim_cost.cost = 0;
5189 /* If we replace a loop condition 'i < n' with 'p < base + n',
5190 depends_on_elim will have 'base' and 'n' set, which implies
5191 that both 'base' and 'n' will be live during the loop. More likely,
5192 'base + n' will be loop invariant, resulting in only one live value
5193 during the loop. So in that case we clear depends_on_elim and set
5194 elim_inv_expr_id instead. */
5195 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5197 elim_inv_expr_id = get_expr_id (data, bound);
5198 bitmap_clear (depends_on_elim);
5200 /* The bound is a loop invariant, so it will only be computed
5201 once. */
5202 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5204 else
5205 elim_cost = infinite_cost;
5207 /* Try expressing the original giv. If it is compared with an invariant,
5208 note that we cannot get rid of it. */
5209 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5210 NULL, &cmp_iv);
5211 gcc_assert (ok);
5213 /* When the condition is a comparison of the candidate IV against
5214 zero, prefer this IV.
5216 TODO: The constant that we're subtracting from the cost should
5217 be target-dependent. This information should be added to the
5218 target costs for each backend. */
5219 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5220 && integer_zerop (*bound_cst)
5221 && (operand_equal_p (*control_var, cand->var_after, 0)
5222 || operand_equal_p (*control_var, cand->var_before, 0)))
5223 elim_cost.cost -= 1;
5225 express_cost = get_computation_cost (data, use, cand, false,
5226 &depends_on_express, NULL,
5227 &express_inv_expr_id);
5228 fd_ivopts_data = data;
5229 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5231 /* Count the cost of the original bound as well. */
5232 bound_cost = force_var_cost (data, *bound_cst, NULL);
5233 if (bound_cost.cost == 0)
5234 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5235 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5236 bound_cost.cost = 0;
5237 express_cost.cost += bound_cost.cost;
5239 /* Choose the better approach, preferring the eliminated IV. */
5240 if (compare_costs (elim_cost, express_cost) <= 0)
5242 cost = elim_cost;
5243 depends_on = depends_on_elim;
5244 depends_on_elim = NULL;
5245 inv_expr_id = elim_inv_expr_id;
5247 else
5249 cost = express_cost;
5250 depends_on = depends_on_express;
5251 depends_on_express = NULL;
5252 bound = NULL_TREE;
5253 comp = ERROR_MARK;
5254 inv_expr_id = express_inv_expr_id;
5257 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5259 if (depends_on_elim)
5260 BITMAP_FREE (depends_on_elim);
5261 if (depends_on_express)
5262 BITMAP_FREE (depends_on_express);
5264 return !infinite_cost_p (cost);
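/* The final choice above in miniature (hypothetical helper with plain
   integer costs): the cheaper of the two independently computed costs
   wins, with ties going to elimination.  */

static int
prefer_elimination_sketch (int elim_cost, int express_cost)
{
  return elim_cost <= express_cost;
}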
5267 /* Determines cost of basing replacement of USE on CAND. Returns false
5268 if USE cannot be based on CAND. */
5270 static bool
5271 determine_use_iv_cost (struct ivopts_data *data,
5272 struct iv_use *use, struct iv_cand *cand)
5274 switch (use->type)
5276 case USE_NONLINEAR_EXPR:
5277 return determine_use_iv_cost_generic (data, use, cand);
5279 case USE_ADDRESS:
5280 return determine_use_iv_cost_address (data, use, cand);
5282 case USE_COMPARE:
5283 return determine_use_iv_cost_condition (data, use, cand);
5285 default:
5286 gcc_unreachable ();
5290 /* Return true if get_computation_cost indicates that autoincrement is
5291 a possibility for the pair of USE and CAND, false otherwise. */
5293 static bool
5294 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5295 struct iv_cand *cand)
5297 bitmap depends_on;
5298 bool can_autoinc;
5299 comp_cost cost;
5301 if (use->type != USE_ADDRESS)
5302 return false;
5304 cost = get_computation_cost (data, use, cand, true, &depends_on,
5305 &can_autoinc, NULL);
5307 BITMAP_FREE (depends_on);
5309 return !infinite_cost_p (cost) && can_autoinc;
5312 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5313 use that allows autoincrement, and set their AINC_USE if possible. */
5315 static void
5316 set_autoinc_for_original_candidates (struct ivopts_data *data)
5318 unsigned i, j;
5320 for (i = 0; i < n_iv_cands (data); i++)
5322 struct iv_cand *cand = iv_cand (data, i);
5323 struct iv_use *closest_before = NULL;
5324 struct iv_use *closest_after = NULL;
5325 if (cand->pos != IP_ORIGINAL)
5326 continue;
5328 for (j = 0; j < n_iv_uses (data); j++)
5330 struct iv_use *use = iv_use (data, j);
5331 unsigned uid = gimple_uid (use->stmt);
5333 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5334 continue;
5336 if (uid < gimple_uid (cand->incremented_at)
5337 && (closest_before == NULL
5338 || uid > gimple_uid (closest_before->stmt)))
5339 closest_before = use;
5341 if (uid > gimple_uid (cand->incremented_at)
5342 && (closest_after == NULL
5343 || uid < gimple_uid (closest_after->stmt)))
5344 closest_after = use;
5347 if (closest_before != NULL
5348 && autoinc_possible_for_pair (data, closest_before, cand))
5349 cand->ainc_use = closest_before;
5350 else if (closest_after != NULL
5351 && autoinc_possible_for_pair (data, closest_after, cand))
5352 cand->ainc_use = closest_after;
5356 /* Finds the candidates for the induction variables. */
5358 static void
5359 find_iv_candidates (struct ivopts_data *data)
5361 /* Add commonly used ivs. */
5362 add_standard_iv_candidates (data);
5364 /* Add old induction variables. */
5365 add_old_ivs_candidates (data);
5367 /* Add induction variables derived from uses. */
5368 add_derived_ivs_candidates (data);
5370 set_autoinc_for_original_candidates (data);
5372 /* Record the important candidates. */
5373 record_important_candidates (data);
5376 /* Determines costs of basing the use of the iv on an iv candidate. */
5378 static void
5379 determine_use_iv_costs (struct ivopts_data *data)
5381 unsigned i, j;
5382 struct iv_use *use;
5383 struct iv_cand *cand;
5384 bitmap to_clear = BITMAP_ALLOC (NULL);
5386 alloc_use_cost_map (data);
5388 for (i = 0; i < n_iv_uses (data); i++)
5390 use = iv_use (data, i);
5392 if (data->consider_all_candidates)
5394 for (j = 0; j < n_iv_cands (data); j++)
5396 cand = iv_cand (data, j);
5397 determine_use_iv_cost (data, use, cand);
5400 else
5402 bitmap_iterator bi;
5404 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5406 cand = iv_cand (data, j);
5407 if (!determine_use_iv_cost (data, use, cand))
5408 bitmap_set_bit (to_clear, j);
5411 /* Remove the candidates for which the cost is infinite from
5412 the list of related candidates. */
5413 bitmap_and_compl_into (use->related_cands, to_clear);
5414 bitmap_clear (to_clear);
5418 BITMAP_FREE (to_clear);
5420 if (dump_file && (dump_flags & TDF_DETAILS))
5422 fprintf (dump_file, "Use-candidate costs:\n");
5424 for (i = 0; i < n_iv_uses (data); i++)
5426 use = iv_use (data, i);
5428 fprintf (dump_file, "Use %d:\n", i);
5429 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5430 for (j = 0; j < use->n_map_members; j++)
5432 if (!use->cost_map[j].cand
5433 || infinite_cost_p (use->cost_map[j].cost))
5434 continue;
5436 fprintf (dump_file, " %d\t%d\t%d\t",
5437 use->cost_map[j].cand->id,
5438 use->cost_map[j].cost.cost,
5439 use->cost_map[j].cost.complexity);
5440 if (use->cost_map[j].depends_on)
5441 bitmap_print (dump_file,
5442 use->cost_map[j].depends_on, "","");
5443 if (use->cost_map[j].inv_expr_id != -1)
5444 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5445 fprintf (dump_file, "\n");
5448 fprintf (dump_file, "\n");
5450 fprintf (dump_file, "\n");
5454 /* Determines cost of the candidate CAND. */
5456 static void
5457 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5459 comp_cost cost_base;
5460 unsigned cost, cost_step;
5461 tree base;
5463 if (!cand->iv)
5465 cand->cost = 0;
5466 return;
5469 /* There are two costs associated with the candidate -- its increment
5470 and its initialization. The second is almost negligible for any loop
5471 that rolls enough, so we give it only very little weight. */
5473 base = cand->iv->base;
5474 cost_base = force_var_cost (data, base, NULL);
5475 /* It is exceptional for the iv register to happen to be initialized with
5476 the proper value at no cost. In general, there will at least be a regcopy
5477 or a const set. */
5478 if (cost_base.cost == 0)
5479 cost_base.cost = COSTS_N_INSNS (1);
5480 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5482 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5484 /* Prefer the original ivs unless we may gain something by replacing it.
5485 The reason is to make debugging simpler; so this is not relevant for
5486 artificial ivs created by other optimization passes. */
5487 if (cand->pos != IP_ORIGINAL
5488 || !SSA_NAME_VAR (cand->var_before)
5489 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5490 cost++;
5492 /* Prefer not to insert statements into latch unless there are some
5493 already (so that we do not create unnecessary jumps). */
5494 if (cand->pos == IP_END
5495 && empty_block_p (ip_end_pos (data->current_loop)))
5496 cost++;
5498 cand->cost = cost;
5499 cand->cost_step = cost_step;
5502 /* Determines costs of computation of the candidates. */
5504 static void
5505 determine_iv_costs (struct ivopts_data *data)
5507 unsigned i;
5509 if (dump_file && (dump_flags & TDF_DETAILS))
5511 fprintf (dump_file, "Candidate costs:\n");
5512 fprintf (dump_file, " cand\tcost\n");
5515 for (i = 0; i < n_iv_cands (data); i++)
5517 struct iv_cand *cand = iv_cand (data, i);
5519 determine_iv_cost (data, cand);
5521 if (dump_file && (dump_flags & TDF_DETAILS))
5522 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5525 if (dump_file && (dump_flags & TDF_DETAILS))
5526 fprintf (dump_file, "\n");
5529 /* Calculates cost for having SIZE induction variables. */
5531 static unsigned
5532 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5534 /* We add size to the cost, so that we prefer eliminating ivs
5535 if possible. */
5536 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5537 data->body_includes_call);
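/* A toy stand-in for the penalty above (the real
   estimate_reg_pressure_cost lives in cfgloopanal.c; this model and its
   parameters are made up): below the register budget only the size bias
   remains, beyond it every extra live value is charged like a spill.  */

static unsigned
global_cost_for_size_sketch (unsigned size, unsigned regs_used,
			     unsigned avail_regs, unsigned spill_cost)
{
  unsigned pressure = size + regs_used;
  if (pressure <= avail_regs)
    return size;
  return size + (pressure - avail_regs) * spill_cost;
}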
5540 /* For each size of the induction variable set determine the penalty. */
5542 static void
5543 determine_set_costs (struct ivopts_data *data)
5545 unsigned j, n;
5546 gphi *phi;
5547 gphi_iterator psi;
5548 tree op;
5549 struct loop *loop = data->current_loop;
5550 bitmap_iterator bi;
5552 if (dump_file && (dump_flags & TDF_DETAILS))
5554 fprintf (dump_file, "Global costs:\n");
5555 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5556 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5557 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5558 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5561 n = 0;
5562 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5564 phi = psi.phi ();
5565 op = PHI_RESULT (phi);
5567 if (virtual_operand_p (op))
5568 continue;
5570 if (get_iv (data, op))
5571 continue;
5573 n++;
5576 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5578 struct version_info *info = ver_info (data, j);
5580 if (info->inv_id && info->has_nonlin_use)
5581 n++;
5584 data->regs_used = n;
5585 if (dump_file && (dump_flags & TDF_DETAILS))
5586 fprintf (dump_file, " regs_used %d\n", n);
5588 if (dump_file && (dump_flags & TDF_DETAILS))
5590 fprintf (dump_file, " cost for size:\n");
5591 fprintf (dump_file, " ivs\tcost\n");
5592 for (j = 0; j <= 2 * target_avail_regs; j++)
5593 fprintf (dump_file, " %d\t%d\n", j,
5594 ivopts_global_cost_for_size (data, j));
5595 fprintf (dump_file, "\n");
5599 /* Returns true if A is a cheaper cost pair than B. */
5601 static bool
5602 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5604 int cmp;
5606 if (!a)
5607 return false;
5609 if (!b)
5610 return true;
5612 cmp = compare_costs (a->cost, b->cost);
5613 if (cmp < 0)
5614 return true;
5616 if (cmp > 0)
5617 return false;
5619 /* In case the costs are the same, prefer the cheaper candidate. */
5620 if (a->cand->cost < b->cand->cost)
5621 return true;
5623 return false;
5627 /* Returns the candidate by which USE is expressed in IVS. */
5629 static struct cost_pair *
5630 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5632 return ivs->cand_for_use[use->id];
5635 /* Computes the cost field of IVS structure. */
5637 static void
5638 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5640 comp_cost cost = ivs->cand_use_cost;
5642 cost.cost += ivs->cand_cost;
5644 cost.cost += ivopts_global_cost_for_size (data,
5645 ivs->n_regs + ivs->num_used_inv_expr);
5647 ivs->cost = cost;
5650 /* Removes the invariants in set INVS from set IVS. */
5652 static void
5653 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5655 bitmap_iterator bi;
5656 unsigned iid;
5658 if (!invs)
5659 return;
5661 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5663 ivs->n_invariant_uses[iid]--;
5664 if (ivs->n_invariant_uses[iid] == 0)
5665 ivs->n_regs--;
5669 /* Set USE not to be expressed by any candidate in IVS. */
5671 static void
5672 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5673 struct iv_use *use)
5675 unsigned uid = use->id, cid;
5676 struct cost_pair *cp;
5678 cp = ivs->cand_for_use[uid];
5679 if (!cp)
5680 return;
5681 cid = cp->cand->id;
5683 ivs->bad_uses++;
5684 ivs->cand_for_use[uid] = NULL;
5685 ivs->n_cand_uses[cid]--;
5687 if (ivs->n_cand_uses[cid] == 0)
5689 bitmap_clear_bit (ivs->cands, cid);
5690 /* Do not count the pseudocandidates. */
5691 if (cp->cand->iv)
5692 ivs->n_regs--;
5693 ivs->n_cands--;
5694 ivs->cand_cost -= cp->cand->cost;
5696 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5699 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5701 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5703 if (cp->inv_expr_id != -1)
5705 ivs->used_inv_expr[cp->inv_expr_id]--;
5706 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5707 ivs->num_used_inv_expr--;
5709 iv_ca_recount_cost (data, ivs);
5712 /* Add invariants in set INVS to set IVS. */
5714 static void
5715 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5717 bitmap_iterator bi;
5718 unsigned iid;
5720 if (!invs)
5721 return;
5723 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5725 ivs->n_invariant_uses[iid]++;
5726 if (ivs->n_invariant_uses[iid] == 1)
5727 ivs->n_regs++;
5731 /* Set cost pair for USE in set IVS to CP. */
5733 static void
5734 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5735 struct iv_use *use, struct cost_pair *cp)
5737 unsigned uid = use->id, cid;
5739 if (ivs->cand_for_use[uid] == cp)
5740 return;
5742 if (ivs->cand_for_use[uid])
5743 iv_ca_set_no_cp (data, ivs, use);
5745 if (cp)
5747 cid = cp->cand->id;
5749 ivs->bad_uses--;
5750 ivs->cand_for_use[uid] = cp;
5751 ivs->n_cand_uses[cid]++;
5752 if (ivs->n_cand_uses[cid] == 1)
5754 bitmap_set_bit (ivs->cands, cid);
5755 /* Do not count the pseudocandidates. */
5756 if (cp->cand->iv)
5757 ivs->n_regs++;
5758 ivs->n_cands++;
5759 ivs->cand_cost += cp->cand->cost;
5761 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5764 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5765 iv_ca_set_add_invariants (ivs, cp->depends_on);
5767 if (cp->inv_expr_id != -1)
5769 ivs->used_inv_expr[cp->inv_expr_id]++;
5770 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5771 ivs->num_used_inv_expr++;
5773 iv_ca_recount_cost (data, ivs);
5777 /* Extend set IVS by expressing USE by some of the candidates in it
5778 if possible. Consider all important candidates if candidates in
5779 set IVS don't give any result. */
5781 static void
5782 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5783 struct iv_use *use)
5785 struct cost_pair *best_cp = NULL, *cp;
5786 bitmap_iterator bi;
5787 unsigned i;
5788 struct iv_cand *cand;
5790 gcc_assert (ivs->upto >= use->id);
5791 ivs->upto++;
5792 ivs->bad_uses++;
5794 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5796 cand = iv_cand (data, i);
5797 cp = get_use_iv_cost (data, use, cand);
5798 if (cheaper_cost_pair (cp, best_cp))
5799 best_cp = cp;
5802 if (best_cp == NULL)
5804 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5806 cand = iv_cand (data, i);
5807 cp = get_use_iv_cost (data, use, cand);
5808 if (cheaper_cost_pair (cp, best_cp))
5809 best_cp = cp;
5813 iv_ca_set_cp (data, ivs, use, best_cp);
5816 /* Get cost for assignment IVS. */
5818 static comp_cost
5819 iv_ca_cost (struct iv_ca *ivs)
5821 /* This was a conditional expression but it triggered a bug in
5822 Sun C 5.5. */
5823 if (ivs->bad_uses)
5824 return infinite_cost;
5825 else
5826 return ivs->cost;
5829 /* Returns true if all dependences of CP are among invariants in IVS. */
5831 static bool
5832 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5834 unsigned i;
5835 bitmap_iterator bi;
5837 if (!cp->depends_on)
5838 return true;
5840 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5842 if (ivs->n_invariant_uses[i] == 0)
5843 return false;
5846 return true;
5849 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5850 it before NEXT_CHANGE. */
5852 static struct iv_ca_delta *
5853 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5854 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5856 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5858 change->use = use;
5859 change->old_cp = old_cp;
5860 change->new_cp = new_cp;
5861 change->next_change = next_change;
5863 return change;
5866 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5867 are rewritten. */
5869 static struct iv_ca_delta *
5870 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5872 struct iv_ca_delta *last;
5874 if (!l2)
5875 return l1;
5877 if (!l1)
5878 return l2;
5880 for (last = l1; last->next_change; last = last->next_change)
5881 continue;
5882 last->next_change = l2;
5884 return l1;
5887 /* Reverse the list of changes DELTA, forming the inverse to it. */
5889 static struct iv_ca_delta *
5890 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5892 struct iv_ca_delta *act, *next, *prev = NULL;
5894 for (act = delta; act; act = next)
5896 next = act->next_change;
5897 act->next_change = prev;
5898 prev = act;
5900 std::swap (act->old_cp, act->new_cp);
5903 return prev;
5906 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5907 reverted instead. */
5909 static void
5910 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5911 struct iv_ca_delta *delta, bool forward)
5913 struct cost_pair *from, *to;
5914 struct iv_ca_delta *act;
5916 if (!forward)
5917 delta = iv_ca_delta_reverse (delta);
5919 for (act = delta; act; act = act->next_change)
5921 from = act->old_cp;
5922 to = act->new_cp;
5923 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5924 iv_ca_set_cp (data, ivs, act->use, to);
5927 if (!forward)
5928 iv_ca_delta_reverse (delta);
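/* The usage pattern the delta machinery exists for, as followed by
   iv_ca_extend and iv_ca_narrow below (a hypothetical wrapper, not part
   of the pass): apply the change tentatively, measure the resulting
   cost, then roll the change back.  */

static comp_cost
iv_ca_measure_delta_sketch (struct ivopts_data *data, struct iv_ca *ivs,
			    struct iv_ca_delta *delta)
{
  comp_cost cost;
  iv_ca_delta_commit (data, ivs, delta, true);	/* apply */
  cost = iv_ca_cost (ivs);			/* measure */
  iv_ca_delta_commit (data, ivs, delta, false);	/* revert */
  return cost;
}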
5931 /* Returns true if CAND is used in IVS. */
5933 static bool
5934 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5936 return ivs->n_cand_uses[cand->id] > 0;
5939 /* Returns number of induction variable candidates in the set IVS. */
5941 static unsigned
5942 iv_ca_n_cands (struct iv_ca *ivs)
5944 return ivs->n_cands;
5947 /* Free the list of changes DELTA. */
5949 static void
5950 iv_ca_delta_free (struct iv_ca_delta **delta)
5952 struct iv_ca_delta *act, *next;
5954 for (act = *delta; act; act = next)
5956 next = act->next_change;
5957 free (act);
5960 *delta = NULL;
5963 /* Allocates a new iv candidate assignment. */
5965 static struct iv_ca *
5966 iv_ca_new (struct ivopts_data *data)
5968 struct iv_ca *nw = XNEW (struct iv_ca);
5970 nw->upto = 0;
5971 nw->bad_uses = 0;
5972 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5973 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5974 nw->cands = BITMAP_ALLOC (NULL);
5975 nw->n_cands = 0;
5976 nw->n_regs = 0;
5977 nw->cand_use_cost = no_cost;
5978 nw->cand_cost = 0;
5979 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5980 nw->cost = no_cost;
5981 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5982 nw->num_used_inv_expr = 0;
5984 return nw;
5987 /* Free memory occupied by the set IVS. */
5989 static void
5990 iv_ca_free (struct iv_ca **ivs)
5992 free ((*ivs)->cand_for_use);
5993 free ((*ivs)->n_cand_uses);
5994 BITMAP_FREE ((*ivs)->cands);
5995 free ((*ivs)->n_invariant_uses);
5996 free ((*ivs)->used_inv_expr);
5997 free (*ivs);
5998 *ivs = NULL;
6001 /* Dumps IVS to FILE. */
6003 static void
6004 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6006 const char *pref = " invariants ";
6007 unsigned i;
6008 comp_cost cost = iv_ca_cost (ivs);
6010 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6011 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6012 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6013 bitmap_print (file, ivs->cands, " candidates: ","\n");
6015 for (i = 0; i < ivs->upto; i++)
6017 struct iv_use *use = iv_use (data, i);
6018 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6019 if (cp)
6020 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6021 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6022 else
6023 fprintf (file, " use:%d --> ??\n", use->id);
6026 for (i = 1; i <= data->max_inv_id; i++)
6027 if (ivs->n_invariant_uses[i])
6029 fprintf (file, "%s%d", pref, i);
6030 pref = ", ";
6032 fprintf (file, "\n\n");
6035 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6036 new set, and store differences in DELTA. Number of induction variables
6037 in the new set is stored to N_IVS. MIN_NCAND is a flag: when it is true,
6038 the function tries to find a solution with a minimal number of iv candidates. */
6040 static comp_cost
6041 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6042 struct iv_cand *cand, struct iv_ca_delta **delta,
6043 unsigned *n_ivs, bool min_ncand)
6045 unsigned i;
6046 comp_cost cost;
6047 struct iv_use *use;
6048 struct cost_pair *old_cp, *new_cp;
6050 *delta = NULL;
6051 for (i = 0; i < ivs->upto; i++)
6053 use = iv_use (data, i);
6054 old_cp = iv_ca_cand_for_use (ivs, use);
6056 if (old_cp
6057 && old_cp->cand == cand)
6058 continue;
6060 new_cp = get_use_iv_cost (data, use, cand);
6061 if (!new_cp)
6062 continue;
6064 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6065 continue;
6067 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6068 continue;
6070 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6073 iv_ca_delta_commit (data, ivs, *delta, true);
6074 cost = iv_ca_cost (ivs);
6075 if (n_ivs)
6076 *n_ivs = iv_ca_n_cands (ivs);
6077 iv_ca_delta_commit (data, ivs, *delta, false);
6079 return cost;
6082 /* Try narrowing set IVS by removing CAND. Return the cost of
6083 the new set and store the differences in DELTA. START is
6084 the candidate with which we start narrowing. */
6086 static comp_cost
6087 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6088 struct iv_cand *cand, struct iv_cand *start,
6089 struct iv_ca_delta **delta)
6091 unsigned i, ci;
6092 struct iv_use *use;
6093 struct cost_pair *old_cp, *new_cp, *cp;
6094 bitmap_iterator bi;
6095 struct iv_cand *cnd;
6096 comp_cost cost, best_cost, acost;
6098 *delta = NULL;
6099 for (i = 0; i < n_iv_uses (data); i++)
6101 use = iv_use (data, i);
6103 old_cp = iv_ca_cand_for_use (ivs, use);
6104 if (old_cp->cand != cand)
6105 continue;
6107 best_cost = iv_ca_cost (ivs);
6108 /* Start narrowing with START. */
6109 new_cp = get_use_iv_cost (data, use, start);
6111 if (data->consider_all_candidates)
6113 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6115 if (ci == cand->id || (start && ci == start->id))
6116 continue;
6118 cnd = iv_cand (data, ci);
6120 cp = get_use_iv_cost (data, use, cnd);
6121 if (!cp)
6122 continue;
6124 iv_ca_set_cp (data, ivs, use, cp);
6125 acost = iv_ca_cost (ivs);
6127 if (compare_costs (acost, best_cost) < 0)
6129 best_cost = acost;
6130 new_cp = cp;
6134 else
6136 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6138 if (ci == cand->id || (start && ci == start->id))
6139 continue;
6141 cnd = iv_cand (data, ci);
6143 cp = get_use_iv_cost (data, use, cnd);
6144 if (!cp)
6145 continue;
6147 iv_ca_set_cp (data, ivs, use, cp);
6148 acost = iv_ca_cost (ivs);
6150 if (compare_costs (acost, best_cost) < 0)
6152 best_cost = acost;
6153 new_cp = cp;
6157 /* Restore to old cp for use. */
6158 iv_ca_set_cp (data, ivs, use, old_cp);
6160 if (!new_cp)
6162 iv_ca_delta_free (delta);
6163 return infinite_cost;
6166 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6169 iv_ca_delta_commit (data, ivs, *delta, true);
6170 cost = iv_ca_cost (ivs);
6171 iv_ca_delta_commit (data, ivs, *delta, false);
6173 return cost;
6176 /* Try optimizing the set of candidates IVS by removing candidates other
6177 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6178 differences in DELTA. */
6180 static comp_cost
6181 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6182 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6184 bitmap_iterator bi;
6185 struct iv_ca_delta *act_delta, *best_delta;
6186 unsigned i;
6187 comp_cost best_cost, acost;
6188 struct iv_cand *cand;
6190 best_delta = NULL;
6191 best_cost = iv_ca_cost (ivs);
6193 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6195 cand = iv_cand (data, i);
6197 if (cand == except_cand)
6198 continue;
6200 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6202 if (compare_costs (acost, best_cost) < 0)
6204 best_cost = acost;
6205 iv_ca_delta_free (&best_delta);
6206 best_delta = act_delta;
6208 else
6209 iv_ca_delta_free (&act_delta);
6212 if (!best_delta)
6214 *delta = NULL;
6215 return best_cost;
6218 /* Recurse to possibly remove other unnecessary ivs. */
6219 iv_ca_delta_commit (data, ivs, best_delta, true);
6220 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6221 iv_ca_delta_commit (data, ivs, best_delta, false);
6222 *delta = iv_ca_delta_join (best_delta, *delta);
6223 return best_cost;
6226 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6227 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6228 the corresponding cost_pair; otherwise just return BEST_CP. */
6230 static struct cost_pair*
6231 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6232 unsigned int cand_idx, struct iv_cand *old_cand,
6233 struct cost_pair *best_cp)
6235 struct iv_cand *cand;
6236 struct cost_pair *cp;
6238 gcc_assert (old_cand != NULL && best_cp != NULL);
6239 if (cand_idx == old_cand->id)
6240 return best_cp;
6242 cand = iv_cand (data, cand_idx);
6243 cp = get_use_iv_cost (data, use, cand);
6244 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6245 return cp;
6247 return best_cp;
6250 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6251 which are used by more than one iv uses. For each of those candidates,
6252 this function tries to represent iv uses under that candidate using
6253 other ones with lower local cost, then tries to prune the new set.
6254 If the new set has lower cost, It returns the new cost after recording
6255 candidate replacement in list DELTA. */
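/* An illustrative (hypothetical) instance: let candidate c1 serve uses
   u1 and u2 with local costs 4 + 4, while c2 expresses u1 for 2 and c3
   expresses u2 for 3.  Extending or narrowing never moves both uses
   off c1 at once, so {c1} is a local optimum; re-expressing c1's uses
   by c2 and c3 and pruning afterwards reaches the cheaper {c2, c3}.  */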
static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_use *use;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
	  || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
	continue;

      cand = iv_cand (data, i);

      act_delta = NULL;
      /* Represent the uses under the current candidate using other ones
	 with lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
	{
	  use = iv_use (data, j);
	  old_cp = iv_ca_cand_for_use (ivs, use);

	  if (old_cp->cand != cand)
	    continue;

	  best_cp = old_cp;
	  if (data->consider_all_candidates)
	    for (k = 0; k < n_iv_cands (data); k++)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);
	  else
	    EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
	      best_cp = cheaper_cost_with_cand (data, use, k,
						old_cp->cand, best_cp);

	  if (best_cp == old_cp)
	    continue;

	  act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
	}
      /* No need for further pruning.  */
      if (!act_delta)
	continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (compare_costs (acost, orig_cost) < 0)
	{
	  *delta = act_delta;
	  return acost;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added many ivs specific to the uses here, the optimization algorithm
     later would be likely to get stuck in a local minimum, thus causing us
     to create too many ivs.  The approach from few ivs to more seems more
     likely to be successful -- starting from few ivs, replacing an expensive
     use by a specific iv should always be a win.  */
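  /* E.g. (purely illustrative), in

       for (i = 0; i < n; i++)
	 a[i] = b[i] + c[i];

     one generic biv for i can serve all three address uses, whereas
     seeding the set with a separate address iv per array would start
     the search from a solution that is hard to improve later.  */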
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos != IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function also tries to
   break a local optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
		    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
	{
	  *try_replace_p = false;
	  /* So far the candidate selection algorithm tends to choose fewer
	     IVs so that it can handle cases in which loops have many
	     variables but the best choice is often to use only one general
	     biv.  One weakness is that it can't handle the opposite case,
	     in which different candidates should be chosen with respect to
	     each use.  To solve the problem, we replace candidates in the
	     manner described in the comment of iv_ca_replace, thus giving
	     the general algorithm a chance to break the local optimal
	     fixed-point in these cases.  */
	  best_cost = iv_ca_replace (data, ivs, &best_delta);
	}
    }

  if (!best_delta)
    return false;

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */
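/* A hypothetical run for illustration (the numbers are invented):
   starting from {i0} with cost 14, one call of try_improve_iv_set may
   extend the set to {i0, c5} (cost 11), the next may prune it back to
   {c5} (cost 9), and the loop below stops as soon as neither extending,
   pruning nor replacing lowers the cost.  */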
static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and then try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates the new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = iv_cand (data, i);
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}

/* Rewrites USE (definition of an iv used in a nonlinear expression)
   using candidate CAND.  */
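/* For example (illustrative GIMPLE, not taken from a real dump): with a
   selected candidate ivtmp that starts at 0 and steps by 4, the use

     x_1 = i_2 * 4;

   can be rewritten as

     x_1 = ivtmp_3;

   folding the multiplication into the candidate's step.  */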
static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gassign *ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op && expr_invariant_in_loop_p (data->current_loop, op))
	return;
    }

  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;

   Directly propagating t over to (1) will introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms
   it into:

     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle a conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (an address that is an iv) using candidate CAND.  */

static void
rewrite_use_address_1 (struct ivopts_data *data,
		       struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
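  /* E.g. (illustrative): for a candidate with base &a[0] and step 4,
     AFF is expressed in an unsigned type; passing the candidate's
     variable as BASE_HINT below tells create_mem_ref which part of AFF
     is the object base rather than a mere index.  */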
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (an address that is an iv) using candidate CAND.  If it is
   the first use of a group, rewrites the sub uses in the group too.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  struct iv_use *next;

  gcc_assert (use->sub_id == 0);
  rewrite_use_address_1 (data, use, cand);
  update_stmt (use->stmt);

  for (next = use->next; next != NULL; next = next->next)
    {
      rewrite_use_address_1 (data, next, cand);
      update_stmt (next->stmt);
    }
}

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */
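/* For example (illustrative): when elimination found a bound for the
   candidate, the exit test

     if (i_1 < n_2)

   may be replaced by

     if (ivtmp_3 != bound_4)

   with bound_4 computed once in the loop preheader.  */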
static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}

/* Rewrites USE using candidate CAND.  */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  update_stmt (use->stmt);
}

/* Rewrite the uses using the selected induction variables.  */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
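		  /* E.g. (illustrative): "# DEBUG x => i_5" binds the
		     DEF by itself and counts once, while "# DEBUG y =>
		     i_5 + 1" embeds it in a larger value and counts
		     twice, forcing the debug temp path below.  */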
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < n_iv_uses (data) && i < 64; i++)
		{
		  cand = iv_use (data, i)->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      &dummy_use, best_cand,
					      SSA_NAME_DEF_STMT (def));
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
		  else
		    DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}

/* Frees the memory occupied by the struct tree_niter_desc in *VALUE.
   Callback for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees the data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct iv_use *pre = use, *sub = use->next;

      while (sub)
	{
	  gcc_assert (sub->related_cands == NULL);
	  gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);

	  free (sub->iv);
	  pre = sub;
	  sub = sub->next;
	  free (pre);
	}

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  data->iv_uses.truncate (0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  data->iv_candidates.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();
  data->inv_expr_id = 0;
}

/* Finalizes the data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->iv_uses.release ();
  data->iv_candidates.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
}

/* Returns true if the loop body BODY includes any function calls.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Find the interesting uses (item 1).  */
  find_interesting_uses (data);
  group_address_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Find the candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculate the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);