2015-04-07 Richard Biener <rguenther@suse.de>
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob0fd4a9082906ece26e1a0060bdf5e414c3a45cba
/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction variables
      and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "hash-set.h"
69 #include "machmode.h"
70 #include "vec.h"
71 #include "double-int.h"
72 #include "input.h"
73 #include "alias.h"
74 #include "symtab.h"
75 #include "wide-int.h"
76 #include "inchash.h"
77 #include "tree.h"
78 #include "fold-const.h"
79 #include "stor-layout.h"
80 #include "tm_p.h"
81 #include "predict.h"
82 #include "hard-reg-set.h"
83 #include "function.h"
84 #include "dominance.h"
85 #include "cfg.h"
86 #include "basic-block.h"
87 #include "gimple-pretty-print.h"
88 #include "hash-map.h"
89 #include "hash-table.h"
90 #include "tree-ssa-alias.h"
91 #include "internal-fn.h"
92 #include "tree-eh.h"
93 #include "gimple-expr.h"
94 #include "is-a.h"
95 #include "gimple.h"
96 #include "gimplify.h"
97 #include "gimple-iterator.h"
98 #include "gimplify-me.h"
99 #include "gimple-ssa.h"
100 #include "plugin-api.h"
101 #include "ipa-ref.h"
102 #include "cgraph.h"
103 #include "tree-cfg.h"
104 #include "tree-phinodes.h"
105 #include "ssa-iterators.h"
106 #include "stringpool.h"
107 #include "tree-ssanames.h"
108 #include "tree-ssa-loop-ivopts.h"
109 #include "tree-ssa-loop-manip.h"
110 #include "tree-ssa-loop-niter.h"
111 #include "tree-ssa-loop.h"
112 #include "hashtab.h"
113 #include "rtl.h"
114 #include "flags.h"
115 #include "statistics.h"
116 #include "real.h"
117 #include "fixed-value.h"
118 #include "insn-config.h"
119 #include "expmed.h"
120 #include "dojump.h"
121 #include "explow.h"
122 #include "calls.h"
123 #include "emit-rtl.h"
124 #include "varasm.h"
125 #include "stmt.h"
126 #include "expr.h"
127 #include "tree-dfa.h"
128 #include "tree-ssa.h"
129 #include "cfgloop.h"
130 #include "tree-pass.h"
131 #include "tree-chrec.h"
132 #include "tree-scalar-evolution.h"
133 #include "params.h"
134 #include "langhooks.h"
135 #include "tree-affine.h"
136 #include "target.h"
137 #include "tree-inline.h"
138 #include "tree-ssa-propagate.h"
139 #include "tree-ssa-address.h"
140 #include "builtins.h"
141 #include "tree-vectorizer.h"
143 /* FIXME: Expressions are expanded to RTL in this pass to determine the
144 cost of different addressing modes. This should be moved to a TBD
145 interface between the GIMPLE and RTL worlds. */
146 #include "recog.h"
/* Cost value used to represent "infinitely expensive".  */
#define INFTY 10000000

/* Fallback trip count used for LOOP when no estimate is available.  The
   argument is currently ignored.  */
#define AVG_LOOP_NITER(LOOP) 5
153 /* Returns the expected number of loop iterations for LOOP.
154 The average trip count is computed from profile data if it
155 exists. */
157 static inline HOST_WIDE_INT
158 avg_loop_niter (struct loop *loop)
160 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
161 if (niter == -1)
162 return AVG_LOOP_NITER (loop);
164 return niter;
167 /* Representation of the induction variable. */
168 struct iv
170 tree base; /* Initial value of the iv. */
171 tree base_object; /* A memory object to that the induction variable points. */
172 tree step; /* Step of the iv (constant only). */
173 tree ssa_name; /* The ssa name with the value. */
174 bool biv_p; /* Is it a biv? */
175 bool have_use_for; /* Do we already have a use for it? */
176 unsigned use_id; /* The identifier in the use if it is the case. */
179 /* Per-ssa version information (induction variable descriptions, etc.). */
180 struct version_info
182 tree name; /* The ssa name. */
183 struct iv *iv; /* Induction variable description. */
184 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
185 an expression that is not an induction variable. */
186 bool preserve_biv; /* For the original biv, whether to preserve it. */
187 unsigned inv_id; /* Id of an invariant. */
/* The kinds of induction variable uses that are tracked.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,

  /* Use in an address.  */
  USE_ADDRESS,

  /* Use is a compare.  */
  USE_COMPARE
};
198 /* Cost of a computation. */
199 typedef struct
201 int cost; /* The runtime cost. */
202 unsigned complexity; /* The estimate of the complexity of the code for
203 the computation (in no concrete units --
204 complexity field should be larger for more
205 complex expressions and addressing modes). */
206 } comp_cost;
208 static const comp_cost no_cost = {0, 0};
209 static const comp_cost infinite_cost = {INFTY, INFTY};
211 /* The candidate - cost pair. */
212 struct cost_pair
214 struct iv_cand *cand; /* The candidate. */
215 comp_cost cost; /* The cost. */
216 bitmap depends_on; /* The list of invariants that have to be
217 preserved. */
218 tree value; /* For final value elimination, the expression for
219 the final value of the iv. For iv elimination,
220 the new bound to compare with. */
221 enum tree_code comp; /* For iv elimination, the comparison. */
222 int inv_expr_id; /* Loop invariant expression id. */
225 /* Use. */
226 struct iv_use
228 unsigned id; /* The id of the use. */
229 enum use_type type; /* Type of the use. */
230 struct iv *iv; /* The induction variable it is based on. */
231 gimple stmt; /* Statement in that it occurs. */
232 tree *op_p; /* The place where it occurs. */
233 bitmap related_cands; /* The set of "related" iv candidates, plus the common
234 important ones. */
236 unsigned n_map_members; /* Number of candidates in the cost_map list. */
237 struct cost_pair *cost_map;
238 /* The costs wrto the iv candidates. */
240 struct iv_cand *selected;
241 /* The selected candidate. */
/* Where the increment of an iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,

  /* At the end of the latch block.  */
  IP_END,

  /* Immediately before a specific use.  */
  IP_BEFORE_USE,

  /* Immediately after a specific use.  */
  IP_AFTER_USE,

  /* The original biv.  */
  IP_ORIGINAL
};
254 /* The induction variable candidate. */
255 struct iv_cand
257 unsigned id; /* The number of the candidate. */
258 bool important; /* Whether this is an "important" candidate, i.e. such
259 that it should be considered by all uses. */
260 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
261 gimple incremented_at;/* For original biv, the statement where it is
262 incremented. */
263 tree var_before; /* The variable used for it before increment. */
264 tree var_after; /* The variable used for it after increment. */
265 struct iv *iv; /* The value of the candidate. NULL for
266 "pseudocandidate" used to indicate the possibility
267 to replace the final value of an iv by direct
268 computation of the value. */
269 unsigned cost; /* Cost of the candidate. */
270 unsigned cost_step; /* Cost of the candidate's increment operation. */
271 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
272 where it is incremented. */
273 bitmap depends_on; /* The list of invariants that are used in step of the
274 biv. */
277 /* Loop invariant expression hashtable entry. */
278 struct iv_inv_expr_ent
280 tree expr;
281 int id;
282 hashval_t hash;
/* Pointer typedefs for uses and candidates, used as element types of the
   vectors kept in struct ivopts_data.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;
291 /* Hashtable helpers. */
293 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
295 typedef iv_inv_expr_ent value_type;
296 typedef iv_inv_expr_ent compare_type;
297 static inline hashval_t hash (const value_type *);
298 static inline bool equal (const value_type *, const compare_type *);
301 /* Hash function for loop invariant expressions. */
303 inline hashval_t
304 iv_inv_expr_hasher::hash (const value_type *expr)
306 return expr->hash;
309 /* Hash table equality function for expressions. */
311 inline bool
312 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
314 return expr1->hash == expr2->hash
315 && operand_equal_p (expr1->expr, expr2->expr, 0);
318 struct ivopts_data
320 /* The currently optimized loop. */
321 struct loop *current_loop;
322 source_location loop_loc;
324 /* Numbers of iterations for all exits of the current loop. */
325 hash_map<edge, tree_niter_desc *> *niters;
327 /* Number of registers used in it. */
328 unsigned regs_used;
330 /* The size of version_info array allocated. */
331 unsigned version_info_size;
333 /* The array of information for the ssa names. */
334 struct version_info *version_info;
336 /* The hashtable of loop invariant expressions created
337 by ivopt. */
338 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
340 /* Loop invariant expression id. */
341 int inv_expr_id;
343 /* The bitmap of indices in version_info whose value was changed. */
344 bitmap relevant;
346 /* The uses of induction variables. */
347 vec<iv_use_p> iv_uses;
349 /* The candidates. */
350 vec<iv_cand_p> iv_candidates;
352 /* A bitmap of important candidates. */
353 bitmap important_candidates;
355 /* Cache used by tree_to_aff_combination_expand. */
356 hash_map<tree, name_expansion *> *name_expansion_cache;
358 /* The maximum invariant id. */
359 unsigned max_inv_id;
361 /* Whether to consider just related and important candidates when replacing a
362 use. */
363 bool consider_all_candidates;
365 /* Are we optimizing for speed? */
366 bool speed;
368 /* Whether the loop body includes any function calls. */
369 bool body_includes_call;
371 /* Whether the loop body can only be exited via single exit. */
372 bool loop_single_exit_p;
375 /* An assignment of iv candidates to uses. */
377 struct iv_ca
379 /* The number of uses covered by the assignment. */
380 unsigned upto;
382 /* Number of uses that cannot be expressed by the candidates in the set. */
383 unsigned bad_uses;
385 /* Candidate assigned to a use, together with the related costs. */
386 struct cost_pair **cand_for_use;
388 /* Number of times each candidate is used. */
389 unsigned *n_cand_uses;
391 /* The candidates used. */
392 bitmap cands;
394 /* The number of candidates in the set. */
395 unsigned n_cands;
397 /* Total number of registers needed. */
398 unsigned n_regs;
400 /* Total cost of expressing uses. */
401 comp_cost cand_use_cost;
403 /* Total cost of candidates. */
404 unsigned cand_cost;
406 /* Number of times each invariant is used. */
407 unsigned *n_invariant_uses;
409 /* The array holding the number of uses of each loop
410 invariant expressions created by ivopt. */
411 unsigned *used_inv_expr;
413 /* The number of created loop invariants. */
414 unsigned num_used_inv_expr;
416 /* Total cost of the assignment. */
417 comp_cost cost;
/* One element of a list describing how two iv candidate assignments
   differ.  */

struct iv_ca_delta
{
  struct iv_use *use;		/* Changed use.  */

  struct cost_pair *old_cp;	/* An old assignment (for rollback
				   purposes).  */

  struct cost_pair *new_cp;	/* A new assignment.  */

  struct iv_ca_delta *next_change;
				/* Next change in the list.  */
};
/* Bound on the number of candidates below which all candidates are
   considered.  Read from PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences than this, we just give up (it is quite
   unlikely that optimizing such a loop would help, and it would take ages).
   Read from PARAM_IV_MAX_CONSIDERED_USES.  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  Read from
   PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
454 /* The list of trees for that the decl_rtl field must be reset is stored
455 here. */
457 static vec<tree> decl_rtl_to_reset;
459 static comp_cost force_expr_to_var_cost (tree, bool);
461 /* Number of uses recorded in DATA. */
463 static inline unsigned
464 n_iv_uses (struct ivopts_data *data)
466 return data->iv_uses.length ();
469 /* Ith use recorded in DATA. */
471 static inline struct iv_use *
472 iv_use (struct ivopts_data *data, unsigned i)
474 return data->iv_uses[i];
477 /* Number of candidates recorded in DATA. */
479 static inline unsigned
480 n_iv_cands (struct ivopts_data *data)
482 return data->iv_candidates.length ();
485 /* Ith candidate recorded in DATA. */
487 static inline struct iv_cand *
488 iv_cand (struct ivopts_data *data, unsigned i)
490 return data->iv_candidates[i];
493 /* The single loop exit if it dominates the latch, NULL otherwise. */
495 edge
496 single_dom_exit (struct loop *loop)
498 edge exit = single_exit (loop);
500 if (!exit)
501 return NULL;
503 if (!just_once_each_iteration_p (loop, exit->src))
504 return NULL;
506 return exit;
509 /* Dumps information about the induction variable IV to FILE. */
511 void
512 dump_iv (FILE *file, struct iv *iv)
514 if (iv->ssa_name)
516 fprintf (file, "ssa name ");
517 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
518 fprintf (file, "\n");
521 fprintf (file, " type ");
522 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
523 fprintf (file, "\n");
525 if (iv->step)
527 fprintf (file, " base ");
528 print_generic_expr (file, iv->base, TDF_SLIM);
529 fprintf (file, "\n");
531 fprintf (file, " step ");
532 print_generic_expr (file, iv->step, TDF_SLIM);
533 fprintf (file, "\n");
535 else
537 fprintf (file, " invariant ");
538 print_generic_expr (file, iv->base, TDF_SLIM);
539 fprintf (file, "\n");
542 if (iv->base_object)
544 fprintf (file, " base object ");
545 print_generic_expr (file, iv->base_object, TDF_SLIM);
546 fprintf (file, "\n");
549 if (iv->biv_p)
550 fprintf (file, " is a biv\n");
553 /* Dumps information about the USE to FILE. */
555 void
556 dump_use (FILE *file, struct iv_use *use)
558 fprintf (file, "use %d\n", use->id);
560 switch (use->type)
562 case USE_NONLINEAR_EXPR:
563 fprintf (file, " generic\n");
564 break;
566 case USE_ADDRESS:
567 fprintf (file, " address\n");
568 break;
570 case USE_COMPARE:
571 fprintf (file, " compare\n");
572 break;
574 default:
575 gcc_unreachable ();
578 fprintf (file, " in statement ");
579 print_gimple_stmt (file, use->stmt, 0, 0);
580 fprintf (file, "\n");
582 fprintf (file, " at position ");
583 if (use->op_p)
584 print_generic_expr (file, *use->op_p, TDF_SLIM);
585 fprintf (file, "\n");
587 dump_iv (file, use->iv);
589 if (use->related_cands)
591 fprintf (file, " related candidates ");
592 dump_bitmap (file, use->related_cands);
596 /* Dumps information about the uses to FILE. */
598 void
599 dump_uses (FILE *file, struct ivopts_data *data)
601 unsigned i;
602 struct iv_use *use;
604 for (i = 0; i < n_iv_uses (data); i++)
606 use = iv_use (data, i);
608 dump_use (file, use);
609 fprintf (file, "\n");
613 /* Dumps information about induction variable candidate CAND to FILE. */
615 void
616 dump_cand (FILE *file, struct iv_cand *cand)
618 struct iv *iv = cand->iv;
620 fprintf (file, "candidate %d%s\n",
621 cand->id, cand->important ? " (important)" : "");
623 if (cand->depends_on)
625 fprintf (file, " depends on ");
626 dump_bitmap (file, cand->depends_on);
629 if (!iv)
631 fprintf (file, " final value replacement\n");
632 return;
635 if (cand->var_before)
637 fprintf (file, " var_before ");
638 print_generic_expr (file, cand->var_before, TDF_SLIM);
639 fprintf (file, "\n");
641 if (cand->var_after)
643 fprintf (file, " var_after ");
644 print_generic_expr (file, cand->var_after, TDF_SLIM);
645 fprintf (file, "\n");
648 switch (cand->pos)
650 case IP_NORMAL:
651 fprintf (file, " incremented before exit test\n");
652 break;
654 case IP_BEFORE_USE:
655 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
656 break;
658 case IP_AFTER_USE:
659 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
660 break;
662 case IP_END:
663 fprintf (file, " incremented at end\n");
664 break;
666 case IP_ORIGINAL:
667 fprintf (file, " original biv\n");
668 break;
671 dump_iv (file, iv);
674 /* Returns the info for ssa version VER. */
676 static inline struct version_info *
677 ver_info (struct ivopts_data *data, unsigned ver)
679 return data->version_info + ver;
682 /* Returns the info for ssa name NAME. */
684 static inline struct version_info *
685 name_info (struct ivopts_data *data, tree name)
687 return ver_info (data, SSA_NAME_VERSION (name));
690 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
691 emitted in LOOP. */
693 static bool
694 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
696 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
698 gcc_assert (bb);
700 if (sbb == loop->latch)
701 return true;
703 if (sbb != bb)
704 return false;
706 return stmt == last_stmt (bb);
709 /* Returns true if STMT if after the place where the original induction
710 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
711 if the positions are identical. */
713 static bool
714 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
716 basic_block cand_bb = gimple_bb (cand->incremented_at);
717 basic_block stmt_bb = gimple_bb (stmt);
719 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
720 return false;
722 if (stmt_bb != cand_bb)
723 return true;
725 if (true_if_equal
726 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
727 return true;
728 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
731 /* Returns true if STMT if after the place where the induction variable
732 CAND is incremented in LOOP. */
734 static bool
735 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
737 switch (cand->pos)
739 case IP_END:
740 return false;
742 case IP_NORMAL:
743 return stmt_after_ip_normal_pos (loop, stmt);
745 case IP_ORIGINAL:
746 case IP_AFTER_USE:
747 return stmt_after_inc_pos (cand, stmt, false);
749 case IP_BEFORE_USE:
750 return stmt_after_inc_pos (cand, stmt, true);
752 default:
753 gcc_unreachable ();
757 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
759 static bool
760 abnormal_ssa_name_p (tree exp)
762 if (!exp)
763 return false;
765 if (TREE_CODE (exp) != SSA_NAME)
766 return false;
768 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
771 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
772 abnormal phi node. Callback for for_each_index. */
774 static bool
775 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
776 void *data ATTRIBUTE_UNUSED)
778 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
780 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
781 return false;
782 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
783 return false;
786 return !abnormal_ssa_name_p (*index);
789 /* Returns true if EXPR contains a ssa name that occurs in an
790 abnormal phi node. */
792 bool
793 contains_abnormal_ssa_name_p (tree expr)
795 enum tree_code code;
796 enum tree_code_class codeclass;
798 if (!expr)
799 return false;
801 code = TREE_CODE (expr);
802 codeclass = TREE_CODE_CLASS (code);
804 if (code == SSA_NAME)
805 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
807 if (code == INTEGER_CST
808 || is_gimple_min_invariant (expr))
809 return false;
811 if (code == ADDR_EXPR)
812 return !for_each_index (&TREE_OPERAND (expr, 0),
813 idx_contains_abnormal_ssa_name_p,
814 NULL);
816 if (code == COND_EXPR)
817 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
818 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
819 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
821 switch (codeclass)
823 case tcc_binary:
824 case tcc_comparison:
825 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
826 return true;
828 /* Fallthru. */
829 case tcc_unary:
830 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
831 return true;
833 break;
835 default:
836 gcc_unreachable ();
839 return false;
842 /* Returns the structure describing number of iterations determined from
843 EXIT of DATA->current_loop, or NULL if something goes wrong. */
845 static struct tree_niter_desc *
846 niter_for_exit (struct ivopts_data *data, edge exit)
848 struct tree_niter_desc *desc;
849 tree_niter_desc **slot;
851 if (!data->niters)
853 data->niters = new hash_map<edge, tree_niter_desc *>;
854 slot = NULL;
856 else
857 slot = data->niters->get (exit);
859 if (!slot)
861 /* Try to determine number of iterations. We cannot safely work with ssa
862 names that appear in phi nodes on abnormal edges, so that we do not
863 create overlapping life ranges for them (PR 27283). */
864 desc = XNEW (struct tree_niter_desc);
865 if (!number_of_iterations_exit (data->current_loop,
866 exit, desc, true)
867 || contains_abnormal_ssa_name_p (desc->niter))
869 XDELETE (desc);
870 desc = NULL;
872 data->niters->put (exit, desc);
874 else
875 desc = *slot;
877 return desc;
880 /* Returns the structure describing number of iterations determined from
881 single dominating exit of DATA->current_loop, or NULL if something
882 goes wrong. */
884 static struct tree_niter_desc *
885 niter_for_single_dom_exit (struct ivopts_data *data)
887 edge exit = single_dom_exit (data->current_loop);
889 if (!exit)
890 return NULL;
892 return niter_for_exit (data, exit);
895 /* Initializes data structures used by the iv optimization pass, stored
896 in DATA. */
898 static void
899 tree_ssa_iv_optimize_init (struct ivopts_data *data)
901 data->version_info_size = 2 * num_ssa_names;
902 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
903 data->relevant = BITMAP_ALLOC (NULL);
904 data->important_candidates = BITMAP_ALLOC (NULL);
905 data->max_inv_id = 0;
906 data->niters = NULL;
907 data->iv_uses.create (20);
908 data->iv_candidates.create (20);
909 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
910 data->inv_expr_id = 0;
911 data->name_expansion_cache = NULL;
912 decl_rtl_to_reset.create (20);
915 /* Returns a memory object to that EXPR points. In case we are able to
916 determine that it does not point to any such object, NULL is returned. */
918 static tree
919 determine_base_object (tree expr)
921 enum tree_code code = TREE_CODE (expr);
922 tree base, obj;
924 /* If this is a pointer casted to any type, we need to determine
925 the base object for the pointer; so handle conversions before
926 throwing away non-pointer expressions. */
927 if (CONVERT_EXPR_P (expr))
928 return determine_base_object (TREE_OPERAND (expr, 0));
930 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
931 return NULL_TREE;
933 switch (code)
935 case INTEGER_CST:
936 return NULL_TREE;
938 case ADDR_EXPR:
939 obj = TREE_OPERAND (expr, 0);
940 base = get_base_address (obj);
942 if (!base)
943 return expr;
945 if (TREE_CODE (base) == MEM_REF)
946 return determine_base_object (TREE_OPERAND (base, 0));
948 return fold_convert (ptr_type_node,
949 build_fold_addr_expr (base));
951 case POINTER_PLUS_EXPR:
952 return determine_base_object (TREE_OPERAND (expr, 0));
954 case PLUS_EXPR:
955 case MINUS_EXPR:
956 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
957 gcc_unreachable ();
959 default:
960 return fold_convert (ptr_type_node, expr);
964 /* Return true if address expression with non-DECL_P operand appears
965 in EXPR. */
967 static bool
968 contain_complex_addr_expr (tree expr)
970 bool res = false;
972 STRIP_NOPS (expr);
973 switch (TREE_CODE (expr))
975 case POINTER_PLUS_EXPR:
976 case PLUS_EXPR:
977 case MINUS_EXPR:
978 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
979 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
980 break;
982 case ADDR_EXPR:
983 return (!DECL_P (TREE_OPERAND (expr, 0)));
985 default:
986 return false;
989 return res;
992 /* Allocates an induction variable with given initial value BASE and step STEP
993 for loop LOOP. */
995 static struct iv *
996 alloc_iv (tree base, tree step)
998 tree expr = base;
999 struct iv *iv = XCNEW (struct iv);
1000 gcc_assert (step != NULL_TREE);
1002 /* Lower address expression in base except ones with DECL_P as operand.
1003 By doing this:
1004 1) More accurate cost can be computed for address expressions;
1005 2) Duplicate candidates won't be created for bases in different
1006 forms, like &a[0] and &a. */
1007 STRIP_NOPS (expr);
1008 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1009 || contain_complex_addr_expr (expr))
1011 aff_tree comb;
1012 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1013 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1016 iv->base = base;
1017 iv->base_object = determine_base_object (base);
1018 iv->step = step;
1019 iv->biv_p = false;
1020 iv->have_use_for = false;
1021 iv->use_id = 0;
1022 iv->ssa_name = NULL_TREE;
1024 return iv;
1027 /* Sets STEP and BASE for induction variable IV. */
1029 static void
1030 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1032 struct version_info *info = name_info (data, iv);
1034 gcc_assert (!info->iv);
1036 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1037 info->iv = alloc_iv (base, step);
1038 info->iv->ssa_name = iv;
1041 /* Finds induction variable declaration for VAR. */
1043 static struct iv *
1044 get_iv (struct ivopts_data *data, tree var)
1046 basic_block bb;
1047 tree type = TREE_TYPE (var);
1049 if (!POINTER_TYPE_P (type)
1050 && !INTEGRAL_TYPE_P (type))
1051 return NULL;
1053 if (!name_info (data, var)->iv)
1055 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1057 if (!bb
1058 || !flow_bb_inside_loop_p (data->current_loop, bb))
1059 set_iv (data, var, var, build_int_cst (type, 0));
1062 return name_info (data, var)->iv;
1065 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1066 not define a simple affine biv with nonzero step. */
1068 static tree
1069 determine_biv_step (gphi *phi)
1071 struct loop *loop = gimple_bb (phi)->loop_father;
1072 tree name = PHI_RESULT (phi);
1073 affine_iv iv;
1075 if (virtual_operand_p (name))
1076 return NULL_TREE;
1078 if (!simple_iv (loop, loop, name, &iv, true))
1079 return NULL_TREE;
1081 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1084 /* Return the first non-invariant ssa var found in EXPR. */
1086 static tree
1087 extract_single_var_from_expr (tree expr)
1089 int i, n;
1090 tree tmp;
1091 enum tree_code code;
1093 if (!expr || is_gimple_min_invariant (expr))
1094 return NULL;
1096 code = TREE_CODE (expr);
1097 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1099 n = TREE_OPERAND_LENGTH (expr);
1100 for (i = 0; i < n; i++)
1102 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1104 if (tmp)
1105 return tmp;
1108 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1111 /* Finds basic ivs. */
1113 static bool
1114 find_bivs (struct ivopts_data *data)
1116 gphi *phi;
1117 tree step, type, base, stop;
1118 bool found = false;
1119 struct loop *loop = data->current_loop;
1120 gphi_iterator psi;
1122 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1124 phi = psi.phi ();
1126 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1127 continue;
1129 step = determine_biv_step (phi);
1130 if (!step)
1131 continue;
1133 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1134 /* Stop expanding iv base at the first ssa var referred by iv step.
1135 Ideally we should stop at any ssa var, because that's expensive
1136 and unusual to happen, we just do it on the first one.
1138 See PR64705 for the rationale. */
1139 stop = extract_single_var_from_expr (step);
1140 base = expand_simple_operations (base, stop);
1141 if (contains_abnormal_ssa_name_p (base)
1142 || contains_abnormal_ssa_name_p (step))
1143 continue;
1145 type = TREE_TYPE (PHI_RESULT (phi));
1146 base = fold_convert (type, base);
1147 if (step)
1149 if (POINTER_TYPE_P (type))
1150 step = convert_to_ptrofftype (step);
1151 else
1152 step = fold_convert (type, step);
1155 set_iv (data, PHI_RESULT (phi), base, step);
1156 found = true;
1159 return found;
1162 /* Marks basic ivs. */
1164 static void
1165 mark_bivs (struct ivopts_data *data)
1167 gphi *phi;
1168 gimple def;
1169 tree var;
1170 struct iv *iv, *incr_iv;
1171 struct loop *loop = data->current_loop;
1172 basic_block incr_bb;
1173 gphi_iterator psi;
1175 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1177 phi = psi.phi ();
1179 iv = get_iv (data, PHI_RESULT (phi));
1180 if (!iv)
1181 continue;
1183 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1184 def = SSA_NAME_DEF_STMT (var);
1185 /* Don't mark iv peeled from other one as biv. */
1186 if (def
1187 && gimple_code (def) == GIMPLE_PHI
1188 && gimple_bb (def) == loop->header)
1189 continue;
1191 incr_iv = get_iv (data, var);
1192 if (!incr_iv)
1193 continue;
1195 /* If the increment is in the subloop, ignore it. */
1196 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1197 if (incr_bb->loop_father != data->current_loop
1198 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1199 continue;
1201 iv->biv_p = true;
1202 incr_iv->biv_p = true;
1206 /* Checks whether STMT defines a linear induction variable and stores its
1207 parameters to IV. */
/* IV->base and IV->step are reset to NULL_TREE on entry.  Returns true
   only when STMT is a GIMPLE_ASSIGN whose left-hand side is an SSA_NAME,
   simple_iv succeeds for that name, neither the (expanded) base nor the
   step contains an abnormal SSA name, and STMT cannot throw.  On a false
   return IV may have been partially filled in.  */
1209 static bool
1210 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1212 tree lhs, stop;
1213 struct loop *loop = data->current_loop;
1215 iv->base = NULL_TREE;
1216 iv->step = NULL_TREE;
1218 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1219 return false;
1221 lhs = gimple_assign_lhs (stmt);
1222 if (TREE_CODE (lhs) != SSA_NAME)
1223 return false;
/* The evolution is analyzed relative to the current loop, for a use
   located in the loop that contains STMT.  */
1225 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1226 return false;
1228 /* Stop expanding iv base at the first ssa var referred by iv step.
1229 Ideally we should stop at any ssa var, because that's expensive
1230 and unusual to happen, we just do it on the first one.
1232 See PR64705 for the rationale. */
1233 stop = extract_single_var_from_expr (iv->step);
1234 iv->base = expand_simple_operations (iv->base, stop);
1235 if (contains_abnormal_ssa_name_p (iv->base)
1236 || contains_abnormal_ssa_name_p (iv->step))
1237 return false;
1239 /* If STMT could throw, then do not consider STMT as defining a GIV.
1240 While this will suppress optimizations, we can not safely delete this
1241 GIV and associated statements, even if it appears it is not used. */
1242 if (stmt_could_throw_p (stmt))
1243 return false;
1245 return true;
1248 /* Finds general ivs in statement STMT. */
1250 static void
1251 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1253 affine_iv iv;
1255 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1256 return;
1258 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1261 /* Finds general ivs in basic block BB. */
1263 static void
1264 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1266 gimple_stmt_iterator bsi;
1268 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1269 find_givs_in_stmt (data, gsi_stmt (bsi));
1272 /* Finds general ivs. */
1274 static void
1275 find_givs (struct ivopts_data *data)
1277 struct loop *loop = data->current_loop;
1278 basic_block *body = get_loop_body_in_dom_order (loop);
1279 unsigned i;
1281 for (i = 0; i < loop->num_nodes; i++)
1282 find_givs_in_bb (data, body[i]);
1283 free (body);
1286 /* For each ssa name defined in LOOP determines whether it is an induction
1287 variable and if so, its initial value and step. */
/* Returns false, without looking for general ivs, when find_bivs reports
   that it found nothing; otherwise runs find_givs and mark_bivs and
   returns true.  When detailed dumping is enabled, also prints the
   iteration count from niter_for_single_dom_exit (if one is returned) and
   every iv recorded for the versions set in DATA->relevant.  */
1289 static bool
1290 find_induction_variables (struct ivopts_data *data)
1292 unsigned i;
1293 bitmap_iterator bi;
1295 if (!find_bivs (data))
1296 return false;
1298 find_givs (data);
1299 mark_bivs (data);
/* Everything below only produces dump output.  */
1301 if (dump_file && (dump_flags & TDF_DETAILS))
1303 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1305 if (niter)
1307 fprintf (dump_file, " number of iterations ");
1308 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
/* The may_be_zero condition is printed only when it is not the
   constant zero.  */
1309 if (!integer_zerop (niter->may_be_zero))
1311 fprintf (dump_file, "; zero if ");
1312 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1314 fprintf (dump_file, "\n\n");
1317 fprintf (dump_file, "Induction variables:\n\n");
1319 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1321 if (ver_info (data, i)->iv)
1322 dump_iv (dump_file, ver_info (data, i)->iv);
1326 return true;
1329 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
/* Allocates a zero-initialized iv_use, gives it the next free id (the
   current number of uses), fills in its fields, allocates an empty
   related_cands bitmap, appends the use to DATA->iv_uses and returns it.
   IV is stored by pointer, not copied, and its ssa_name field is cleared
   as a side effect.  */
1331 static struct iv_use *
1332 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1333 gimple stmt, enum use_type use_type)
1335 struct iv_use *use = XCNEW (struct iv_use);
1337 use->id = n_iv_uses (data);
1338 use->type = use_type;
1339 use->iv = iv;
1340 use->stmt = stmt;
1341 use->op_p = use_p;
1342 use->related_cands = BITMAP_ALLOC (NULL);
1344 /* To avoid showing ssa name in the dumps, if it was not reset by the
1345 caller. */
1346 iv->ssa_name = NULL_TREE;
1348 if (dump_file && (dump_flags & TDF_DETAILS))
1349 dump_use (dump_file, use);
/* The id assigned above equals the index at which the use is pushed.  */
1351 data->iv_uses.safe_push (use);
1353 return use;
1356 /* Checks whether OP is a loop-level invariant and if so, records it.
1357 NONLINEAR_USE is true if the invariant is used in a way we do not
1358 handle specially. */
/* Nothing is recorded when OP is not an SSA_NAME, is a virtual operand,
   or is defined by a statement located inside the current loop.  Otherwise
   the version info of OP is updated: has_nonlin_use accumulates
   NONLINEAR_USE, a fresh inv_id is assigned on first sight, and the
   version is added to DATA->relevant.  */
1360 static void
1361 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1363 basic_block bb;
1364 struct version_info *info;
1366 if (TREE_CODE (op) != SSA_NAME
1367 || virtual_operand_p (op))
1368 return;
/* A defining statement with no basic block is treated as being outside
   the loop.  */
1370 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1371 if (bb
1372 && flow_bb_inside_loop_p (data->current_loop, bb))
1373 return;
1375 info = name_info (data, op);
1376 info->name = op;
1377 info->has_nonlin_use |= nonlinear_use;
/* Invariant ids start at 1; 0 means "not an invariant".  */
1378 if (!info->inv_id)
1379 info->inv_id = ++data->max_inv_id;
1380 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1383 /* Checks whether the use OP is interesting and if so, records it. */
/* Returns NULL when OP is not an SSA_NAME, has no iv description, or has
   an iv with zero step (in which case OP is recorded as an invariant with
   a nonlinear use instead).  Otherwise returns the USE_NONLINEAR_EXPR use
   for OP: the one recorded earlier if iv->have_use_for is already set, or
   a newly recorded one built on a heap copy of the iv.  */
1385 static struct iv_use *
1386 find_interesting_uses_op (struct ivopts_data *data, tree op)
1388 struct iv *iv;
1389 struct iv *civ;
1390 gimple stmt;
1391 struct iv_use *use;
1393 if (TREE_CODE (op) != SSA_NAME)
1394 return NULL;
1396 iv = get_iv (data, op);
1397 if (!iv)
1398 return NULL;
/* A use was already recorded for this iv; hand it back.  */
1400 if (iv->have_use_for)
1402 use = iv_use (data, iv->use_id);
1404 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1405 return use;
1408 if (integer_zerop (iv->step))
1410 record_invariant (data, op, true);
1411 return NULL;
1413 iv->have_use_for = true;
/* The use gets its own copy of the iv, so that record_use clearing
   ssa_name does not affect the original description.  */
1415 civ = XNEW (struct iv);
1416 *civ = *iv;
1418 stmt = SSA_NAME_DEF_STMT (op);
1419 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1420 || is_gimple_assign (stmt));
1422 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1423 iv->use_id = use->id;
1425 return use;
1428 /* Given a condition in statement STMT, checks whether it is a compare
1429 of an induction variable and an invariant. If this is the case,
1430 CONTROL_VAR is set to location of the iv, BOUND to the location of
1431 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1432 induction variable descriptions, and true is returned. If this is not
1433 the case, CONTROL_VAR and BOUND are set to the arguments of the
1434 condition and false is returned. */
1436 static bool
1437 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1438 tree **control_var, tree **bound,
1439 struct iv **iv_var, struct iv **iv_bound)
1441 /* The objects returned when COND has constant operands. */
1442 static struct iv const_iv;
1443 static tree zero;
1444 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1445 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1446 bool ret = false;
1448 if (gimple_code (stmt) == GIMPLE_COND)
1450 gcond *cond_stmt = as_a <gcond *> (stmt);
1451 op0 = gimple_cond_lhs_ptr (cond_stmt);
1452 op1 = gimple_cond_rhs_ptr (cond_stmt);
1454 else
1456 op0 = gimple_assign_rhs1_ptr (stmt);
1457 op1 = gimple_assign_rhs2_ptr (stmt);
1460 zero = integer_zero_node;
1461 const_iv.step = integer_zero_node;
1463 if (TREE_CODE (*op0) == SSA_NAME)
1464 iv0 = get_iv (data, *op0);
1465 if (TREE_CODE (*op1) == SSA_NAME)
1466 iv1 = get_iv (data, *op1);
1468 /* Exactly one of the compared values must be an iv, and the other one must
1469 be an invariant. */
1470 if (!iv0 || !iv1)
1471 goto end;
1473 if (integer_zerop (iv0->step))
1475 /* Control variable may be on the other side. */
1476 tmp_op = op0; op0 = op1; op1 = tmp_op;
1477 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1479 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1481 end:
1482 if (control_var)
1483 *control_var = op0;;
1484 if (iv_var)
1485 *iv_var = iv0;;
1486 if (bound)
1487 *bound = op1;
1488 if (iv_bound)
1489 *iv_bound = iv1;
1491 return ret;
1494 /* Checks whether the condition in STMT is interesting and if so,
1495 records it. */
1497 static void
1498 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1500 tree *var_p, *bound_p;
1501 struct iv *var_iv, *civ;
1503 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1505 find_interesting_uses_op (data, *var_p);
1506 find_interesting_uses_op (data, *bound_p);
1507 return;
1510 civ = XNEW (struct iv);
1511 *civ = *var_iv;
1512 record_use (data, NULL, civ, stmt, USE_COMPARE);
1515 /* Returns the outermost loop EXPR is obviously invariant in
1516 relative to the loop LOOP, i.e. if all its operands are defined
1517 outside of the returned loop. Returns NULL if EXPR is not
1518 even obviously invariant in LOOP. */
/* Gimple minimal invariants and SSA names whose defining statement has no
   basic block yield the root of the loop tree.  An SSA name defined inside
   LOOP yields NULL; one defined elsewhere yields the superloop of LOOP one
   level deeper than the loop containing the definition.  Any other
   expression must satisfy EXPR_P and is handled by recursing into its
   non-NULL operands and taking the deepest loop among the results.  */
1520 struct loop *
1521 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1523 basic_block def_bb;
1524 unsigned i, len;
1526 if (is_gimple_min_invariant (expr))
1527 return current_loops->tree_root;
1529 if (TREE_CODE (expr) == SSA_NAME)
1531 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1532 if (def_bb)
1534 if (flow_bb_inside_loop_p (loop, def_bb))
1535 return NULL;
1536 return superloop_at_depth (loop,
1537 loop_depth (def_bb->loop_father) + 1);
1540 return current_loops->tree_root;
1543 if (!EXPR_P (expr))
1544 return NULL;
/* MAXDEPTH tracks the deepest per-operand result seen so far; a single
   non-invariant operand makes the whole expression non-invariant.  */
1546 unsigned maxdepth = 0;
1547 len = TREE_OPERAND_LENGTH (expr);
1548 for (i = 0; i < len; i++)
1550 struct loop *ivloop;
1551 if (!TREE_OPERAND (expr, i))
1552 continue;
1554 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1555 if (!ivloop)
1556 return NULL;
1557 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1560 return superloop_at_depth (loop, maxdepth);
1563 /* Returns true if expression EXPR is obviously invariant in LOOP,
1564 i.e. if all its operands are defined outside of the LOOP. LOOP
1565 should not be the function body. */
1567 bool
1568 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1570 basic_block def_bb;
1571 unsigned i, len;
1573 gcc_assert (loop_depth (loop) > 0);
1575 if (is_gimple_min_invariant (expr))
1576 return true;
1578 if (TREE_CODE (expr) == SSA_NAME)
1580 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1581 if (def_bb
1582 && flow_bb_inside_loop_p (loop, def_bb))
1583 return false;
1585 return true;
1588 if (!EXPR_P (expr))
1589 return false;
1591 len = TREE_OPERAND_LENGTH (expr);
1592 for (i = 0; i < len; i++)
1593 if (TREE_OPERAND (expr, i)
1594 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1595 return false;
1597 return true;
1600 /* Cumulates the steps of indices into DATA and replaces their values with the
1601 initial ones. Returns false when the value of the index cannot be determined.
1602 Callback for for_each_index. */
/* The structure below is the DATA object passed to idx_find_step: the
   ivopts context, the statement containing the reference, and the step
   accumulated so far (in sizetype).  */
1604 struct ifs_ivopts_data
1606 struct ivopts_data *ivopts_data;
1607 gimple stmt;
1608 tree step;
/* See the comment above the structure.  BASE is the reference being
   walked and *IDX the index found in it.  On success *IDX may have been
   overwritten with the base of its iv and DTA->step increased by
   element-size * iv-step.  */
1611 static bool
1612 idx_find_step (tree base, tree *idx, void *data)
1614 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1615 struct iv *iv;
1616 tree step, iv_base, iv_step, lbound, off;
1617 struct loop *loop = dta->ivopts_data->current_loop;
1619 /* If base is a component ref, require that the offset of the reference
1620 be invariant. */
1621 if (TREE_CODE (base) == COMPONENT_REF)
1623 off = component_ref_field_offset (base);
1624 return expr_invariant_in_loop_p (loop, off);
1627 /* If base is array, first check whether we will be able to move the
1628 reference out of the loop (in order to take its address in strength
1629 reduction). In order for this to work we need both lower bound
1630 and step to be loop invariants. */
1631 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1633 /* Moreover, for a range, the size needs to be invariant as well. */
1634 if (TREE_CODE (base) == ARRAY_RANGE_REF
1635 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1636 return false;
1638 step = array_ref_element_size (base);
1639 lbound = array_ref_low_bound (base);
1641 if (!expr_invariant_in_loop_p (loop, step)
1642 || !expr_invariant_in_loop_p (loop, lbound))
1643 return false;
/* An index that is not an SSA name contributes no step.  */
1646 if (TREE_CODE (*idx) != SSA_NAME)
1647 return true;
1649 iv = get_iv (dta->ivopts_data, *idx);
1650 if (!iv)
1651 return false;
1653 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1654 *&x[0], which is not folded and does not trigger the
1655 ARRAY_REF path below. */
1656 *idx = iv->base;
/* A zero-step iv leaves the accumulated step unchanged.  */
1658 if (integer_zerop (iv->step))
1659 return true;
1661 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1663 step = array_ref_element_size (base);
1665 /* We only handle addresses whose step is an integer constant. */
1666 if (TREE_CODE (step) != INTEGER_CST)
1667 return false;
1669 else
1670 /* The step for pointer arithmetics already is 1 byte. */
1671 step = size_one_node;
/* Work on copies so that the recorded iv is not modified by the
   conversion to sizetype.  */
1673 iv_base = iv->base;
1674 iv_step = iv->step;
1675 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1676 sizetype, &iv_base, &iv_step, dta->stmt,
1677 false))
1679 /* The index might wrap. */
1680 return false;
1683 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1684 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1686 return true;
1689 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1690 object is passed to it in DATA. */
1692 static bool
1693 idx_record_use (tree base, tree *idx,
1694 void *vdata)
1696 struct ivopts_data *data = (struct ivopts_data *) vdata;
1697 find_interesting_uses_op (data, *idx);
1698 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1700 find_interesting_uses_op (data, array_ref_element_size (base));
1701 find_interesting_uses_op (data, array_ref_low_bound (base));
1703 return true;
1706 /* If we can prove that TOP = cst * BOT for some constant cst,
1707 store cst to MUL and return true. Otherwise return false.
1708 The returned value is always sign-extended, regardless of the
1709 signedness of TOP and BOT. */
/* The proof is purely structural: TOP equal to BOT gives 1; a MULT_EXPR
   by an INTEGER_CST, a PLUS_EXPR or a MINUS_EXPR is handled by recursing
   into the operands; two INTEGER_CSTs are divided exactly.  Every other
   tree code fails.  Results are sign-extended from the precision of the
   type TOP had on entry, i.e. before STRIP_NOPS is applied.  */
1711 static bool
1712 constant_multiple_of (tree top, tree bot, widest_int *mul)
1714 tree mby;
1715 enum tree_code code;
1716 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1717 widest_int res, p0, p1;
1719 STRIP_NOPS (top);
1720 STRIP_NOPS (bot);
1722 if (operand_equal_p (top, bot, 0))
1724 *mul = 1;
1725 return true;
1728 code = TREE_CODE (top);
1729 switch (code)
1731 case MULT_EXPR:
/* Only the second operand is accepted as the constant factor.  */
1732 mby = TREE_OPERAND (top, 1);
1733 if (TREE_CODE (mby) != INTEGER_CST)
1734 return false;
1736 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1737 return false;
1739 *mul = wi::sext (res * wi::to_widest (mby), precision);
1740 return true;
1742 case PLUS_EXPR:
1743 case MINUS_EXPR:
/* Both operands must be constant multiples of BOT.  */
1744 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1745 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1746 return false;
1748 if (code == MINUS_EXPR)
1749 p1 = -p1;
1750 *mul = wi::sext (p0 + p1, precision);
1751 return true;
1753 case INTEGER_CST:
1754 if (TREE_CODE (bot) != INTEGER_CST)
1755 return false;
1757 p0 = widest_int::from (top, SIGNED);
1758 p1 = widest_int::from (bot, SIGNED);
1759 if (p1 == 0)
1760 return false;
/* RES receives the remainder; *MUL is written even when the division is
   not exact, but the function then returns false.  */
1761 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1762 return res == 0;
1764 default:
1765 return false;
1769 /* Return true if memory reference REF with step STEP may be unaligned. */
/* The required alignment is the larger of the alignment of REF's type and
   the alignment of that type's machine mode.  REF may be unaligned when
   the alignment known for the object is smaller than that, when its bit
   position is not a multiple of it (or of BITS_PER_UNIT), or when STEP has
   too few trailing zero bits for every iteration to stay aligned.  */
1771 static bool
1772 may_be_unaligned_p (tree ref, tree step)
1774 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1775 thus they are not misaligned. */
1776 if (TREE_CODE (ref) == TARGET_MEM_REF)
1777 return false;
1779 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1780 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1781 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1783 unsigned HOST_WIDE_INT bitpos;
1784 unsigned int ref_align;
1785 get_object_alignment_1 (ref, &ref_align, &bitpos);
1786 if (ref_align < align
1787 || (bitpos % align) != 0
1788 || (bitpos % BITS_PER_UNIT) != 0)
1789 return true;
/* The shift below is only evaluated when the count is smaller than
   HOST_BITS_PER_INT; a larger count is treated as sufficiently aligned.  */
1791 unsigned int trailing_zeros = tree_ctz (step);
1792 if (trailing_zeros < HOST_BITS_PER_INT
1793 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1794 return true;
1796 return false;
1799 /* Return true if EXPR may be non-addressable. */
/* Decided by the tree code of EXPR: TARGET_MEM_REF never is; a
   COMPONENT_REF is when its field is marked DECL_NONADDRESSABLE_P or its
   containing object may be; a VIEW_CONVERT_EXPR is when its operand is a
   gimple register or not gimple-addressable, and otherwise, like array
   references, defers to its first operand; conversions always are.  All
   remaining codes are treated as addressable.  */
1801 bool
1802 may_be_nonaddressable_p (tree expr)
1804 switch (TREE_CODE (expr))
1806 case TARGET_MEM_REF:
1807 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1808 target, thus they are always addressable. */
1809 return false;
1811 case COMPONENT_REF:
1812 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1813 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1815 case VIEW_CONVERT_EXPR:
1816 /* This kind of view-conversions may wrap non-addressable objects
1817 and make them look addressable. After some processing the
1818 non-addressability may be uncovered again, causing ADDR_EXPRs
1819 of inappropriate objects to be built. */
1820 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1821 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1822 return true;
1824 /* ... fall through ... */
1826 case ARRAY_REF:
1827 case ARRAY_RANGE_REF:
1828 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1830 CASE_CONVERT:
1831 return true;
1833 default:
1834 break;
1837 return false;
1840 /* Finds addresses in *OP_P inside STMT. */
/* Tries to express the address of the memory reference *OP_P as an iv
   (base + step * iteration) and, on success, records a USE_ADDRESS use
   for it.  The work is done on an unshared copy of the reference in which
   SSA names that are ivs are replaced by their initial values while their
   steps are accumulated.  On any failure (volatile statement,
   BIT_FIELD_REF, an operand without iv description, zero total step,
   non-addressable or possibly unaligned base) the function falls back to
   recording the uses found in the individual indices of *OP_P.  */
1842 static void
1843 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1845 tree base = *op_p, step = size_zero_node;
1846 struct iv *civ;
1847 struct ifs_ivopts_data ifs_ivopts_data;
1849 /* Do not play with volatile memory references. A bit too conservative,
1850 perhaps, but safe. */
1851 if (gimple_has_volatile_ops (stmt))
1852 goto fail;
1854 /* Ignore bitfields for now. Not really something terribly complicated
1855 to handle. TODO. */
1856 if (TREE_CODE (base) == BIT_FIELD_REF)
1857 goto fail;
/* From here on BASE is a private copy that may be modified freely.  */
1859 base = unshare_expr (base);
1861 if (TREE_CODE (base) == TARGET_MEM_REF)
1863 tree type = build_pointer_type (TREE_TYPE (base));
1864 tree astep;
/* Each of TMR_BASE, TMR_INDEX2 and TMR_INDEX that is an SSA name must be
   an iv; it is replaced by its initial value.  */
1866 if (TMR_BASE (base)
1867 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1869 civ = get_iv (data, TMR_BASE (base));
1870 if (!civ)
1871 goto fail;
1873 TMR_BASE (base) = civ->base;
1874 step = civ->step;
/* NOTE(review): the step of TMR_INDEX2 is assigned to STEP rather than
   added to it, so a step taken from TMR_BASE above is overwritten when
   both are SSA names -- confirm this is intended.  */
1876 if (TMR_INDEX2 (base)
1877 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1879 civ = get_iv (data, TMR_INDEX2 (base));
1880 if (!civ)
1881 goto fail;
1883 TMR_INDEX2 (base) = civ->base;
1884 step = civ->step;
1886 if (TMR_INDEX (base)
1887 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1889 civ = get_iv (data, TMR_INDEX (base));
1890 if (!civ)
1891 goto fail;
1893 TMR_INDEX (base) = civ->base;
1894 astep = civ->step;
/* The index step is scaled by TMR_STEP, when present, before being added
   to the total.  */
1896 if (astep)
1898 if (TMR_STEP (base))
1899 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1901 step = fold_build2 (PLUS_EXPR, type, step, astep);
/* An address that does not change between iterations is not an
   interesting address use.  */
1905 if (integer_zerop (step))
1906 goto fail;
1907 base = tree_mem_ref_addr (type, base);
1909 else
/* Generic references: idx_find_step substitutes initial values into the
   indices and accumulates the step.  */
1911 ifs_ivopts_data.ivopts_data = data;
1912 ifs_ivopts_data.stmt = stmt;
1913 ifs_ivopts_data.step = size_zero_node;
1914 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1915 || integer_zerop (ifs_ivopts_data.step))
1916 goto fail;
1917 step = ifs_ivopts_data.step;
1919 /* Check that the base expression is addressable. This needs
1920 to be done after substituting bases of IVs into it. */
1921 if (may_be_nonaddressable_p (base))
1922 goto fail;
1924 /* Moreover, on strict alignment platforms, check that it is
1925 sufficiently aligned. */
1926 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1927 goto fail;
1929 base = build_fold_addr_expr (base);
1931 /* Substituting bases of IVs into the base expression might
1932 have caused folding opportunities. */
1933 if (TREE_CODE (base) == ADDR_EXPR)
1935 tree *ref = &TREE_OPERAND (base, 0);
1936 while (handled_component_p (*ref))
1937 ref = &TREE_OPERAND (*ref, 0);
1938 if (TREE_CODE (*ref) == MEM_REF)
1940 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1941 TREE_OPERAND (*ref, 0),
1942 TREE_OPERAND (*ref, 1));
1943 if (tem)
1944 *ref = tem;
/* Success: record the address use on a freshly allocated iv.  */
1949 civ = alloc_iv (base, step);
1950 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1951 return;
/* Fallback: look for interesting uses in the indices of the original,
   unmodified reference.  */
1953 fail:
1954 for_each_index (op_p, idx_record_use, data);
1957 /* Finds and records invariants used in STMT. */
1959 static void
1960 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1962 ssa_op_iter iter;
1963 use_operand_p use_p;
1964 tree op;
1966 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1968 op = USE_FROM_PTR (use_p);
1969 record_invariant (data, op, false);
1973 /* Finds interesting uses of induction variables in the statement STMT. */
/* Invariants used by STMT are recorded first.  Then, depending on the
   kind of statement: conditions go to find_interesting_uses_cond;
   assignments that define an iv with nonzero step are skipped; single-rhs
   assignments have their memory references handled as address uses and a
   plain value rhs handled as an operand use; comparison assignments are
   handled like conditions.  Loop-header PHIs that define an iv with
   nonzero step are skipped.  Whatever remains has each SSA operand that
   is an iv recorded through find_interesting_uses_op.  */
1975 static void
1976 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1978 struct iv *iv;
1979 tree op, *lhs, *rhs;
1980 ssa_op_iter iter;
1981 use_operand_p use_p;
1982 enum tree_code code;
1984 find_invariants_stmt (data, stmt);
1986 if (gimple_code (stmt) == GIMPLE_COND)
1988 find_interesting_uses_cond (data, stmt);
1989 return;
1992 if (is_gimple_assign (stmt))
1994 lhs = gimple_assign_lhs_ptr (stmt);
1995 rhs = gimple_assign_rhs1_ptr (stmt);
1997 if (TREE_CODE (*lhs) == SSA_NAME)
1999 /* If the statement defines an induction variable, the uses are not
2000 interesting by themselves. */
2002 iv = get_iv (data, *lhs);
2004 if (iv && !integer_zerop (iv->step))
2005 return;
2008 code = gimple_assign_rhs_code (stmt);
/* Plain copies, loads and stores.  */
2009 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2010 && (REFERENCE_CLASS_P (*rhs)
2011 || is_gimple_val (*rhs)))
2013 if (REFERENCE_CLASS_P (*rhs))
2014 find_interesting_uses_address (data, stmt, rhs);
2015 else
2016 find_interesting_uses_op (data, *rhs);
2018 if (REFERENCE_CLASS_P (*lhs))
2019 find_interesting_uses_address (data, stmt, lhs);
2020 return;
2022 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2024 find_interesting_uses_cond (data, stmt);
2025 return;
2028 /* TODO -- we should also handle address uses of type
2030 memory = call (whatever);
2034 call (memory). */
2037 if (gimple_code (stmt) == GIMPLE_PHI
2038 && gimple_bb (stmt) == data->current_loop->header)
2040 iv = get_iv (data, PHI_RESULT (stmt));
2042 if (iv && !integer_zerop (iv->step))
2043 return;
/* Generic fallback over all SSA uses of the statement.  */
2046 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2048 op = USE_FROM_PTR (use_p);
2050 if (TREE_CODE (op) != SSA_NAME)
2051 continue;
2053 iv = get_iv (data, op);
2054 if (!iv)
2055 continue;
2057 find_interesting_uses_op (data, op);
2061 /* Finds interesting uses of induction variables outside of loops
2062 on loop exit edge EXIT. */
2064 static void
2065 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2067 gphi *phi;
2068 gphi_iterator psi;
2069 tree def;
2071 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2073 phi = psi.phi ();
2074 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2075 if (!virtual_operand_p (def))
2076 find_interesting_uses_op (data, def);
2080 /* Finds uses of the induction variables that are interesting. */
/* Iterates over all blocks of the current loop.  For every block, values
   leaving the loop over an edge to a block outside it (other than the
   function's exit block) are examined first, then the block's PHI nodes,
   then its non-debug statements.  With detailed dumping enabled, the
   invariants recorded in DATA->relevant are listed afterwards.  */
2082 static void
2083 find_interesting_uses (struct ivopts_data *data)
2085 basic_block bb;
2086 gimple_stmt_iterator bsi;
2087 basic_block *body = get_loop_body (data->current_loop);
2088 unsigned i;
2089 struct version_info *info;
2090 edge e;
2092 if (dump_file && (dump_flags & TDF_DETAILS))
2093 fprintf (dump_file, "Uses:\n\n");
2095 for (i = 0; i < data->current_loop->num_nodes; i++)
2097 edge_iterator ei;
2098 bb = body[i];
2100 FOR_EACH_EDGE (e, ei, bb->succs)
2101 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2102 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2103 find_interesting_uses_outside (data, e);
2105 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2106 find_interesting_uses_stmt (data, gsi_stmt (bsi));
/* Debug statements are skipped and so never create uses.  */
2107 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2108 if (!is_gimple_debug (gsi_stmt (bsi)))
2109 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2112 if (dump_file && (dump_flags & TDF_DETAILS))
2114 bitmap_iterator bi;
2116 fprintf (dump_file, "\n");
2118 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2120 info = ver_info (data, i);
2121 if (info->inv_id)
2123 fprintf (dump_file, " ");
2124 print_generic_expr (dump_file, info->name, TDF_SLIM);
/* An invariant without a nonlinear use is reported as eliminable.  */
2125 fprintf (dump_file, " is invariant (%d)%s\n",
2126 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2130 fprintf (dump_file, "\n");
/* The block array is owned by this function and released here.  */
2133 free (body);
2136 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2137 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2138 we are at the top-level of the processed address. */
/* Returns the expression that remains once the constant part has been
   removed, converted back to the type EXPR had on entry; when nothing
   could be stripped the original EXPR itself is returned, so callers can
   detect "no change" by pointer comparison.  *OFFSET is always written,
   starting from 0.  Only constants accepted by cst_and_fits_in_hwi are
   stripped.  Cases that leave the switch with `break' share the common
   tail at the end, which recurses into operand 0 and rebuilds a copy of
   the node.  */
2140 static tree
2141 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2142 HOST_WIDE_INT *offset)
2144 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2145 enum tree_code code;
2146 tree type, orig_type = TREE_TYPE (expr);
2147 HOST_WIDE_INT off0, off1, st;
2148 tree orig_expr = expr;
2150 STRIP_NOPS (expr);
2152 type = TREE_TYPE (expr);
2153 code = TREE_CODE (expr);
2154 *offset = 0;
2156 switch (code)
2158 case INTEGER_CST:
/* A nonzero constant is moved entirely into *OFFSET.  */
2159 if (!cst_and_fits_in_hwi (expr)
2160 || integer_zerop (expr))
2161 return orig_expr;
2163 *offset = int_cst_value (expr);
2164 return build_int_cst (orig_type, 0);
2166 case POINTER_PLUS_EXPR:
2167 case PLUS_EXPR:
2168 case MINUS_EXPR:
/* Strip both operands and combine their offsets; operands that became
   zero are dropped from the rebuilt expression.  */
2169 op0 = TREE_OPERAND (expr, 0);
2170 op1 = TREE_OPERAND (expr, 1);
2172 op0 = strip_offset_1 (op0, false, false, &off0);
2173 op1 = strip_offset_1 (op1, false, false, &off1);
2175 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2176 if (op0 == TREE_OPERAND (expr, 0)
2177 && op1 == TREE_OPERAND (expr, 1))
2178 return orig_expr;
2180 if (integer_zerop (op1))
2181 expr = op0;
2182 else if (integer_zerop (op0))
2184 if (code == MINUS_EXPR)
2185 expr = fold_build1 (NEGATE_EXPR, type, op1);
2186 else
2187 expr = op1;
2189 else
2190 expr = fold_build2 (code, type, op0, op1);
2192 return fold_convert (orig_type, expr);
2194 case MULT_EXPR:
/* Only multiplication by a constant second operand is handled; the offset
   stripped from the first operand is scaled by it.  */
2195 op1 = TREE_OPERAND (expr, 1);
2196 if (!cst_and_fits_in_hwi (op1))
2197 return orig_expr;
2199 op0 = TREE_OPERAND (expr, 0);
2200 op0 = strip_offset_1 (op0, false, false, &off0);
2201 if (op0 == TREE_OPERAND (expr, 0))
2202 return orig_expr;
2204 *offset = off0 * int_cst_value (op1);
2205 if (integer_zerop (op0))
2206 expr = op0;
2207 else
2208 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2210 return fold_convert (orig_type, expr);
2212 case ARRAY_REF:
2213 case ARRAY_RANGE_REF:
2214 if (!inside_addr)
2215 return orig_expr;
2217 step = array_ref_element_size (expr);
2218 if (!cst_and_fits_in_hwi (step))
2219 break;
/* The constant part of the index, scaled by the element size, becomes
   the offset.  */
2221 st = int_cst_value (step);
2222 op1 = TREE_OPERAND (expr, 1);
2223 op1 = strip_offset_1 (op1, false, false, &off1);
2224 *offset = off1 * st;
2226 if (top_compref
2227 && integer_zerop (op1))
2229 /* Strip the component reference completely. */
2230 op0 = TREE_OPERAND (expr, 0);
2231 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2232 *offset += off0;
2233 return op0;
2235 break;
2237 case COMPONENT_REF:
2239 tree field;
2241 if (!inside_addr)
2242 return orig_expr;
2244 tmp = component_ref_field_offset (expr);
2245 field = TREE_OPERAND (expr, 1);
2246 if (top_compref
2247 && cst_and_fits_in_hwi (tmp)
2248 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2250 HOST_WIDE_INT boffset, abs_off;
2252 /* Strip the component reference completely. */
2253 op0 = TREE_OPERAND (expr, 0);
2254 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
/* The bit offset is converted to units by dividing its magnitude and
   then restoring the sign.  */
2255 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2256 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2257 if (boffset < 0)
2258 abs_off = -abs_off;
2260 *offset = off0 + int_cst_value (tmp) + abs_off;
2261 return op0;
2264 break;
2266 case ADDR_EXPR:
/* Entering an address: the operand is processed with INSIDE_ADDR and
   TOP_COMPREF both set.  */
2267 op0 = TREE_OPERAND (expr, 0);
2268 op0 = strip_offset_1 (op0, true, true, &off0);
2269 *offset += off0;
2271 if (op0 == TREE_OPERAND (expr, 0))
2272 return orig_expr;
2274 expr = build_fold_addr_expr (op0);
2275 return fold_convert (orig_type, expr);
2277 case MEM_REF:
2278 /* ??? Offset operand? */
2279 inside_addr = false;
2280 break;
2282 default:
2283 return orig_expr;
2286 /* Default handling of expressions for that we want to recurse into
2287 the first operand. */
2288 op0 = TREE_OPERAND (expr, 0);
2289 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2290 *offset += off0;
/* OP1 is non-NULL here only if the ARRAY_REF case above set it.  */
2292 if (op0 == TREE_OPERAND (expr, 0)
2293 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2294 return orig_expr;
/* The node is copied before modification so that the input tree is left
   untouched.  */
2296 expr = copy_node (expr);
2297 TREE_OPERAND (expr, 0) = op0;
2298 if (op1)
2299 TREE_OPERAND (expr, 1) = op1;
2301 /* Inside address, we might strip the top level component references,
2302 thus changing type of the expression. Handling of ADDR_EXPR
2303 will fix that. */
2304 expr = fold_convert (orig_type, expr);
2306 return expr;
2309 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2311 static tree
2312 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2314 HOST_WIDE_INT off;
2315 tree core = strip_offset_1 (expr, false, false, &off);
2316 *offset = off;
2317 return core;
2320 /* Returns variant of TYPE that can be used as base for different uses.
2321 We return unsigned type with the same precision, which avoids problems
2322 with overflows. */
2324 static tree
2325 generic_type_for (tree type)
2327 if (POINTER_TYPE_P (type))
2328 return unsigned_type_for (type);
2330 if (TYPE_UNSIGNED (type))
2331 return type;
2333 return unsigned_type_for (type);
2336 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2337 the bitmap to that we should store it. */
2339 static struct ivopts_data *fd_ivopts_data;
2340 static tree
2341 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2343 bitmap *depends_on = (bitmap *) data;
2344 struct version_info *info;
2346 if (TREE_CODE (*expr_p) != SSA_NAME)
2347 return NULL_TREE;
2348 info = name_info (fd_ivopts_data, *expr_p);
2350 if (!info->inv_id || info->has_nonlin_use)
2351 return NULL_TREE;
2353 if (!*depends_on)
2354 *depends_on = BITMAP_ALLOC (NULL);
2355 bitmap_set_bit (*depends_on, info->inv_id);
2357 return NULL_TREE;
2360 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2361 position to POS. If USE is not NULL, the candidate is set as related to
2362 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2363 replacement of the final value of the iv by a direct computation. */
2365 static struct iv_cand *
2366 add_candidate_1 (struct ivopts_data *data,
2367 tree base, tree step, bool important, enum iv_position pos,
2368 struct iv_use *use, gimple incremented_at)
2370 unsigned i;
2371 struct iv_cand *cand = NULL;
2372 tree type, orig_type;
2374 /* For non-original variables, make sure their values are computed in a type
2375 that does not invoke undefined behavior on overflows (since in general,
2376 we cannot prove that these induction variables are non-wrapping). */
2377 if (pos != IP_ORIGINAL)
2379 orig_type = TREE_TYPE (base);
2380 type = generic_type_for (orig_type);
2381 if (type != orig_type)
2383 base = fold_convert (type, base);
2384 step = fold_convert (type, step);
2388 for (i = 0; i < n_iv_cands (data); i++)
2390 cand = iv_cand (data, i);
2392 if (cand->pos != pos)
2393 continue;
2395 if (cand->incremented_at != incremented_at
2396 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2397 && cand->ainc_use != use))
2398 continue;
2400 if (!cand->iv)
2402 if (!base && !step)
2403 break;
2405 continue;
2408 if (!base && !step)
2409 continue;
2411 if (operand_equal_p (base, cand->iv->base, 0)
2412 && operand_equal_p (step, cand->iv->step, 0)
2413 && (TYPE_PRECISION (TREE_TYPE (base))
2414 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2415 break;
2418 if (i == n_iv_cands (data))
2420 cand = XCNEW (struct iv_cand);
2421 cand->id = i;
2423 if (!base && !step)
2424 cand->iv = NULL;
2425 else
2426 cand->iv = alloc_iv (base, step);
2428 cand->pos = pos;
2429 if (pos != IP_ORIGINAL && cand->iv)
2431 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2432 cand->var_after = cand->var_before;
2434 cand->important = important;
2435 cand->incremented_at = incremented_at;
2436 data->iv_candidates.safe_push (cand);
2438 if (step
2439 && TREE_CODE (step) != INTEGER_CST)
2441 fd_ivopts_data = data;
2442 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2445 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2446 cand->ainc_use = use;
2447 else
2448 cand->ainc_use = NULL;
2450 if (dump_file && (dump_flags & TDF_DETAILS))
2451 dump_cand (dump_file, cand);
2454 if (important && !cand->important)
2456 cand->important = true;
2457 if (dump_file && (dump_flags & TDF_DETAILS))
2458 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2461 if (use)
2463 bitmap_set_bit (use->related_cands, i);
2464 if (dump_file && (dump_flags & TDF_DETAILS))
2465 fprintf (dump_file, "Candidate %d is related to use %d\n",
2466 cand->id, use->id);
2469 return cand;
2472 /* Returns true if incrementing the induction variable at the end of the LOOP
2473 is allowed.
2475 The purpose is to avoid splitting latch edge with a biv increment, thus
2476 creating a jump, possibly confusing other optimization passes and leaving
2477 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2478 is not available (so we do not have a better alternative), or if the latch
2479 edge is already nonempty. */
2481 static bool
2482 allow_ip_end_pos_p (struct loop *loop)
2484 if (!ip_normal_pos (loop))
2485 return true;
2487 if (!empty_block_p (ip_end_pos (loop)))
2488 return true;
2490 return false;
2493 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2494 Important field is set to IMPORTANT. */
2496 static void
2497 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2498 bool important, struct iv_use *use)
2500 basic_block use_bb = gimple_bb (use->stmt);
2501 machine_mode mem_mode;
2502 unsigned HOST_WIDE_INT cstepi;
2504 /* If we insert the increment in any position other than the standard
2505 ones, we must ensure that it is incremented once per iteration.
2506 It must not be in an inner nested loop, or one side of an if
2507 statement. */
2508 if (use_bb->loop_father != data->current_loop
2509 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2510 || stmt_could_throw_p (use->stmt)
2511 || !cst_and_fits_in_hwi (step))
2512 return;
2514 cstepi = int_cst_value (step);
2516 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2517 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2518 || USE_STORE_PRE_INCREMENT (mem_mode))
2519 && GET_MODE_SIZE (mem_mode) == cstepi)
2520 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2521 || USE_STORE_PRE_DECREMENT (mem_mode))
2522 && GET_MODE_SIZE (mem_mode) == -cstepi))
2524 enum tree_code code = MINUS_EXPR;
2525 tree new_base;
2526 tree new_step = step;
2528 if (POINTER_TYPE_P (TREE_TYPE (base)))
2530 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2531 code = POINTER_PLUS_EXPR;
2533 else
2534 new_step = fold_convert (TREE_TYPE (base), new_step);
2535 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2536 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2537 use->stmt);
2539 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2540 || USE_STORE_POST_INCREMENT (mem_mode))
2541 && GET_MODE_SIZE (mem_mode) == cstepi)
2542 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2543 || USE_STORE_POST_DECREMENT (mem_mode))
2544 && GET_MODE_SIZE (mem_mode) == -cstepi))
2546 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2547 use->stmt);
2551 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2552 position to POS. If USE is not NULL, the candidate is set as related to
2553 it. The candidate computation is scheduled on all available positions. */
2555 static void
2556 add_candidate (struct ivopts_data *data,
2557 tree base, tree step, bool important, struct iv_use *use)
2559 if (ip_normal_pos (data->current_loop))
2560 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2561 if (ip_end_pos (data->current_loop)
2562 && allow_ip_end_pos_p (data->current_loop))
2563 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2565 if (use != NULL && use->type == USE_ADDRESS)
2566 add_autoinc_candidates (data, base, step, important, use);
2569 /* Adds standard iv candidates. */
2571 static void
2572 add_standard_iv_candidates (struct ivopts_data *data)
2574 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2576 /* The same for a double-integer type if it is still fast enough. */
2577 if (TYPE_PRECISION
2578 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2579 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2580 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2581 build_int_cst (long_integer_type_node, 1), true, NULL);
2583 /* The same for a double-integer type if it is still fast enough. */
2584 if (TYPE_PRECISION
2585 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2586 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2587 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2588 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2592 /* Adds candidates bases on the old induction variable IV. */
2594 static void
2595 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2597 gimple phi;
2598 tree def;
2599 struct iv_cand *cand;
2601 add_candidate (data, iv->base, iv->step, true, NULL);
2603 /* The same, but with initial value zero. */
2604 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2605 add_candidate (data, size_int (0), iv->step, true, NULL);
2606 else
2607 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2608 iv->step, true, NULL);
2610 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2611 if (gimple_code (phi) == GIMPLE_PHI)
2613 /* Additionally record the possibility of leaving the original iv
2614 untouched. */
2615 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2616 /* Don't add candidate if it's from another PHI node because
2617 it's an affine iv appearing in the form of PEELED_CHREC. */
2618 phi = SSA_NAME_DEF_STMT (def);
2619 if (gimple_code (phi) != GIMPLE_PHI)
2621 cand = add_candidate_1 (data,
2622 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2623 SSA_NAME_DEF_STMT (def));
2624 cand->var_before = iv->ssa_name;
2625 cand->var_after = def;
2627 else
2628 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2632 /* Adds candidates based on the old induction variables. */
2634 static void
2635 add_old_ivs_candidates (struct ivopts_data *data)
2637 unsigned i;
2638 struct iv *iv;
2639 bitmap_iterator bi;
2641 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2643 iv = ver_info (data, i)->iv;
2644 if (iv && iv->biv_p && !integer_zerop (iv->step))
2645 add_old_iv_candidates (data, iv);
2649 /* Adds candidates based on the value of the induction variable IV and USE. */
2651 static void
2652 add_iv_value_candidates (struct ivopts_data *data,
2653 struct iv *iv, struct iv_use *use)
2655 unsigned HOST_WIDE_INT offset;
2656 tree base;
2657 tree basetype;
2659 add_candidate (data, iv->base, iv->step, false, use);
2661 /* The same, but with initial value zero. Make such variable important,
2662 since it is generic enough so that possibly many uses may be based
2663 on it. */
2664 basetype = TREE_TYPE (iv->base);
2665 if (POINTER_TYPE_P (basetype))
2666 basetype = sizetype;
2667 add_candidate (data, build_int_cst (basetype, 0),
2668 iv->step, true, use);
2670 /* Third, try removing the constant offset. Make sure to even
2671 add a candidate for &a[0] vs. (T *)&a. */
2672 base = strip_offset (iv->base, &offset);
2673 if (offset
2674 || base != iv->base)
2675 add_candidate (data, base, iv->step, false, use);
2678 /* Adds candidates based on the uses. */
2680 static void
2681 add_derived_ivs_candidates (struct ivopts_data *data)
2683 unsigned i;
2685 for (i = 0; i < n_iv_uses (data); i++)
2687 struct iv_use *use = iv_use (data, i);
2689 if (!use)
2690 continue;
2692 switch (use->type)
2694 case USE_NONLINEAR_EXPR:
2695 case USE_COMPARE:
2696 case USE_ADDRESS:
2697 /* Just add the ivs based on the value of the iv used here. */
2698 add_iv_value_candidates (data, use->iv, use);
2699 break;
2701 default:
2702 gcc_unreachable ();
2707 /* Record important candidates and add them to related_cands bitmaps
2708 if needed. */
2710 static void
2711 record_important_candidates (struct ivopts_data *data)
2713 unsigned i;
2714 struct iv_use *use;
2716 for (i = 0; i < n_iv_cands (data); i++)
2718 struct iv_cand *cand = iv_cand (data, i);
2720 if (cand->important)
2721 bitmap_set_bit (data->important_candidates, i);
2724 data->consider_all_candidates = (n_iv_cands (data)
2725 <= CONSIDER_ALL_CANDIDATES_BOUND);
2727 if (data->consider_all_candidates)
2729 /* We will not need "related_cands" bitmaps in this case,
2730 so release them to decrease peak memory consumption. */
2731 for (i = 0; i < n_iv_uses (data); i++)
2733 use = iv_use (data, i);
2734 BITMAP_FREE (use->related_cands);
2737 else
2739 /* Add important candidates to the related_cands bitmaps. */
2740 for (i = 0; i < n_iv_uses (data); i++)
2741 bitmap_ior_into (iv_use (data, i)->related_cands,
2742 data->important_candidates);
2746 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2747 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2748 we allocate a simple list to every use. */
2750 static void
2751 alloc_use_cost_map (struct ivopts_data *data)
2753 unsigned i, size, s;
2755 for (i = 0; i < n_iv_uses (data); i++)
2757 struct iv_use *use = iv_use (data, i);
2759 if (data->consider_all_candidates)
2760 size = n_iv_cands (data);
2761 else
2763 s = bitmap_count_bits (use->related_cands);
2765 /* Round up to the power of two, so that moduling by it is fast. */
2766 size = s ? (1 << ceil_log2 (s)) : 1;
2769 use->n_map_members = size;
2770 use->cost_map = XCNEWVEC (struct cost_pair, size);
2774 /* Returns description of computation cost of expression whose runtime
2775 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2777 static comp_cost
2778 new_cost (unsigned runtime, unsigned complexity)
2780 comp_cost cost;
2782 cost.cost = runtime;
2783 cost.complexity = complexity;
2785 return cost;
2788 /* Adds costs COST1 and COST2. */
2790 static comp_cost
2791 add_costs (comp_cost cost1, comp_cost cost2)
2793 cost1.cost += cost2.cost;
2794 cost1.complexity += cost2.complexity;
2796 return cost1;
2798 /* Subtracts costs COST1 and COST2. */
2800 static comp_cost
2801 sub_costs (comp_cost cost1, comp_cost cost2)
2803 cost1.cost -= cost2.cost;
2804 cost1.complexity -= cost2.complexity;
2806 return cost1;
2809 /* Returns a negative number if COST1 < COST2, a positive number if
2810 COST1 > COST2, and 0 if COST1 = COST2. */
2812 static int
2813 compare_costs (comp_cost cost1, comp_cost cost2)
2815 if (cost1.cost == cost2.cost)
2816 return cost1.complexity - cost2.complexity;
2818 return cost1.cost - cost2.cost;
2821 /* Returns true if COST is infinite. */
2823 static bool
2824 infinite_cost_p (comp_cost cost)
2826 return cost.cost == INFTY;
2829 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2830 on invariants DEPENDS_ON and that the value used in expressing it
2831 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2833 static void
2834 set_use_iv_cost (struct ivopts_data *data,
2835 struct iv_use *use, struct iv_cand *cand,
2836 comp_cost cost, bitmap depends_on, tree value,
2837 enum tree_code comp, int inv_expr_id)
2839 unsigned i, s;
2841 if (infinite_cost_p (cost))
2843 BITMAP_FREE (depends_on);
2844 return;
2847 if (data->consider_all_candidates)
2849 use->cost_map[cand->id].cand = cand;
2850 use->cost_map[cand->id].cost = cost;
2851 use->cost_map[cand->id].depends_on = depends_on;
2852 use->cost_map[cand->id].value = value;
2853 use->cost_map[cand->id].comp = comp;
2854 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2855 return;
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s = cand->id & (use->n_map_members - 1);
2860 for (i = s; i < use->n_map_members; i++)
2861 if (!use->cost_map[i].cand)
2862 goto found;
2863 for (i = 0; i < s; i++)
2864 if (!use->cost_map[i].cand)
2865 goto found;
2867 gcc_unreachable ();
2869 found:
2870 use->cost_map[i].cand = cand;
2871 use->cost_map[i].cost = cost;
2872 use->cost_map[i].depends_on = depends_on;
2873 use->cost_map[i].value = value;
2874 use->cost_map[i].comp = comp;
2875 use->cost_map[i].inv_expr_id = inv_expr_id;
2878 /* Gets cost of (USE, CANDIDATE) pair. */
2880 static struct cost_pair *
2881 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2882 struct iv_cand *cand)
2884 unsigned i, s;
2885 struct cost_pair *ret;
2887 if (!cand)
2888 return NULL;
2890 if (data->consider_all_candidates)
2892 ret = use->cost_map + cand->id;
2893 if (!ret->cand)
2894 return NULL;
2896 return ret;
2899 /* n_map_members is a power of two, so this computes modulo. */
2900 s = cand->id & (use->n_map_members - 1);
2901 for (i = s; i < use->n_map_members; i++)
2902 if (use->cost_map[i].cand == cand)
2903 return use->cost_map + i;
2904 else if (use->cost_map[i].cand == NULL)
2905 return NULL;
2906 for (i = 0; i < s; i++)
2907 if (use->cost_map[i].cand == cand)
2908 return use->cost_map + i;
2909 else if (use->cost_map[i].cand == NULL)
2910 return NULL;
2912 return NULL;
2915 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2916 static rtx
2917 produce_memory_decl_rtl (tree obj, int *regno)
2919 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2920 machine_mode address_mode = targetm.addr_space.address_mode (as);
2921 rtx x;
2923 gcc_assert (obj);
2924 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2926 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2927 x = gen_rtx_SYMBOL_REF (address_mode, name);
2928 SET_SYMBOL_REF_DECL (x, obj);
2929 x = gen_rtx_MEM (DECL_MODE (obj), x);
2930 set_mem_addr_space (x, as);
2931 targetm.encode_section_info (obj, x, true);
2933 else
2935 x = gen_raw_REG (address_mode, (*regno)++);
2936 x = gen_rtx_MEM (DECL_MODE (obj), x);
2937 set_mem_addr_space (x, as);
2940 return x;
2943 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2944 walk_tree. DATA contains the actual fake register number. */
2946 static tree
2947 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2949 tree obj = NULL_TREE;
2950 rtx x = NULL_RTX;
2951 int *regno = (int *) data;
2953 switch (TREE_CODE (*expr_p))
2955 case ADDR_EXPR:
2956 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2957 handled_component_p (*expr_p);
2958 expr_p = &TREE_OPERAND (*expr_p, 0))
2959 continue;
2960 obj = *expr_p;
2961 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2962 x = produce_memory_decl_rtl (obj, regno);
2963 break;
2965 case SSA_NAME:
2966 *ws = 0;
2967 obj = SSA_NAME_VAR (*expr_p);
2968 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2969 if (!obj)
2970 return NULL_TREE;
2971 if (!DECL_RTL_SET_P (obj))
2972 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2973 break;
2975 case VAR_DECL:
2976 case PARM_DECL:
2977 case RESULT_DECL:
2978 *ws = 0;
2979 obj = *expr_p;
2981 if (DECL_RTL_SET_P (obj))
2982 break;
2984 if (DECL_MODE (obj) == BLKmode)
2985 x = produce_memory_decl_rtl (obj, regno);
2986 else
2987 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2989 break;
2991 default:
2992 break;
2995 if (x)
2997 decl_rtl_to_reset.safe_push (obj);
2998 SET_DECL_RTL (obj, x);
3001 return NULL_TREE;
3004 /* Determines cost of the computation of EXPR. */
3006 static unsigned
3007 computation_cost (tree expr, bool speed)
3009 rtx_insn *seq;
3010 rtx rslt;
3011 tree type = TREE_TYPE (expr);
3012 unsigned cost;
3013 /* Avoid using hard regs in ways which may be unsupported. */
3014 int regno = LAST_VIRTUAL_REGISTER + 1;
3015 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3016 enum node_frequency real_frequency = node->frequency;
3018 node->frequency = NODE_FREQUENCY_NORMAL;
3019 crtl->maybe_hot_insn_p = speed;
3020 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3021 start_sequence ();
3022 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3023 seq = get_insns ();
3024 end_sequence ();
3025 default_rtl_profile ();
3026 node->frequency = real_frequency;
3028 cost = seq_cost (seq, speed);
3029 if (MEM_P (rslt))
3030 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3031 TYPE_ADDR_SPACE (type), speed);
3032 else if (!REG_P (rslt))
3033 cost += set_src_cost (rslt, speed);
3035 return cost;
3038 /* Returns variable containing the value of candidate CAND at statement AT. */
3040 static tree
3041 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3043 if (stmt_after_increment (loop, cand, stmt))
3044 return cand->var_after;
3045 else
3046 return cand->var_before;
3049 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3050 same precision that is at least as wide as the precision of TYPE, stores
3051 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3052 type of A and B. */
3054 static tree
3055 determine_common_wider_type (tree *a, tree *b)
3057 tree wider_type = NULL;
3058 tree suba, subb;
3059 tree atype = TREE_TYPE (*a);
3061 if (CONVERT_EXPR_P (*a))
3063 suba = TREE_OPERAND (*a, 0);
3064 wider_type = TREE_TYPE (suba);
3065 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3066 return atype;
3068 else
3069 return atype;
3071 if (CONVERT_EXPR_P (*b))
3073 subb = TREE_OPERAND (*b, 0);
3074 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3075 return atype;
3077 else
3078 return atype;
3080 *a = suba;
3081 *b = subb;
3082 return wider_type;
3085 /* Determines the expression by that USE is expressed from induction variable
3086 CAND at statement AT in LOOP. The expression is stored in a decomposed
3087 form into AFF. Returns false if USE cannot be expressed using CAND. */
3089 static bool
3090 get_computation_aff (struct loop *loop,
3091 struct iv_use *use, struct iv_cand *cand, gimple at,
3092 struct aff_tree *aff)
3094 tree ubase = use->iv->base;
3095 tree ustep = use->iv->step;
3096 tree cbase = cand->iv->base;
3097 tree cstep = cand->iv->step, cstep_common;
3098 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3099 tree common_type, var;
3100 tree uutype;
3101 aff_tree cbase_aff, var_aff;
3102 widest_int rat;
3104 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3106 /* We do not have a precision to express the values of use. */
3107 return false;
3110 var = var_at_stmt (loop, cand, at);
3111 uutype = unsigned_type_for (utype);
3113 /* If the conversion is not noop, perform it. */
3114 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3116 cstep = fold_convert (uutype, cstep);
3117 cbase = fold_convert (uutype, cbase);
3118 var = fold_convert (uutype, var);
3121 if (!constant_multiple_of (ustep, cstep, &rat))
3122 return false;
3124 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3125 type, we achieve better folding by computing their difference in this
3126 wider type, and cast the result to UUTYPE. We do not need to worry about
3127 overflows, as all the arithmetics will in the end be performed in UUTYPE
3128 anyway. */
3129 common_type = determine_common_wider_type (&ubase, &cbase);
3131 /* use = ubase - ratio * cbase + ratio * var. */
3132 tree_to_aff_combination (ubase, common_type, aff);
3133 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3134 tree_to_aff_combination (var, uutype, &var_aff);
3136 /* We need to shift the value if we are after the increment. */
3137 if (stmt_after_increment (loop, cand, at))
3139 aff_tree cstep_aff;
3141 if (common_type != uutype)
3142 cstep_common = fold_convert (common_type, cstep);
3143 else
3144 cstep_common = cstep;
3146 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3147 aff_combination_add (&cbase_aff, &cstep_aff);
3150 aff_combination_scale (&cbase_aff, -rat);
3151 aff_combination_add (aff, &cbase_aff);
3152 if (common_type != uutype)
3153 aff_combination_convert (aff, uutype);
3155 aff_combination_scale (&var_aff, rat);
3156 aff_combination_add (aff, &var_aff);
3158 return true;
3161 /* Return the type of USE. */
3163 static tree
3164 get_use_type (struct iv_use *use)
3166 tree base_type = TREE_TYPE (use->iv->base);
3167 tree type;
3169 if (use->type == USE_ADDRESS)
3171 /* The base_type may be a void pointer. Create a pointer type based on
3172 the mem_ref instead. */
3173 type = build_pointer_type (TREE_TYPE (*use->op_p));
3174 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3175 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3177 else
3178 type = base_type;
3180 return type;
3183 /* Determines the expression by that USE is expressed from induction variable
3184 CAND at statement AT in LOOP. The computation is unshared. */
3186 static tree
3187 get_computation_at (struct loop *loop,
3188 struct iv_use *use, struct iv_cand *cand, gimple at)
3190 aff_tree aff;
3191 tree type = get_use_type (use);
3193 if (!get_computation_aff (loop, use, cand, at, &aff))
3194 return NULL_TREE;
3195 unshare_aff_combination (&aff);
3196 return fold_convert (type, aff_combination_to_tree (&aff));
3199 /* Determines the expression by that USE is expressed from induction variable
3200 CAND in LOOP. The computation is unshared. */
3202 static tree
3203 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3205 return get_computation_at (loop, use, cand, use->stmt);
3208 /* Adjust the cost COST for being in loop setup rather than loop body.
3209 If we're optimizing for space, the loop setup overhead is constant;
3210 if we're optimizing for speed, amortize it over the per-iteration cost. */
3211 static unsigned
3212 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3214 if (cost == INFTY)
3215 return cost;
3216 else if (optimize_loop_for_speed_p (data->current_loop))
3217 return cost / avg_loop_niter (data->current_loop);
3218 else
3219 return cost;
3222 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3223 validity for a memory reference accessing memory of mode MODE in
3224 address space AS. */
3227 bool
3228 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3229 addr_space_t as)
3231 #define MAX_RATIO 128
3232 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3233 static vec<sbitmap> valid_mult_list;
3234 sbitmap valid_mult;
3236 if (data_index >= valid_mult_list.length ())
3237 valid_mult_list.safe_grow_cleared (data_index + 1);
3239 valid_mult = valid_mult_list[data_index];
3240 if (!valid_mult)
3242 machine_mode address_mode = targetm.addr_space.address_mode (as);
3243 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3244 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3245 rtx addr, scaled;
3246 HOST_WIDE_INT i;
3248 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3249 bitmap_clear (valid_mult);
3250 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3251 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3252 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3254 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3255 if (memory_address_addr_space_p (mode, addr, as)
3256 || memory_address_addr_space_p (mode, scaled, as))
3257 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3260 if (dump_file && (dump_flags & TDF_DETAILS))
3262 fprintf (dump_file, " allowed multipliers:");
3263 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3264 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3265 fprintf (dump_file, " %d", (int) i);
3266 fprintf (dump_file, "\n");
3267 fprintf (dump_file, "\n");
3270 valid_mult_list[data_index] = valid_mult;
3273 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3274 return false;
3276 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3279 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3280 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3281 variable is omitted. Compute the cost for a memory reference that accesses
3282 a memory location of mode MEM_MODE in address space AS.
3284 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3285 size of MEM_MODE / RATIO) is available. To make this determination, we
3286 look at the size of the increment to be made, which is given in CSTEP.
3287 CSTEP may be zero if the step is unknown.
3288 STMT_AFTER_INC is true iff the statement we're looking at is after the
3289 increment of the original biv.
3291 TODO -- there must be some better way. This all is quite crude. */
3293 enum ainc_type
3295 AINC_PRE_INC, /* Pre increment. */
3296 AINC_PRE_DEC, /* Pre decrement. */
3297 AINC_POST_INC, /* Post increment. */
3298 AINC_POST_DEC, /* Post decrement. */
3299 AINC_NONE /* Also the number of auto increment types. */
3302 typedef struct address_cost_data_s
3304 HOST_WIDE_INT min_offset, max_offset;
3305 unsigned costs[2][2][2][2];
3306 unsigned ainc_costs[AINC_NONE];
3307 } *address_cost_data;
3310 static comp_cost
3311 get_address_cost (bool symbol_present, bool var_present,
3312 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3313 HOST_WIDE_INT cstep, machine_mode mem_mode,
3314 addr_space_t as, bool speed,
3315 bool stmt_after_inc, bool *may_autoinc)
3317 machine_mode address_mode = targetm.addr_space.address_mode (as);
3318 static vec<address_cost_data> address_cost_data_list;
3319 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3320 address_cost_data data;
3321 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3322 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3323 unsigned cost, acost, complexity;
3324 enum ainc_type autoinc_type;
3325 bool offset_p, ratio_p, autoinc;
3326 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3327 unsigned HOST_WIDE_INT mask;
3328 unsigned bits;
3330 if (data_index >= address_cost_data_list.length ())
3331 address_cost_data_list.safe_grow_cleared (data_index + 1);
3333 data = address_cost_data_list[data_index];
3334 if (!data)
3336 HOST_WIDE_INT i;
3337 HOST_WIDE_INT rat, off = 0;
3338 int old_cse_not_expected, width;
3339 unsigned sym_p, var_p, off_p, rat_p, add_c;
3340 rtx_insn *seq;
3341 rtx addr, base;
3342 rtx reg0, reg1;
3344 data = (address_cost_data) xcalloc (1, sizeof (*data));
3346 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3348 width = GET_MODE_BITSIZE (address_mode) - 1;
3349 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3350 width = HOST_BITS_PER_WIDE_INT - 1;
3351 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3353 for (i = width; i >= 0; i--)
3355 off = -((unsigned HOST_WIDE_INT) 1 << i);
3356 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3357 if (memory_address_addr_space_p (mem_mode, addr, as))
3358 break;
3360 data->min_offset = (i == -1? 0 : off);
3362 for (i = width; i >= 0; i--)
3364 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3365 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3366 if (memory_address_addr_space_p (mem_mode, addr, as))
3367 break;
3368 /* For some strict-alignment targets, the offset must be naturally
3369 aligned. Try an aligned offset if mem_mode is not QImode. */
3370 off = mem_mode != QImode
3371 ? ((unsigned HOST_WIDE_INT) 1 << i)
3372 - GET_MODE_SIZE (mem_mode)
3373 : 0;
3374 if (off > 0)
3376 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3377 if (memory_address_addr_space_p (mem_mode, addr, as))
3378 break;
3381 if (i == -1)
3382 off = 0;
3383 data->max_offset = off;
3385 if (dump_file && (dump_flags & TDF_DETAILS))
3387 fprintf (dump_file, "get_address_cost:\n");
3388 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3389 GET_MODE_NAME (mem_mode),
3390 data->min_offset);
3391 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3392 GET_MODE_NAME (mem_mode),
3393 data->max_offset);
3396 rat = 1;
3397 for (i = 2; i <= MAX_RATIO; i++)
3398 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3400 rat = i;
3401 break;
3404 /* Compute the cost of various addressing modes. */
3405 acost = 0;
3406 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3407 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3409 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3410 || USE_STORE_PRE_DECREMENT (mem_mode))
3412 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3413 has_predec[mem_mode]
3414 = memory_address_addr_space_p (mem_mode, addr, as);
3416 if (has_predec[mem_mode])
3417 data->ainc_costs[AINC_PRE_DEC]
3418 = address_cost (addr, mem_mode, as, speed);
3420 if (USE_LOAD_POST_DECREMENT (mem_mode)
3421 || USE_STORE_POST_DECREMENT (mem_mode))
3423 addr = gen_rtx_POST_DEC (address_mode, reg0);
3424 has_postdec[mem_mode]
3425 = memory_address_addr_space_p (mem_mode, addr, as);
3427 if (has_postdec[mem_mode])
3428 data->ainc_costs[AINC_POST_DEC]
3429 = address_cost (addr, mem_mode, as, speed);
3431 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3432 || USE_STORE_PRE_DECREMENT (mem_mode))
3434 addr = gen_rtx_PRE_INC (address_mode, reg0);
3435 has_preinc[mem_mode]
3436 = memory_address_addr_space_p (mem_mode, addr, as);
3438 if (has_preinc[mem_mode])
3439 data->ainc_costs[AINC_PRE_INC]
3440 = address_cost (addr, mem_mode, as, speed);
3442 if (USE_LOAD_POST_INCREMENT (mem_mode)
3443 || USE_STORE_POST_INCREMENT (mem_mode))
3445 addr = gen_rtx_POST_INC (address_mode, reg0);
3446 has_postinc[mem_mode]
3447 = memory_address_addr_space_p (mem_mode, addr, as);
3449 if (has_postinc[mem_mode])
3450 data->ainc_costs[AINC_POST_INC]
3451 = address_cost (addr, mem_mode, as, speed);
3453 for (i = 0; i < 16; i++)
3455 sym_p = i & 1;
3456 var_p = (i >> 1) & 1;
3457 off_p = (i >> 2) & 1;
3458 rat_p = (i >> 3) & 1;
3460 addr = reg0;
3461 if (rat_p)
3462 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3463 gen_int_mode (rat, address_mode));
3465 if (var_p)
3466 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3468 if (sym_p)
3470 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3471 /* ??? We can run into trouble with some backends by presenting
3472 it with symbols which haven't been properly passed through
3473 targetm.encode_section_info. By setting the local bit, we
3474 enhance the probability of things working. */
3475 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3477 if (off_p)
3478 base = gen_rtx_fmt_e (CONST, address_mode,
3479 gen_rtx_fmt_ee
3480 (PLUS, address_mode, base,
3481 gen_int_mode (off, address_mode)));
3483 else if (off_p)
3484 base = gen_int_mode (off, address_mode);
3485 else
3486 base = NULL_RTX;
3488 if (base)
3489 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3491 start_sequence ();
3492 /* To avoid splitting addressing modes, pretend that no cse will
3493 follow. */
3494 old_cse_not_expected = cse_not_expected;
3495 cse_not_expected = true;
3496 addr = memory_address_addr_space (mem_mode, addr, as);
3497 cse_not_expected = old_cse_not_expected;
3498 seq = get_insns ();
3499 end_sequence ();
3501 acost = seq_cost (seq, speed);
3502 acost += address_cost (addr, mem_mode, as, speed);
3504 if (!acost)
3505 acost = 1;
3506 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3509 /* On some targets, it is quite expensive to load symbol to a register,
3510 which makes addresses that contain symbols look much more expensive.
3511 However, the symbol will have to be loaded in any case before the
3512 loop (and quite likely we have it in register already), so it does not
3513 make much sense to penalize them too heavily. So make some final
3514 tweaks for the SYMBOL_PRESENT modes:
3516 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3517 var is cheaper, use this mode with small penalty.
3518 If VAR_PRESENT is true, try whether the mode with
3519 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3520 if this is the case, use it. */
3521 add_c = add_cost (speed, address_mode);
3522 for (i = 0; i < 8; i++)
3524 var_p = i & 1;
3525 off_p = (i >> 1) & 1;
3526 rat_p = (i >> 2) & 1;
3528 acost = data->costs[0][1][off_p][rat_p] + 1;
3529 if (var_p)
3530 acost += add_c;
3532 if (acost < data->costs[1][var_p][off_p][rat_p])
3533 data->costs[1][var_p][off_p][rat_p] = acost;
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3538 fprintf (dump_file, "Address costs:\n");
3540 for (i = 0; i < 16; i++)
3542 sym_p = i & 1;
3543 var_p = (i >> 1) & 1;
3544 off_p = (i >> 2) & 1;
3545 rat_p = (i >> 3) & 1;
3547 fprintf (dump_file, " ");
3548 if (sym_p)
3549 fprintf (dump_file, "sym + ");
3550 if (var_p)
3551 fprintf (dump_file, "var + ");
3552 if (off_p)
3553 fprintf (dump_file, "cst + ");
3554 if (rat_p)
3555 fprintf (dump_file, "rat * ");
3557 acost = data->costs[sym_p][var_p][off_p][rat_p];
3558 fprintf (dump_file, "index costs %d\n", acost);
3560 if (has_predec[mem_mode] || has_postdec[mem_mode]
3561 || has_preinc[mem_mode] || has_postinc[mem_mode])
3562 fprintf (dump_file, " May include autoinc/dec\n");
3563 fprintf (dump_file, "\n");
3566 address_cost_data_list[data_index] = data;
3569 bits = GET_MODE_BITSIZE (address_mode);
3570 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3571 offset &= mask;
3572 if ((offset >> (bits - 1) & 1))
3573 offset |= ~mask;
3574 s_offset = offset;
3576 autoinc = false;
3577 autoinc_type = AINC_NONE;
3578 msize = GET_MODE_SIZE (mem_mode);
3579 autoinc_offset = offset;
3580 if (stmt_after_inc)
3581 autoinc_offset += ratio * cstep;
3582 if (symbol_present || var_present || ratio != 1)
3583 autoinc = false;
3584 else
3586 if (has_postinc[mem_mode] && autoinc_offset == 0
3587 && msize == cstep)
3588 autoinc_type = AINC_POST_INC;
3589 else if (has_postdec[mem_mode] && autoinc_offset == 0
3590 && msize == -cstep)
3591 autoinc_type = AINC_POST_DEC;
3592 else if (has_preinc[mem_mode] && autoinc_offset == msize
3593 && msize == cstep)
3594 autoinc_type = AINC_PRE_INC;
3595 else if (has_predec[mem_mode] && autoinc_offset == -msize
3596 && msize == -cstep)
3597 autoinc_type = AINC_PRE_DEC;
3599 if (autoinc_type != AINC_NONE)
3600 autoinc = true;
3603 cost = 0;
3604 offset_p = (s_offset != 0
3605 && data->min_offset <= s_offset
3606 && s_offset <= data->max_offset);
3607 ratio_p = (ratio != 1
3608 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3610 if (ratio != 1 && !ratio_p)
3611 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3613 if (s_offset && !offset_p && !symbol_present)
3614 cost += add_cost (speed, address_mode);
3616 if (may_autoinc)
3617 *may_autoinc = autoinc;
3618 if (autoinc)
3619 acost = data->ainc_costs[autoinc_type];
3620 else
3621 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3622 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3623 return new_cost (cost + acost, complexity);
3626 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3627 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3628 calculating the operands of EXPR. Returns true if successful, and returns
3629 the cost in COST. */
3631 static bool
3632 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3633 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3635 comp_cost res;
3636 tree op1 = TREE_OPERAND (expr, 1);
3637 tree cst = TREE_OPERAND (mult, 1);
3638 tree multop = TREE_OPERAND (mult, 0);
3639 int m = exact_log2 (int_cst_value (cst));
3640 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3641 int as_cost, sa_cost;
3642 bool mult_in_op1;
3644 if (!(m >= 0 && m < maxm))
3645 return false;
3647 mult_in_op1 = operand_equal_p (op1, mult, 0);
3649 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3651 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3652 use that in preference to a shift insn followed by an add insn. */
3653 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3654 ? shiftadd_cost (speed, mode, m)
3655 : (mult_in_op1
3656 ? shiftsub1_cost (speed, mode, m)
3657 : shiftsub0_cost (speed, mode, m)));
3659 res = new_cost (MIN (as_cost, sa_cost), 0);
3660 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3662 STRIP_NOPS (multop);
3663 if (!is_gimple_val (multop))
3664 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3666 *cost = res;
3667 return true;
3670 /* Estimates cost of forcing expression EXPR into a variable. */
/* SPEED indexes the cached cost tables below (index 1 is labelled "speed"
   and index 0 "size" in the dump output).  The returned cost is capped at
   target_spill_cost[SPEED], except when the cost computed by
   get_shiftadd_cost is returned directly.  The complexity of every cost
   created here is 0.  */
3672 static comp_cost
3673 force_expr_to_var_cost (tree expr, bool speed)
3675 static bool costs_initialized = false;
3676 static unsigned integer_cost [2];
3677 static unsigned symbol_cost [2];
3678 static unsigned address_cost [2];
3679 tree op0, op1;
3680 comp_cost cost0, cost1, cost;
3681 machine_mode mode;
/* On the first call only: measure, for both values of the size/speed
   index, the cost of an integer constant (2000), of the address of a
   static test variable, and of that address plus 2000, and cache the
   results in the static arrays above.  */
3683 if (!costs_initialized)
3685 tree type = build_pointer_type (integer_type_node);
3686 tree var, addr;
3687 rtx x;
3688 int i;
3690 var = create_tmp_var_raw (integer_type_node, "test_var");
3691 TREE_STATIC (var) = 1;
3692 x = produce_memory_decl_rtl (var, NULL);
3693 SET_DECL_RTL (var, x);
3695 addr = build1 (ADDR_EXPR, type, var);
3698 for (i = 0; i < 2; i++)
3700 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3701 2000), i);
/* The symbol and address costs are the measured cost plus one.  */
3703 symbol_cost[i] = computation_cost (addr, i) + 1;
3705 address_cost[i]
3706 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3707 if (dump_file && (dump_flags & TDF_DETAILS))
3709 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3710 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3711 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3712 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3713 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3714 fprintf (dump_file, "\n");
3718 costs_initialized = true;
3721 STRIP_NOPS (expr);
/* Something that already is a variable costs nothing.  */
3723 if (SSA_VAR_P (expr))
3724 return no_cost;
/* Invariants use the cached costs: integer constants, addresses of
   plain VAR_DECL/PARM_DECL/RESULT_DECL objects, and every other
   invariant (charged the address cost).  */
3726 if (is_gimple_min_invariant (expr))
3728 if (TREE_CODE (expr) == INTEGER_CST)
3729 return new_cost (integer_cost [speed], 0);
3731 if (TREE_CODE (expr) == ADDR_EXPR)
3733 tree obj = TREE_OPERAND (expr, 0);
3735 if (TREE_CODE (obj) == VAR_DECL
3736 || TREE_CODE (obj) == PARM_DECL
3737 || TREE_CODE (obj) == RESULT_DECL)
3738 return new_cost (symbol_cost [speed], 0);
3741 return new_cost (address_cost [speed], 0);
/* Pick out the operands of the expression codes handled below; any
   other code gets the flat target_spill_cost estimate.  */
3744 switch (TREE_CODE (expr))
3746 case POINTER_PLUS_EXPR:
3747 case PLUS_EXPR:
3748 case MINUS_EXPR:
3749 case MULT_EXPR:
3750 op0 = TREE_OPERAND (expr, 0);
3751 op1 = TREE_OPERAND (expr, 1);
3752 STRIP_NOPS (op0);
3753 STRIP_NOPS (op1);
3754 break;
3756 CASE_CONVERT:
3757 case NEGATE_EXPR:
3758 op0 = TREE_OPERAND (expr, 0);
3759 STRIP_NOPS (op0);
3760 op1 = NULL_TREE;
3761 break;
3763 default:
3764 /* Just an arbitrary value, FIXME. */
3765 return new_cost (target_spill_cost[speed], 0);
/* Operands that are missing, SSA names or constants are treated as
   free; any other operand is costed recursively.  */
3768 if (op0 == NULL_TREE
3769 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3770 cost0 = no_cost;
3771 else
3772 cost0 = force_expr_to_var_cost (op0, speed);
3774 if (op1 == NULL_TREE
3775 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3776 cost1 = no_cost;
3777 else
3778 cost1 = force_expr_to_var_cost (op1, speed);
3780 mode = TYPE_MODE (TREE_TYPE (expr));
3781 switch (TREE_CODE (expr))
3783 case POINTER_PLUS_EXPR:
3784 case PLUS_EXPR:
3785 case MINUS_EXPR:
3786 case NEGATE_EXPR:
3787 cost = new_cost (add_cost (speed, mode), 0);
/* For a binary add/sub with a MULT_EXPR operand (OP1 is preferred over
   OP0) whose second operand is a suitable constant, try the shift-add
   costing; when it succeeds its result is returned as is, without the
   operand costs being added again and without the cap applied below.  */
3788 if (TREE_CODE (expr) != NEGATE_EXPR)
3790 tree mult = NULL_TREE;
3791 comp_cost sa_cost;
3792 if (TREE_CODE (op1) == MULT_EXPR)
3793 mult = op1;
3794 else if (TREE_CODE (op0) == MULT_EXPR)
3795 mult = op0;
3797 if (mult != NULL_TREE
3798 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3799 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3800 speed, &sa_cost))
3801 return sa_cost;
3803 break;
3805 CASE_CONVERT:
/* NOTE: despite their names, INNER_MODE and OUTER_MODE hold types; the
   machine modes are obtained from them with TYPE_MODE.  */
3807 tree inner_mode, outer_mode;
3808 outer_mode = TREE_TYPE (expr);
3809 inner_mode = TREE_TYPE (op0);
3810 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3811 TYPE_MODE (inner_mode), speed), 0);
3813 break;
/* Only a multiplication in which one operand is a constant fitting a
   HOST_WIDE_INT is costed precisely; otherwise the flat estimate is
   returned.  */
3815 case MULT_EXPR:
3816 if (cst_and_fits_in_hwi (op0))
3817 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3818 mode, speed), 0);
3819 else if (cst_and_fits_in_hwi (op1))
3820 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3821 mode, speed), 0);
3822 else
3823 return new_cost (target_spill_cost [speed], 0);
3824 break;
3826 default:
3827 gcc_unreachable ();
/* Add the costs of computing the operands to the cost of the operation
   itself.  */
3830 cost = add_costs (cost, cost0);
3831 cost = add_costs (cost, cost1);
3833 /* Bound the cost by target_spill_cost. The parts of complicated
3834 computations often are either loop invariant or at least can
3835 be shared between several iv uses, so letting this grow without
3836 limits would not give reasonable results. */
3837 if (cost.cost > (int) target_spill_cost [speed])
3838 cost.cost = target_spill_cost [speed];
3840 return cost;
3843 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3844 invariants the computation depends on. */
3846 static comp_cost
3847 force_var_cost (struct ivopts_data *data,
3848 tree expr, bitmap *depends_on)
3850 if (depends_on)
3852 fd_ivopts_data = data;
3853 walk_tree (&expr, find_depends, depends_on, NULL);
3856 return force_expr_to_var_cost (expr, data->speed);
3859 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3860 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3861 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3862 invariants the computation depends on. */
3864 static comp_cost
3865 split_address_cost (struct ivopts_data *data,
3866 tree addr, bool *symbol_present, bool *var_present,
3867 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3869 tree core;
3870 HOST_WIDE_INT bitsize;
3871 HOST_WIDE_INT bitpos;
3872 tree toffset;
3873 machine_mode mode;
3874 int unsignedp, volatilep;
3876 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3877 &unsignedp, &volatilep, false);
3879 if (toffset != 0
3880 || bitpos % BITS_PER_UNIT != 0
3881 || TREE_CODE (core) != VAR_DECL)
3883 *symbol_present = false;
3884 *var_present = true;
3885 fd_ivopts_data = data;
3886 walk_tree (&addr, find_depends, depends_on, NULL);
3887 return new_cost (target_spill_cost[data->speed], 0);
3890 *offset += bitpos / BITS_PER_UNIT;
3891 if (TREE_STATIC (core)
3892 || DECL_EXTERNAL (core))
3894 *symbol_present = true;
3895 *var_present = false;
3896 return no_cost;
3899 *symbol_present = false;
3900 *var_present = true;
3901 return no_cost;
3904 /* Estimates cost of expressing difference of addresses E1 - E2 as
3905 var + symbol + offset. The value of offset is added to OFFSET,
3906 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3907 part is missing. DEPENDS_ON is a set of the invariants the computation
3908 depends on. */
3910 static comp_cost
3911 ptr_difference_cost (struct ivopts_data *data,
3912 tree e1, tree e2, bool *symbol_present, bool *var_present,
3913 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3915 HOST_WIDE_INT diff = 0;
3916 aff_tree aff_e1, aff_e2;
3917 tree type;
3919 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3921 if (ptr_difference_const (e1, e2, &diff))
3923 *offset += diff;
3924 *symbol_present = false;
3925 *var_present = false;
3926 return no_cost;
3929 if (integer_zerop (e2))
3930 return split_address_cost (data, TREE_OPERAND (e1, 0),
3931 symbol_present, var_present, offset, depends_on);
3933 *symbol_present = false;
3934 *var_present = true;
3936 type = signed_type_for (TREE_TYPE (e1));
3937 tree_to_aff_combination (e1, type, &aff_e1);
3938 tree_to_aff_combination (e2, type, &aff_e2);
3939 aff_combination_scale (&aff_e2, -1);
3940 aff_combination_add (&aff_e1, &aff_e2);
3942 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3945 /* Estimates cost of expressing difference E1 - E2 as
3946 var + symbol + offset. The value of offset is added to OFFSET,
3947 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3948 part is missing. DEPENDS_ON is a set of the invariants the computation
3949 depends on. */
3951 static comp_cost
3952 difference_cost (struct ivopts_data *data,
3953 tree e1, tree e2, bool *symbol_present, bool *var_present,
3954 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3956 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3957 unsigned HOST_WIDE_INT off1, off2;
3958 aff_tree aff_e1, aff_e2;
3959 tree type;
3961 e1 = strip_offset (e1, &off1);
3962 e2 = strip_offset (e2, &off2);
3963 *offset += off1 - off2;
3965 STRIP_NOPS (e1);
3966 STRIP_NOPS (e2);
3968 if (TREE_CODE (e1) == ADDR_EXPR)
3969 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3970 offset, depends_on);
3971 *symbol_present = false;
3973 if (operand_equal_p (e1, e2, 0))
3975 *var_present = false;
3976 return no_cost;
3979 *var_present = true;
3981 if (integer_zerop (e2))
3982 return force_var_cost (data, e1, depends_on);
3984 if (integer_zerop (e1))
3986 comp_cost cost = force_var_cost (data, e2, depends_on);
3987 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3988 return cost;
3991 type = signed_type_for (TREE_TYPE (e1));
3992 tree_to_aff_combination (e1, type, &aff_e1);
3993 tree_to_aff_combination (e2, type, &aff_e2);
3994 aff_combination_scale (&aff_e2, -1);
3995 aff_combination_add (&aff_e1, &aff_e2);
3997 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4000 /* Returns true if AFF1 and AFF2 are identical. */
4002 static bool
4003 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4005 unsigned i;
4007 if (aff1->n != aff2->n)
4008 return false;
4010 for (i = 0; i < aff1->n; i++)
4012 if (aff1->elts[i].coef != aff2->elts[i].coef)
4013 return false;
4015 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4016 return false;
4018 return true;
4021 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4023 static int
4024 get_expr_id (struct ivopts_data *data, tree expr)
4026 struct iv_inv_expr_ent ent;
4027 struct iv_inv_expr_ent **slot;
4029 ent.expr = expr;
4030 ent.hash = iterative_hash_expr (expr, 0);
4031 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4032 if (*slot)
4033 return (*slot)->id;
4035 *slot = XNEW (struct iv_inv_expr_ent);
4036 (*slot)->expr = expr;
4037 (*slot)->hash = ent.hash;
4038 (*slot)->id = data->inv_expr_id++;
4039 return (*slot)->id;
4042 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4043 requires a new compiler generated temporary. Returns -1 otherwise.
4044 ADDRESS_P is a flag indicating if the expression is for address
4045 computation. */
4047 static int
4048 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4049 tree cbase, HOST_WIDE_INT ratio,
4050 bool address_p)
4052 aff_tree ubase_aff, cbase_aff;
4053 tree expr, ub, cb;
4055 STRIP_NOPS (ubase);
4056 STRIP_NOPS (cbase);
4057 ub = ubase;
4058 cb = cbase;
4060 if ((TREE_CODE (ubase) == INTEGER_CST)
4061 && (TREE_CODE (cbase) == INTEGER_CST))
4062 return -1;
4064 /* Strips the constant part. */
4065 if (TREE_CODE (ubase) == PLUS_EXPR
4066 || TREE_CODE (ubase) == MINUS_EXPR
4067 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4069 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4070 ubase = TREE_OPERAND (ubase, 0);
4073 /* Strips the constant part. */
4074 if (TREE_CODE (cbase) == PLUS_EXPR
4075 || TREE_CODE (cbase) == MINUS_EXPR
4076 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4078 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4079 cbase = TREE_OPERAND (cbase, 0);
4082 if (address_p)
4084 if (((TREE_CODE (ubase) == SSA_NAME)
4085 || (TREE_CODE (ubase) == ADDR_EXPR
4086 && is_gimple_min_invariant (ubase)))
4087 && (TREE_CODE (cbase) == INTEGER_CST))
4088 return -1;
4090 if (((TREE_CODE (cbase) == SSA_NAME)
4091 || (TREE_CODE (cbase) == ADDR_EXPR
4092 && is_gimple_min_invariant (cbase)))
4093 && (TREE_CODE (ubase) == INTEGER_CST))
4094 return -1;
4097 if (ratio == 1)
4099 if (operand_equal_p (ubase, cbase, 0))
4100 return -1;
4102 if (TREE_CODE (ubase) == ADDR_EXPR
4103 && TREE_CODE (cbase) == ADDR_EXPR)
4105 tree usym, csym;
4107 usym = TREE_OPERAND (ubase, 0);
4108 csym = TREE_OPERAND (cbase, 0);
4109 if (TREE_CODE (usym) == ARRAY_REF)
4111 tree ind = TREE_OPERAND (usym, 1);
4112 if (TREE_CODE (ind) == INTEGER_CST
4113 && tree_fits_shwi_p (ind)
4114 && tree_to_shwi (ind) == 0)
4115 usym = TREE_OPERAND (usym, 0);
4117 if (TREE_CODE (csym) == ARRAY_REF)
4119 tree ind = TREE_OPERAND (csym, 1);
4120 if (TREE_CODE (ind) == INTEGER_CST
4121 && tree_fits_shwi_p (ind)
4122 && tree_to_shwi (ind) == 0)
4123 csym = TREE_OPERAND (csym, 0);
4125 if (operand_equal_p (usym, csym, 0))
4126 return -1;
4128 /* Now do more complex comparison */
4129 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4130 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4131 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4132 return -1;
4135 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4136 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4138 aff_combination_scale (&cbase_aff, -1 * ratio);
4139 aff_combination_add (&ubase_aff, &cbase_aff);
4140 expr = aff_combination_to_tree (&ubase_aff);
4141 return get_expr_id (data, expr);
4146 /* Determines the cost of the computation by that USE is expressed
4147 from induction variable CAND. If ADDRESS_P is true, we just need
4148 to create an address from it, otherwise we want to get it into
4149 register. A set of invariants we depend on is stored in
4150 DEPENDS_ON. AT is the statement at that the value is computed.
4151 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4152 addressing is likely. */
/* If INV_EXPR_ID is nonnull, it receives the value returned by
   get_loop_invariant_expr_id for the bases and ratio determined here.

   infinite_cost is returned when CAND has no iv, when the type of the
   use is wider than the type of the candidate, when (for addresses or
   pointer-based ivs) the use and the candidate have different base
   objects, and when the step of the use is not a constant multiple of
   the candidate's step that fits a HOST_WIDE_INT.  Note that
   *CAN_AUTOINC and *INV_EXPR_ID are not written on those paths.  */
4154 static comp_cost
4155 get_computation_cost_at (struct ivopts_data *data,
4156 struct iv_use *use, struct iv_cand *cand,
4157 bool address_p, bitmap *depends_on, gimple at,
4158 bool *can_autoinc,
4159 int *inv_expr_id)
4161 tree ubase = use->iv->base, ustep = use->iv->step;
4162 tree cbase, cstep;
4163 tree utype = TREE_TYPE (ubase), ctype;
4164 unsigned HOST_WIDE_INT cstepi, offset = 0;
4165 HOST_WIDE_INT ratio, aratio;
4166 bool var_present, symbol_present, stmt_is_after_inc;
4167 comp_cost cost;
4168 widest_int rat;
/* SPEED is taken from the basic block containing AT.  */
4169 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
/* The mode of the memory access; only meaningful for address uses.  */
4170 machine_mode mem_mode = (address_p
4171 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4172 : VOIDmode);
4174 *depends_on = NULL;
4176 /* Only consider real candidates. */
4177 if (!cand->iv)
4178 return infinite_cost;
4180 cbase = cand->iv->base;
4181 cstep = cand->iv->step;
4182 ctype = TREE_TYPE (cbase);
4184 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4186 /* We do not have a precision to express the values of use. */
4187 return infinite_cost;
4190 if (address_p
4191 || (use->iv->base_object
4192 && cand->iv->base_object
4193 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4194 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4196 /* Do not try to express address of an object with computation based
4197 on address of a different object. This may cause problems in rtl
4198 level alias analysis (that does not expect this to be happening,
4199 as this is illegal in C), and would be unlikely to be useful
4200 anyway. */
4201 if (use->iv->base_object
4202 && cand->iv->base_object
4203 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4204 return infinite_cost;
4207 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4209 /* TODO -- add direct handling of this case. */
4210 goto fallback;
4213 /* CSTEPI is removed from the offset in case statement is after the
4214 increment. If the step is not constant, we use zero instead.
4215 This is a bit imprecise (there is the extra addition), but
4216 redundancy elimination is likely to transform the code so that
4217 it uses value of the variable before increment anyway,
4218 so it is not that much unrealistic. */
4219 if (cst_and_fits_in_hwi (cstep))
4220 cstepi = int_cst_value (cstep);
4221 else
4222 cstepi = 0;
/* RATIO is the constant factor by which the step of the use is a
   multiple of the step of the candidate.  */
4224 if (!constant_multiple_of (ustep, cstep, &rat))
4225 return infinite_cost;
4227 if (wi::fits_shwi_p (rat))
4228 ratio = rat.to_shwi ();
4229 else
4230 return infinite_cost;
4232 STRIP_NOPS (cbase);
4233 ctype = TREE_TYPE (cbase);
4235 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4237 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4238 or ratio == 1, it is better to handle this like
4240 ubase - ratio * cbase + ratio * var
4242 (also holds in the case ratio == -1, TODO). */
/* In each of the branches below the cost of the base difference is
   divided by avg_loop_niter of the current loop -- presumably because
   that part is computed once outside the loop; confirm.  */
4244 if (cst_and_fits_in_hwi (cbase))
4246 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4247 cost = difference_cost (data,
4248 ubase, build_int_cst (utype, 0),
4249 &symbol_present, &var_present, &offset,
4250 depends_on);
4251 cost.cost /= avg_loop_niter (data->current_loop);
4253 else if (ratio == 1)
4255 tree real_cbase = cbase;
4257 /* Check to see if any adjustment is needed. */
4258 if (cstepi == 0 && stmt_is_after_inc)
4260 aff_tree real_cbase_aff;
4261 aff_tree cstep_aff;
4263 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4264 &real_cbase_aff);
4265 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4267 aff_combination_add (&real_cbase_aff, &cstep_aff);
4268 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4271 cost = difference_cost (data,
4272 ubase, real_cbase,
4273 &symbol_present, &var_present, &offset,
4274 depends_on);
4275 cost.cost /= avg_loop_niter (data->current_loop);
/* The multiplication by RATIO can be done by the addressing mode, so
   fold it into CBASE.  */
4277 else if (address_p
4278 && !POINTER_TYPE_P (ctype)
4279 && multiplier_allowed_in_address_p
4280 (ratio, mem_mode,
4281 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4283 cbase
4284 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4285 cost = difference_cost (data,
4286 ubase, cbase,
4287 &symbol_present, &var_present, &offset,
4288 depends_on);
4289 cost.cost /= avg_loop_niter (data->current_loop);
4291 else
4293 cost = force_var_cost (data, cbase, depends_on);
4294 cost = add_costs (cost,
4295 difference_cost (data,
4296 ubase, build_int_cst (utype, 0),
4297 &symbol_present, &var_present,
4298 &offset, depends_on));
4299 cost.cost /= avg_loop_niter (data->current_loop);
/* NOTE(review): this line uses data->speed whereas the rest of the
   function uses the per-statement SPEED computed above -- confirm that
   this is intended.  */
4300 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4303 if (inv_expr_id)
4305 *inv_expr_id =
4306 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4307 /* Clear depends on. */
4308 if (*inv_expr_id != -1 && depends_on && *depends_on)
4309 bitmap_clear (*depends_on);
4312 /* If we are after the increment, the value of the candidate is higher by
4313 one iteration. */
4314 if (stmt_is_after_inc)
4315 offset -= ratio * cstepi;
4317 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4318 (symbol/var1/const parts may be omitted). If we are looking for an
4319 address, find the cost of addressing this. */
4320 if (address_p)
4321 return add_costs (cost,
4322 get_address_cost (symbol_present, var_present,
4323 offset, ratio, cstepi,
4324 mem_mode,
4325 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4326 speed, stmt_is_after_inc,
4327 can_autoinc));
4329 /* Otherwise estimate the costs for computing the expression. */
4330 if (!symbol_present && !var_present && !offset)
4332 if (ratio != 1)
4333 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4334 return cost;
4337 /* Symbol + offset should be compile-time computable so consider that they
4338 are added once to the variable, if present. */
4339 if (var_present && (symbol_present || offset))
4340 cost.cost += adjust_setup_cost (data,
4341 add_cost (speed, TYPE_MODE (ctype)));
4343 /* Having offset does not affect runtime cost in case it is added to
4344 symbol, but it increases complexity. */
4345 if (offset)
4346 cost.complexity++;
4348 cost.cost += add_cost (speed, TYPE_MODE (ctype));
/* The multiplication is costed using the absolute value of RATIO.  */
4350 aratio = ratio > 0 ? ratio : -ratio;
4351 if (aratio != 1)
4352 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4353 return cost;
/* Reached only when the type of the use is narrower than the type of the
   candidate.  */
4355 fallback:
4356 if (can_autoinc)
4357 *can_autoinc = false;
4360 /* Just get the expression, expand it and measure the cost. */
4361 tree comp = get_computation_at (data->current_loop, use, cand, at);
4363 if (!comp)
4364 return infinite_cost;
4366 if (address_p)
4367 comp = build_simple_mem_ref (comp);
4369 return new_cost (computation_cost (comp, speed), 0);
4373 /* Determines the cost of the computation by that USE is expressed
4374 from induction variable CAND. If ADDRESS_P is true, we just need
4375 to create an address from it, otherwise we want to get it into
4376 register. A set of invariants we depend on is stored in
4377 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4378 autoinc addressing is likely. */
4380 static comp_cost
4381 get_computation_cost (struct ivopts_data *data,
4382 struct iv_use *use, struct iv_cand *cand,
4383 bool address_p, bitmap *depends_on,
4384 bool *can_autoinc, int *inv_expr_id)
4386 return get_computation_cost_at (data,
4387 use, cand, address_p, depends_on, use->stmt,
4388 can_autoinc, inv_expr_id);
4391 /* Determines cost of basing replacement of USE on CAND in a generic
4392 expression. */
4394 static bool
4395 determine_use_iv_cost_generic (struct ivopts_data *data,
4396 struct iv_use *use, struct iv_cand *cand)
4398 bitmap depends_on;
4399 comp_cost cost;
4400 int inv_expr_id = -1;
4402 /* The simple case first -- if we need to express value of the preserved
4403 original biv, the cost is 0. This also prevents us from counting the
4404 cost of increment twice -- once at this use and once in the cost of
4405 the candidate. */
4406 if (cand->pos == IP_ORIGINAL
4407 && cand->incremented_at == use->stmt)
4409 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4410 ERROR_MARK, -1);
4411 return true;
4414 cost = get_computation_cost (data, use, cand, false, &depends_on,
4415 NULL, &inv_expr_id);
4417 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4418 inv_expr_id);
4420 return !infinite_cost_p (cost);
4423 /* Determines cost of basing replacement of USE on CAND in an address. */
4425 static bool
4426 determine_use_iv_cost_address (struct ivopts_data *data,
4427 struct iv_use *use, struct iv_cand *cand)
4429 bitmap depends_on;
4430 bool can_autoinc;
4431 int inv_expr_id = -1;
4432 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4433 &can_autoinc, &inv_expr_id);
4435 if (cand->ainc_use == use)
4437 if (can_autoinc)
4438 cost.cost -= cand->cost_step;
4439 /* If we generated the candidate solely for exploiting autoincrement
4440 opportunities, and it turns out it can't be used, set the cost to
4441 infinity to make sure we ignore it. */
4442 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4443 cost = infinite_cost;
4445 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4446 inv_expr_id);
4448 return !infinite_cost_p (cost);
4451 /* Computes value of candidate CAND at position AT in iteration NITER, and
4452 stores it to VAL. */
4454 static void
4455 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4456 aff_tree *val)
4458 aff_tree step, delta, nit;
4459 struct iv *iv = cand->iv;
4460 tree type = TREE_TYPE (iv->base);
4461 tree steptype = type;
4462 if (POINTER_TYPE_P (type))
4463 steptype = sizetype;
4464 steptype = unsigned_type_for (type);
4466 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4467 aff_combination_convert (&step, steptype);
4468 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4469 aff_combination_convert (&nit, steptype);
4470 aff_combination_mult (&nit, &step, &delta);
4471 if (stmt_after_increment (loop, cand, at))
4472 aff_combination_add (&delta, &step);
4474 tree_to_aff_combination (iv->base, type, val);
4475 if (!POINTER_TYPE_P (type))
4476 aff_combination_convert (val, steptype);
4477 aff_combination_add (val, &delta);
4480 /* Returns period of induction variable iv. */
4482 static tree
4483 iv_period (struct iv *iv)
4485 tree step = iv->step, period, type;
4486 tree pow2div;
4488 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4490 type = unsigned_type_for (TREE_TYPE (step));
4491 /* Period of the iv is lcm (step, type_range)/step -1,
4492 i.e., N*type_range/step - 1. Since type range is power
4493 of two, N == (step >> num_of_ending_zeros_binary (step),
4494 so the final result is
4496 (type_range >> num_of_ending_zeros_binary (step)) - 1
4499 pow2div = num_ending_zeros (step);
4501 period = build_low_bits_mask (type,
4502 (TYPE_PRECISION (type)
4503 - tree_to_uhwi (pow2div)));
4505 return period;
4508 /* Returns the comparison operator used when eliminating the iv USE. */
4510 static enum tree_code
4511 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4513 struct loop *loop = data->current_loop;
4514 basic_block ex_bb;
4515 edge exit;
4517 ex_bb = gimple_bb (use->stmt);
4518 exit = EDGE_SUCC (ex_bb, 0);
4519 if (flow_bb_inside_loop_p (loop, exit->dest))
4520 exit = EDGE_SUCC (ex_bb, 1);
4522 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4525 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4526 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4527 calculation is performed in non-wrapping type.
4529 TODO: More generally, we could test for the situation that
4530 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4531 This would require knowing the sign of OFFSET. */
4533 static bool
4534 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4536 enum tree_code code;
4537 tree e1, e2;
4538 aff_tree aff_e1, aff_e2, aff_offset;
4540 if (!nowrap_type_p (TREE_TYPE (base)))
4541 return false;
4543 base = expand_simple_operations (base);
4545 if (TREE_CODE (base) == SSA_NAME)
4547 gimple stmt = SSA_NAME_DEF_STMT (base);
4549 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4550 return false;
4552 code = gimple_assign_rhs_code (stmt);
4553 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4554 return false;
4556 e1 = gimple_assign_rhs1 (stmt);
4557 e2 = gimple_assign_rhs2 (stmt);
4559 else
4561 code = TREE_CODE (base);
4562 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4563 return false;
4564 e1 = TREE_OPERAND (base, 0);
4565 e2 = TREE_OPERAND (base, 1);
4568 /* Use affine expansion as deeper inspection to prove the equality. */
4569 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4570 &aff_e2, &data->name_expansion_cache);
4571 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4572 &aff_offset, &data->name_expansion_cache);
4573 aff_combination_scale (&aff_offset, -1);
4574 switch (code)
4576 case PLUS_EXPR:
4577 aff_combination_add (&aff_e2, &aff_offset);
4578 if (aff_combination_zero_p (&aff_e2))
4579 return true;
4581 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4582 &aff_e1, &data->name_expansion_cache);
4583 aff_combination_add (&aff_e1, &aff_offset);
4584 return aff_combination_zero_p (&aff_e1);
4586 case POINTER_PLUS_EXPR:
4587 aff_combination_add (&aff_e2, &aff_offset);
4588 return aff_combination_zero_p (&aff_e2);
4590 default:
4591 return false;
4595 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4596 comparison with CAND. NITER describes the number of iterations of
4597 the loops. If successful, the comparison in COMP_P is altered accordingly.
4599 We aim to handle the following situation:
4601 sometype *base, *p;
4602 int a, b, i;
4604 i = a;
4605 p = p_0 = base + a;
4609 bla (*p);
4610 p++;
4611 i++;
4613 while (i < b);
4615 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4616 We aim to optimize this to
4618 p = p_0 = base + a;
4621 bla (*p);
4622 p++;
4624 while (p < p_0 - a + b);
4626 This preserves the correctness, since the pointer arithmetics does not
4627 overflow. More precisely:
4629 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4630 overflow in computing it or the values of p.
4631 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4632 overflow. To prove this, we use the fact that p_0 = base + a. */
4634 static bool
4635 iv_elimination_compare_lt (struct ivopts_data *data,
4636 struct iv_cand *cand, enum tree_code *comp_p,
4637 struct tree_niter_desc *niter)
4639 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4640 struct aff_tree nit, tmpa, tmpb;
4641 enum tree_code comp;
4642 HOST_WIDE_INT step;
4644 /* We need to know that the candidate induction variable does not overflow.
4645 While more complex analysis may be used to prove this, for now just
4646 check that the variable appears in the original program and that it
4647 is computed in a type that guarantees no overflows. */
4648 cand_type = TREE_TYPE (cand->iv->base);
4649 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4650 return false;
4652 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4653 the calculation of the BOUND could overflow, making the comparison
4654 invalid. */
4655 if (!data->loop_single_exit_p)
4656 return false;
4658 /* We need to be able to decide whether candidate is increasing or decreasing
4659 in order to choose the right comparison operator. */
4660 if (!cst_and_fits_in_hwi (cand->iv->step))
4661 return false;
4662 step = int_cst_value (cand->iv->step);
4664 /* Check that the number of iterations matches the expected pattern:
4665 a + 1 > b ? 0 : b - a - 1. */
4666 mbz = niter->may_be_zero;
4667 if (TREE_CODE (mbz) == GT_EXPR)
4669 /* Handle a + 1 > b. */
4670 tree op0 = TREE_OPERAND (mbz, 0);
4671 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4673 a = TREE_OPERAND (op0, 0);
4674 b = TREE_OPERAND (mbz, 1);
4676 else
4677 return false;
4679 else if (TREE_CODE (mbz) == LT_EXPR)
4681 tree op1 = TREE_OPERAND (mbz, 1);
4683 /* Handle b < a + 1. */
4684 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4686 a = TREE_OPERAND (op1, 0);
4687 b = TREE_OPERAND (mbz, 0);
4689 else
4690 return false;
4692 else
4693 return false;
4695 /* Expected number of iterations is B - A - 1. Check that it matches
4696 the actual number, i.e., that B - A - NITER = 1. */
4697 tree_to_aff_combination (niter->niter, nit_type, &nit);
4698 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4699 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4700 aff_combination_scale (&nit, -1);
4701 aff_combination_scale (&tmpa, -1);
4702 aff_combination_add (&tmpb, &tmpa);
4703 aff_combination_add (&tmpb, &nit);
4704 if (tmpb.n != 0 || tmpb.offset != 1)
4705 return false;
4707 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4708 overflow. */
4709 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4710 cand->iv->step,
4711 fold_convert (TREE_TYPE (cand->iv->step), a));
4712 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4713 return false;
4715 /* Determine the new comparison operator. */
4716 comp = step < 0 ? GT_EXPR : LT_EXPR;
4717 if (*comp_p == NE_EXPR)
4718 *comp_p = comp;
4719 else if (*comp_p == EQ_EXPR)
4720 *comp_p = invert_tree_comparison (comp, false);
4721 else
4722 gcc_unreachable ();
4724 return true;
4727 /* Check whether it is possible to express the condition in USE by comparison
4728 of candidate CAND. If so, store the value compared with to BOUND, and the
4729 comparison operator to COMP. */
4731 static bool
4732 may_eliminate_iv (struct ivopts_data *data,
4733 struct iv_use *use, struct iv_cand *cand, tree *bound,
4734 enum tree_code *comp)
4736 basic_block ex_bb;
4737 edge exit;
4738 tree period;
4739 struct loop *loop = data->current_loop;
4740 aff_tree bnd;
4741 struct tree_niter_desc *desc = NULL;
4743 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4744 return false;
4746 /* For now works only for exits that dominate the loop latch.
4747 TODO: extend to other conditions inside loop body. */
4748 ex_bb = gimple_bb (use->stmt);
4749 if (use->stmt != last_stmt (ex_bb)
4750 || gimple_code (use->stmt) != GIMPLE_COND
4751 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4752 return false;
4754 exit = EDGE_SUCC (ex_bb, 0);
4755 if (flow_bb_inside_loop_p (loop, exit->dest))
4756 exit = EDGE_SUCC (ex_bb, 1);
4757 if (flow_bb_inside_loop_p (loop, exit->dest))
4758 return false;
4760 desc = niter_for_exit (data, exit);
4761 if (!desc)
4762 return false;
4764 /* Determine whether we can use the variable to test the exit condition.
4765 This is the case iff the period of the induction variable is greater
4766 than the number of iterations for which the exit condition is true. */
4767 period = iv_period (cand->iv);
4769 /* If the number of iterations is constant, compare against it directly. */
4770 if (TREE_CODE (desc->niter) == INTEGER_CST)
4772 /* See cand_value_at. */
4773 if (stmt_after_increment (loop, cand, use->stmt))
4775 if (!tree_int_cst_lt (desc->niter, period))
4776 return false;
4778 else
4780 if (tree_int_cst_lt (period, desc->niter))
4781 return false;
4785 /* If not, and if this is the only possible exit of the loop, see whether
4786 we can get a conservative estimate on the number of iterations of the
4787 entire loop and compare against that instead. */
4788 else
4790 widest_int period_value, max_niter;
4792 max_niter = desc->max;
4793 if (stmt_after_increment (loop, cand, use->stmt))
4794 max_niter += 1;
4795 period_value = wi::to_widest (period);
4796 if (wi::gtu_p (max_niter, period_value))
4798 /* See if we can take advantage of inferred loop bound information. */
4799 if (data->loop_single_exit_p)
4801 if (!max_loop_iterations (loop, &max_niter))
4802 return false;
4803 /* The loop bound is already adjusted by adding 1. */
4804 if (wi::gtu_p (max_niter, period_value))
4805 return false;
4807 else
4808 return false;
4812 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4814 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4815 aff_combination_to_tree (&bnd));
4816 *comp = iv_elimination_compare (data, use);
4818 /* It is unlikely that computing the number of iterations using division
4819 would be more profitable than keeping the original induction variable. */
4820 if (expression_expensive_p (*bound))
4821 return false;
4823 /* Sometimes, it is possible to handle the situation that the number of
4824 iterations may be zero unless additional assumtions by using <
4825 instead of != in the exit condition.
4827 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4828 base the exit condition on it. However, that is often too
4829 expensive. */
4830 if (!integer_zerop (desc->may_be_zero))
4831 return iv_elimination_compare_lt (data, cand, comp, desc);
4833 return true;
4836 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4837 be copied, if is is used in the loop body and DATA->body_includes_call. */
4839 static int
4840 parm_decl_cost (struct ivopts_data *data, tree bound)
4842 tree sbound = bound;
4843 STRIP_NOPS (sbound);
4845 if (TREE_CODE (sbound) == SSA_NAME
4846 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4847 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4848 && data->body_includes_call)
4849 return COSTS_N_INSNS (1);
4851 return 0;
4854 /* Determines cost of basing replacement of USE on CAND in a condition. */
4856 static bool
4857 determine_use_iv_cost_condition (struct ivopts_data *data,
4858 struct iv_use *use, struct iv_cand *cand)
4860 tree bound = NULL_TREE;
4861 struct iv *cmp_iv;
4862 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4863 comp_cost elim_cost, express_cost, cost, bound_cost;
4864 bool ok;
4865 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4866 tree *control_var, *bound_cst;
4867 enum tree_code comp = ERROR_MARK;
4869 /* Only consider real candidates. */
4870 if (!cand->iv)
4872 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4873 ERROR_MARK, -1);
4874 return false;
4877 /* Try iv elimination. */
4878 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4880 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4881 if (elim_cost.cost == 0)
4882 elim_cost.cost = parm_decl_cost (data, bound);
4883 else if (TREE_CODE (bound) == INTEGER_CST)
4884 elim_cost.cost = 0;
4885 /* If we replace a loop condition 'i < n' with 'p < base + n',
4886 depends_on_elim will have 'base' and 'n' set, which implies
4887 that both 'base' and 'n' will be live during the loop. More likely,
4888 'base + n' will be loop invariant, resulting in only one live value
4889 during the loop. So in that case we clear depends_on_elim and set
4890 elim_inv_expr_id instead. */
4891 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4893 elim_inv_expr_id = get_expr_id (data, bound);
4894 bitmap_clear (depends_on_elim);
4896 /* The bound is a loop invariant, so it will be only computed
4897 once. */
4898 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4900 else
4901 elim_cost = infinite_cost;
4903 /* Try expressing the original giv. If it is compared with an invariant,
4904 note that we cannot get rid of it. */
4905 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4906 NULL, &cmp_iv);
4907 gcc_assert (ok);
4909 /* When the condition is a comparison of the candidate IV against
4910 zero, prefer this IV.
4912 TODO: The constant that we're subtracting from the cost should
4913 be target-dependent. This information should be added to the
4914 target costs for each backend. */
4915 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4916 && integer_zerop (*bound_cst)
4917 && (operand_equal_p (*control_var, cand->var_after, 0)
4918 || operand_equal_p (*control_var, cand->var_before, 0)))
4919 elim_cost.cost -= 1;
4921 express_cost = get_computation_cost (data, use, cand, false,
4922 &depends_on_express, NULL,
4923 &express_inv_expr_id);
4924 fd_ivopts_data = data;
4925 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4927 /* Count the cost of the original bound as well. */
4928 bound_cost = force_var_cost (data, *bound_cst, NULL);
4929 if (bound_cost.cost == 0)
4930 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4931 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4932 bound_cost.cost = 0;
4933 express_cost.cost += bound_cost.cost;
4935 /* Choose the better approach, preferring the eliminated IV. */
4936 if (compare_costs (elim_cost, express_cost) <= 0)
4938 cost = elim_cost;
4939 depends_on = depends_on_elim;
4940 depends_on_elim = NULL;
4941 inv_expr_id = elim_inv_expr_id;
4943 else
4945 cost = express_cost;
4946 depends_on = depends_on_express;
4947 depends_on_express = NULL;
4948 bound = NULL_TREE;
4949 comp = ERROR_MARK;
4950 inv_expr_id = express_inv_expr_id;
4953 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4955 if (depends_on_elim)
4956 BITMAP_FREE (depends_on_elim);
4957 if (depends_on_express)
4958 BITMAP_FREE (depends_on_express);
4960 return !infinite_cost_p (cost);
4963 /* Determines cost of basing replacement of USE on CAND. Returns false
4964 if USE cannot be based on CAND. */
4966 static bool
4967 determine_use_iv_cost (struct ivopts_data *data,
4968 struct iv_use *use, struct iv_cand *cand)
4970 switch (use->type)
4972 case USE_NONLINEAR_EXPR:
4973 return determine_use_iv_cost_generic (data, use, cand);
4975 case USE_ADDRESS:
4976 return determine_use_iv_cost_address (data, use, cand);
4978 case USE_COMPARE:
4979 return determine_use_iv_cost_condition (data, use, cand);
4981 default:
4982 gcc_unreachable ();
4986 /* Return true if get_computation_cost indicates that autoincrement is
4987 a possibility for the pair of USE and CAND, false otherwise. */
4989 static bool
4990 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4991 struct iv_cand *cand)
4993 bitmap depends_on;
4994 bool can_autoinc;
4995 comp_cost cost;
4997 if (use->type != USE_ADDRESS)
4998 return false;
5000 cost = get_computation_cost (data, use, cand, true, &depends_on,
5001 &can_autoinc, NULL);
5003 BITMAP_FREE (depends_on);
5005 return !infinite_cost_p (cost) && can_autoinc;
5008 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5009 use that allows autoincrement, and set their AINC_USE if possible. */
5011 static void
5012 set_autoinc_for_original_candidates (struct ivopts_data *data)
5014 unsigned i, j;
5016 for (i = 0; i < n_iv_cands (data); i++)
5018 struct iv_cand *cand = iv_cand (data, i);
5019 struct iv_use *closest_before = NULL;
5020 struct iv_use *closest_after = NULL;
5021 if (cand->pos != IP_ORIGINAL)
5022 continue;
5024 for (j = 0; j < n_iv_uses (data); j++)
5026 struct iv_use *use = iv_use (data, j);
5027 unsigned uid = gimple_uid (use->stmt);
5029 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5030 continue;
5032 if (uid < gimple_uid (cand->incremented_at)
5033 && (closest_before == NULL
5034 || uid > gimple_uid (closest_before->stmt)))
5035 closest_before = use;
5037 if (uid > gimple_uid (cand->incremented_at)
5038 && (closest_after == NULL
5039 || uid < gimple_uid (closest_after->stmt)))
5040 closest_after = use;
5043 if (closest_before != NULL
5044 && autoinc_possible_for_pair (data, closest_before, cand))
5045 cand->ainc_use = closest_before;
5046 else if (closest_after != NULL
5047 && autoinc_possible_for_pair (data, closest_after, cand))
5048 cand->ainc_use = closest_after;
5052 /* Finds the candidates for the induction variables. */
5054 static void
5055 find_iv_candidates (struct ivopts_data *data)
5057 /* Add commonly used ivs. */
5058 add_standard_iv_candidates (data);
5060 /* Add old induction variables. */
5061 add_old_ivs_candidates (data);
5063 /* Add induction variables derived from uses. */
5064 add_derived_ivs_candidates (data);
5066 set_autoinc_for_original_candidates (data);
5068 /* Record the important candidates. */
5069 record_important_candidates (data);
5072 /* Determines costs of basing the use of the iv on an iv candidate. */
5074 static void
5075 determine_use_iv_costs (struct ivopts_data *data)
5077 unsigned i, j;
5078 struct iv_use *use;
5079 struct iv_cand *cand;
5080 bitmap to_clear = BITMAP_ALLOC (NULL);
5082 alloc_use_cost_map (data);
5084 for (i = 0; i < n_iv_uses (data); i++)
5086 use = iv_use (data, i);
5088 if (data->consider_all_candidates)
5090 for (j = 0; j < n_iv_cands (data); j++)
5092 cand = iv_cand (data, j);
5093 determine_use_iv_cost (data, use, cand);
5096 else
5098 bitmap_iterator bi;
5100 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5102 cand = iv_cand (data, j);
5103 if (!determine_use_iv_cost (data, use, cand))
5104 bitmap_set_bit (to_clear, j);
5107 /* Remove the candidates for that the cost is infinite from
5108 the list of related candidates. */
5109 bitmap_and_compl_into (use->related_cands, to_clear);
5110 bitmap_clear (to_clear);
5114 BITMAP_FREE (to_clear);
5116 if (dump_file && (dump_flags & TDF_DETAILS))
5118 fprintf (dump_file, "Use-candidate costs:\n");
5120 for (i = 0; i < n_iv_uses (data); i++)
5122 use = iv_use (data, i);
5124 fprintf (dump_file, "Use %d:\n", i);
5125 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5126 for (j = 0; j < use->n_map_members; j++)
5128 if (!use->cost_map[j].cand
5129 || infinite_cost_p (use->cost_map[j].cost))
5130 continue;
5132 fprintf (dump_file, " %d\t%d\t%d\t",
5133 use->cost_map[j].cand->id,
5134 use->cost_map[j].cost.cost,
5135 use->cost_map[j].cost.complexity);
5136 if (use->cost_map[j].depends_on)
5137 bitmap_print (dump_file,
5138 use->cost_map[j].depends_on, "","");
5139 if (use->cost_map[j].inv_expr_id != -1)
5140 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5141 fprintf (dump_file, "\n");
5144 fprintf (dump_file, "\n");
5146 fprintf (dump_file, "\n");
5150 /* Determines cost of the candidate CAND. */
5152 static void
5153 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5155 comp_cost cost_base;
5156 unsigned cost, cost_step;
5157 tree base;
5159 if (!cand->iv)
5161 cand->cost = 0;
5162 return;
5165 /* There are two costs associated with the candidate -- its increment
5166 and its initialization. The second is almost negligible for any loop
5167 that rolls enough, so we take it just very little into account. */
5169 base = cand->iv->base;
5170 cost_base = force_var_cost (data, base, NULL);
5171 /* It will be exceptional that the iv register happens to be initialized with
5172 the proper value at no cost. In general, there will at least be a regcopy
5173 or a const set. */
5174 if (cost_base.cost == 0)
5175 cost_base.cost = COSTS_N_INSNS (1);
5176 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5178 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5180 /* Prefer the original ivs unless we may gain something by replacing it.
5181 The reason is to make debugging simpler; so this is not relevant for
5182 artificial ivs created by other optimization passes. */
5183 if (cand->pos != IP_ORIGINAL
5184 || !SSA_NAME_VAR (cand->var_before)
5185 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5186 cost++;
5188 /* Prefer not to insert statements into latch unless there are some
5189 already (so that we do not create unnecessary jumps). */
5190 if (cand->pos == IP_END
5191 && empty_block_p (ip_end_pos (data->current_loop)))
5192 cost++;
5194 cand->cost = cost;
5195 cand->cost_step = cost_step;
5198 /* Determines costs of computation of the candidates. */
5200 static void
5201 determine_iv_costs (struct ivopts_data *data)
5203 unsigned i;
5205 if (dump_file && (dump_flags & TDF_DETAILS))
5207 fprintf (dump_file, "Candidate costs:\n");
5208 fprintf (dump_file, " cand\tcost\n");
5211 for (i = 0; i < n_iv_cands (data); i++)
5213 struct iv_cand *cand = iv_cand (data, i);
5215 determine_iv_cost (data, cand);
5217 if (dump_file && (dump_flags & TDF_DETAILS))
5218 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5221 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, "\n");
5225 /* Calculates cost for having SIZE induction variables. */
5227 static unsigned
5228 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5230 /* We add size to the cost, so that we prefer eliminating ivs
5231 if possible. */
5232 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5233 data->body_includes_call);
5236 /* For each size of the induction variable set determine the penalty. */
5238 static void
5239 determine_set_costs (struct ivopts_data *data)
5241 unsigned j, n;
5242 gphi *phi;
5243 gphi_iterator psi;
5244 tree op;
5245 struct loop *loop = data->current_loop;
5246 bitmap_iterator bi;
5248 if (dump_file && (dump_flags & TDF_DETAILS))
5250 fprintf (dump_file, "Global costs:\n");
5251 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5252 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5253 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5254 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5257 n = 0;
5258 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5260 phi = psi.phi ();
5261 op = PHI_RESULT (phi);
5263 if (virtual_operand_p (op))
5264 continue;
5266 if (get_iv (data, op))
5267 continue;
5269 n++;
5272 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5274 struct version_info *info = ver_info (data, j);
5276 if (info->inv_id && info->has_nonlin_use)
5277 n++;
5280 data->regs_used = n;
5281 if (dump_file && (dump_flags & TDF_DETAILS))
5282 fprintf (dump_file, " regs_used %d\n", n);
5284 if (dump_file && (dump_flags & TDF_DETAILS))
5286 fprintf (dump_file, " cost for size:\n");
5287 fprintf (dump_file, " ivs\tcost\n");
5288 for (j = 0; j <= 2 * target_avail_regs; j++)
5289 fprintf (dump_file, " %d\t%d\n", j,
5290 ivopts_global_cost_for_size (data, j));
5291 fprintf (dump_file, "\n");
5295 /* Returns true if A is a cheaper cost pair than B. */
5297 static bool
5298 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5300 int cmp;
5302 if (!a)
5303 return false;
5305 if (!b)
5306 return true;
5308 cmp = compare_costs (a->cost, b->cost);
5309 if (cmp < 0)
5310 return true;
5312 if (cmp > 0)
5313 return false;
5315 /* In case the costs are the same, prefer the cheaper candidate. */
5316 if (a->cand->cost < b->cand->cost)
5317 return true;
5319 return false;
5323 /* Returns candidate by that USE is expressed in IVS. */
5325 static struct cost_pair *
5326 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5328 return ivs->cand_for_use[use->id];
5331 /* Computes the cost field of IVS structure. */
5333 static void
5334 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5336 comp_cost cost = ivs->cand_use_cost;
5338 cost.cost += ivs->cand_cost;
5340 cost.cost += ivopts_global_cost_for_size (data,
5341 ivs->n_regs + ivs->num_used_inv_expr);
5343 ivs->cost = cost;
5346 /* Remove invariants in set INVS to set IVS. */
5348 static void
5349 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5351 bitmap_iterator bi;
5352 unsigned iid;
5354 if (!invs)
5355 return;
5357 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5359 ivs->n_invariant_uses[iid]--;
5360 if (ivs->n_invariant_uses[iid] == 0)
5361 ivs->n_regs--;
5365 /* Set USE not to be expressed by any candidate in IVS. */
5367 static void
5368 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5369 struct iv_use *use)
5371 unsigned uid = use->id, cid;
5372 struct cost_pair *cp;
5374 cp = ivs->cand_for_use[uid];
5375 if (!cp)
5376 return;
5377 cid = cp->cand->id;
5379 ivs->bad_uses++;
5380 ivs->cand_for_use[uid] = NULL;
5381 ivs->n_cand_uses[cid]--;
5383 if (ivs->n_cand_uses[cid] == 0)
5385 bitmap_clear_bit (ivs->cands, cid);
5386 /* Do not count the pseudocandidates. */
5387 if (cp->cand->iv)
5388 ivs->n_regs--;
5389 ivs->n_cands--;
5390 ivs->cand_cost -= cp->cand->cost;
5392 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5395 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5397 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5399 if (cp->inv_expr_id != -1)
5401 ivs->used_inv_expr[cp->inv_expr_id]--;
5402 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5403 ivs->num_used_inv_expr--;
5405 iv_ca_recount_cost (data, ivs);
5408 /* Add invariants in set INVS to set IVS. */
5410 static void
5411 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5413 bitmap_iterator bi;
5414 unsigned iid;
5416 if (!invs)
5417 return;
5419 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5421 ivs->n_invariant_uses[iid]++;
5422 if (ivs->n_invariant_uses[iid] == 1)
5423 ivs->n_regs++;
5427 /* Set cost pair for USE in set IVS to CP. */
5429 static void
5430 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5431 struct iv_use *use, struct cost_pair *cp)
5433 unsigned uid = use->id, cid;
5435 if (ivs->cand_for_use[uid] == cp)
5436 return;
5438 if (ivs->cand_for_use[uid])
5439 iv_ca_set_no_cp (data, ivs, use);
5441 if (cp)
5443 cid = cp->cand->id;
5445 ivs->bad_uses--;
5446 ivs->cand_for_use[uid] = cp;
5447 ivs->n_cand_uses[cid]++;
5448 if (ivs->n_cand_uses[cid] == 1)
5450 bitmap_set_bit (ivs->cands, cid);
5451 /* Do not count the pseudocandidates. */
5452 if (cp->cand->iv)
5453 ivs->n_regs++;
5454 ivs->n_cands++;
5455 ivs->cand_cost += cp->cand->cost;
5457 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5460 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5461 iv_ca_set_add_invariants (ivs, cp->depends_on);
5463 if (cp->inv_expr_id != -1)
5465 ivs->used_inv_expr[cp->inv_expr_id]++;
5466 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5467 ivs->num_used_inv_expr++;
5469 iv_ca_recount_cost (data, ivs);
5473 /* Extend set IVS by expressing USE by some of the candidates in it
5474 if possible. Consider all important candidates if candidates in
5475 set IVS don't give any result. */
5477 static void
5478 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5479 struct iv_use *use)
5481 struct cost_pair *best_cp = NULL, *cp;
5482 bitmap_iterator bi;
5483 unsigned i;
5484 struct iv_cand *cand;
5486 gcc_assert (ivs->upto >= use->id);
5487 ivs->upto++;
5488 ivs->bad_uses++;
5490 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5492 cand = iv_cand (data, i);
5493 cp = get_use_iv_cost (data, use, cand);
5494 if (cheaper_cost_pair (cp, best_cp))
5495 best_cp = cp;
5498 if (best_cp == NULL)
5500 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5502 cand = iv_cand (data, i);
5503 cp = get_use_iv_cost (data, use, cand);
5504 if (cheaper_cost_pair (cp, best_cp))
5505 best_cp = cp;
5509 iv_ca_set_cp (data, ivs, use, best_cp);
5512 /* Get cost for assignment IVS. */
5514 static comp_cost
5515 iv_ca_cost (struct iv_ca *ivs)
5517 /* This was a conditional expression but it triggered a bug in
5518 Sun C 5.5. */
5519 if (ivs->bad_uses)
5520 return infinite_cost;
5521 else
5522 return ivs->cost;
5525 /* Returns true if all dependences of CP are among invariants in IVS. */
5527 static bool
5528 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5530 unsigned i;
5531 bitmap_iterator bi;
5533 if (!cp->depends_on)
5534 return true;
5536 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5538 if (ivs->n_invariant_uses[i] == 0)
5539 return false;
5542 return true;
5545 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5546 it before NEXT_CHANGE. */
5548 static struct iv_ca_delta *
5549 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5550 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5552 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5554 change->use = use;
5555 change->old_cp = old_cp;
5556 change->new_cp = new_cp;
5557 change->next_change = next_change;
5559 return change;
5562 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5563 are rewritten. */
5565 static struct iv_ca_delta *
5566 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5568 struct iv_ca_delta *last;
5570 if (!l2)
5571 return l1;
5573 if (!l1)
5574 return l2;
5576 for (last = l1; last->next_change; last = last->next_change)
5577 continue;
5578 last->next_change = l2;
5580 return l1;
5583 /* Reverse the list of changes DELTA, forming the inverse to it. */
5585 static struct iv_ca_delta *
5586 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5588 struct iv_ca_delta *act, *next, *prev = NULL;
5589 struct cost_pair *tmp;
5591 for (act = delta; act; act = next)
5593 next = act->next_change;
5594 act->next_change = prev;
5595 prev = act;
5597 tmp = act->old_cp;
5598 act->old_cp = act->new_cp;
5599 act->new_cp = tmp;
5602 return prev;
5605 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5606 reverted instead. */
5608 static void
5609 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5610 struct iv_ca_delta *delta, bool forward)
5612 struct cost_pair *from, *to;
5613 struct iv_ca_delta *act;
5615 if (!forward)
5616 delta = iv_ca_delta_reverse (delta);
5618 for (act = delta; act; act = act->next_change)
5620 from = act->old_cp;
5621 to = act->new_cp;
5622 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5623 iv_ca_set_cp (data, ivs, act->use, to);
5626 if (!forward)
5627 iv_ca_delta_reverse (delta);
5630 /* Returns true if CAND is used in IVS. */
5632 static bool
5633 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5635 return ivs->n_cand_uses[cand->id] > 0;
5638 /* Returns number of induction variable candidates in the set IVS. */
5640 static unsigned
5641 iv_ca_n_cands (struct iv_ca *ivs)
5643 return ivs->n_cands;
5646 /* Free the list of changes DELTA. */
5648 static void
5649 iv_ca_delta_free (struct iv_ca_delta **delta)
5651 struct iv_ca_delta *act, *next;
5653 for (act = *delta; act; act = next)
5655 next = act->next_change;
5656 free (act);
5659 *delta = NULL;
5662 /* Allocates new iv candidates assignment. */
5664 static struct iv_ca *
5665 iv_ca_new (struct ivopts_data *data)
5667 struct iv_ca *nw = XNEW (struct iv_ca);
5669 nw->upto = 0;
5670 nw->bad_uses = 0;
5671 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5672 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5673 nw->cands = BITMAP_ALLOC (NULL);
5674 nw->n_cands = 0;
5675 nw->n_regs = 0;
5676 nw->cand_use_cost = no_cost;
5677 nw->cand_cost = 0;
5678 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5679 nw->cost = no_cost;
5680 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5681 nw->num_used_inv_expr = 0;
5683 return nw;
5686 /* Free memory occupied by the set IVS. */
5688 static void
5689 iv_ca_free (struct iv_ca **ivs)
5691 free ((*ivs)->cand_for_use);
5692 free ((*ivs)->n_cand_uses);
5693 BITMAP_FREE ((*ivs)->cands);
5694 free ((*ivs)->n_invariant_uses);
5695 free ((*ivs)->used_inv_expr);
5696 free (*ivs);
5697 *ivs = NULL;
5700 /* Dumps IVS to FILE. */
5702 static void
5703 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5705 const char *pref = " invariants ";
5706 unsigned i;
5707 comp_cost cost = iv_ca_cost (ivs);
5709 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5710 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5711 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5712 bitmap_print (file, ivs->cands, " candidates: ","\n");
5714 for (i = 0; i < ivs->upto; i++)
5716 struct iv_use *use = iv_use (data, i);
5717 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5718 if (cp)
5719 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5720 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5721 else
5722 fprintf (file, " use:%d --> ??\n", use->id);
5725 for (i = 1; i <= data->max_inv_id; i++)
5726 if (ivs->n_invariant_uses[i])
5728 fprintf (file, "%s%d", pref, i);
5729 pref = ", ";
5731 fprintf (file, "\n\n");
5734 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5735 new set, and store differences in DELTA. Number of induction variables
5736 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5737 the function will try to find a solution with mimimal iv candidates. */
5739 static comp_cost
5740 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5741 struct iv_cand *cand, struct iv_ca_delta **delta,
5742 unsigned *n_ivs, bool min_ncand)
5744 unsigned i;
5745 comp_cost cost;
5746 struct iv_use *use;
5747 struct cost_pair *old_cp, *new_cp;
5749 *delta = NULL;
5750 for (i = 0; i < ivs->upto; i++)
5752 use = iv_use (data, i);
5753 old_cp = iv_ca_cand_for_use (ivs, use);
5755 if (old_cp
5756 && old_cp->cand == cand)
5757 continue;
5759 new_cp = get_use_iv_cost (data, use, cand);
5760 if (!new_cp)
5761 continue;
5763 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5764 continue;
5766 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5767 continue;
5769 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5772 iv_ca_delta_commit (data, ivs, *delta, true);
5773 cost = iv_ca_cost (ivs);
5774 if (n_ivs)
5775 *n_ivs = iv_ca_n_cands (ivs);
5776 iv_ca_delta_commit (data, ivs, *delta, false);
5778 return cost;
5781 /* Try narrowing set IVS by removing CAND. Return the cost of
5782 the new set and store the differences in DELTA. START is
5783 the candidate with which we start narrowing. */
5785 static comp_cost
5786 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5787 struct iv_cand *cand, struct iv_cand *start,
5788 struct iv_ca_delta **delta)
5790 unsigned i, ci;
5791 struct iv_use *use;
5792 struct cost_pair *old_cp, *new_cp, *cp;
5793 bitmap_iterator bi;
5794 struct iv_cand *cnd;
5795 comp_cost cost, best_cost, acost;
5797 *delta = NULL;
5798 for (i = 0; i < n_iv_uses (data); i++)
5800 use = iv_use (data, i);
5802 old_cp = iv_ca_cand_for_use (ivs, use);
5803 if (old_cp->cand != cand)
5804 continue;
5806 best_cost = iv_ca_cost (ivs);
5807 /* Start narrowing with START. */
5808 new_cp = get_use_iv_cost (data, use, start);
5810 if (data->consider_all_candidates)
5812 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5814 if (ci == cand->id || (start && ci == start->id))
5815 continue;
5817 cnd = iv_cand (data, ci);
5819 cp = get_use_iv_cost (data, use, cnd);
5820 if (!cp)
5821 continue;
5823 iv_ca_set_cp (data, ivs, use, cp);
5824 acost = iv_ca_cost (ivs);
5826 if (compare_costs (acost, best_cost) < 0)
5828 best_cost = acost;
5829 new_cp = cp;
5833 else
5835 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5837 if (ci == cand->id || (start && ci == start->id))
5838 continue;
5840 cnd = iv_cand (data, ci);
5842 cp = get_use_iv_cost (data, use, cnd);
5843 if (!cp)
5844 continue;
5846 iv_ca_set_cp (data, ivs, use, cp);
5847 acost = iv_ca_cost (ivs);
5849 if (compare_costs (acost, best_cost) < 0)
5851 best_cost = acost;
5852 new_cp = cp;
5856 /* Restore to old cp for use. */
5857 iv_ca_set_cp (data, ivs, use, old_cp);
5859 if (!new_cp)
5861 iv_ca_delta_free (delta);
5862 return infinite_cost;
5865 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5868 iv_ca_delta_commit (data, ivs, *delta, true);
5869 cost = iv_ca_cost (ivs);
5870 iv_ca_delta_commit (data, ivs, *delta, false);
5872 return cost;
5875 /* Try optimizing the set of candidates IVS by removing candidates different
5876 from to EXCEPT_CAND from it. Return cost of the new set, and store
5877 differences in DELTA. */
5879 static comp_cost
5880 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5881 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5883 bitmap_iterator bi;
5884 struct iv_ca_delta *act_delta, *best_delta;
5885 unsigned i;
5886 comp_cost best_cost, acost;
5887 struct iv_cand *cand;
5889 best_delta = NULL;
5890 best_cost = iv_ca_cost (ivs);
5892 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5894 cand = iv_cand (data, i);
5896 if (cand == except_cand)
5897 continue;
5899 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5901 if (compare_costs (acost, best_cost) < 0)
5903 best_cost = acost;
5904 iv_ca_delta_free (&best_delta);
5905 best_delta = act_delta;
5907 else
5908 iv_ca_delta_free (&act_delta);
5911 if (!best_delta)
5913 *delta = NULL;
5914 return best_cost;
5917 /* Recurse to possibly remove other unnecessary ivs. */
5918 iv_ca_delta_commit (data, ivs, best_delta, true);
5919 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5920 iv_ca_delta_commit (data, ivs, best_delta, false);
5921 *delta = iv_ca_delta_join (best_delta, *delta);
5922 return best_cost;
5925 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
5926 cheaper local cost for USE than BEST_CP. Return pointer to
5927 the corresponding cost_pair, otherwise just return BEST_CP. */
5929 static struct cost_pair*
5930 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5931 unsigned int cand_idx, struct iv_cand *old_cand,
5932 struct cost_pair *best_cp)
5934 struct iv_cand *cand;
5935 struct cost_pair *cp;
5937 gcc_assert (old_cand != NULL && best_cp != NULL);
5938 if (cand_idx == old_cand->id)
5939 return best_cp;
5941 cand = iv_cand (data, cand_idx);
5942 cp = get_use_iv_cost (data, use, cand);
5943 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5944 return cp;
5946 return best_cp;
5949 /* Try breaking local optimal fixed-point for IVS by replacing candidates
5950 which are used by more than one iv uses. For each of those candidates,
5951 this function tries to represent iv uses under that candidate using
5952 other ones with lower local cost, then tries to prune the new set.
5953 If the new set has lower cost, It returns the new cost after recording
5954 candidate replacement in list DELTA. */
5956 static comp_cost
5957 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5958 struct iv_ca_delta **delta)
5960 bitmap_iterator bi, bj;
5961 unsigned int i, j, k;
5962 struct iv_use *use;
5963 struct iv_cand *cand;
5964 comp_cost orig_cost, acost;
5965 struct iv_ca_delta *act_delta, *tmp_delta;
5966 struct cost_pair *old_cp, *best_cp = NULL;
5968 *delta = NULL;
5969 orig_cost = iv_ca_cost (ivs);
5971 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5973 if (ivs->n_cand_uses[i] == 1
5974 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5975 continue;
5977 cand = iv_cand (data, i);
5979 act_delta = NULL;
5980 /* Represent uses under current candidate using other ones with
5981 lower local cost. */
5982 for (j = 0; j < ivs->upto; j++)
5984 use = iv_use (data, j);
5985 old_cp = iv_ca_cand_for_use (ivs, use);
5987 if (old_cp->cand != cand)
5988 continue;
5990 best_cp = old_cp;
5991 if (data->consider_all_candidates)
5992 for (k = 0; k < n_iv_cands (data); k++)
5993 best_cp = cheaper_cost_with_cand (data, use, k,
5994 old_cp->cand, best_cp);
5995 else
5996 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5997 best_cp = cheaper_cost_with_cand (data, use, k,
5998 old_cp->cand, best_cp);
6000 if (best_cp == old_cp)
6001 continue;
6003 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6005 /* No need for further prune. */
6006 if (!act_delta)
6007 continue;
6009 /* Prune the new candidate set. */
6010 iv_ca_delta_commit (data, ivs, act_delta, true);
6011 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6012 iv_ca_delta_commit (data, ivs, act_delta, false);
6013 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6015 if (compare_costs (acost, orig_cost) < 0)
6017 *delta = act_delta;
6018 return acost;
6020 else
6021 iv_ca_delta_free (&act_delta);
6024 return orig_cost;
6027 /* Tries to extend the sets IVS in the best possible way in order
6028 to express the USE. If ORIGINALP is true, prefer candidates from
6029 the original set of IVs, otherwise favor important candidates not
6030 based on any memory object. */
6032 static bool
6033 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6034 struct iv_use *use, bool originalp)
6036 comp_cost best_cost, act_cost;
6037 unsigned i;
6038 bitmap_iterator bi;
6039 struct iv_cand *cand;
6040 struct iv_ca_delta *best_delta = NULL, *act_delta;
6041 struct cost_pair *cp;
6043 iv_ca_add_use (data, ivs, use);
6044 best_cost = iv_ca_cost (ivs);
6045 cp = iv_ca_cand_for_use (ivs, use);
6046 if (cp)
6048 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6049 iv_ca_set_no_cp (data, ivs, use);
6052 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6053 first try important candidates not based on any memory object. Only if
6054 this fails, try the specific ones. Rationale -- in loops with many
6055 variables the best choice often is to use just one generic biv. If we
6056 added here many ivs specific to the uses, the optimization algorithm later
6057 would be likely to get stuck in a local minimum, thus causing us to create
6058 too many ivs. The approach from few ivs to more seems more likely to be
6059 successful -- starting from few ivs, replacing an expensive use by a
6060 specific iv should always be a win. */
6061 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6063 cand = iv_cand (data, i);
6065 if (originalp && cand->pos !=IP_ORIGINAL)
6066 continue;
6068 if (!originalp && cand->iv->base_object != NULL_TREE)
6069 continue;
6071 if (iv_ca_cand_used_p (ivs, cand))
6072 continue;
6074 cp = get_use_iv_cost (data, use, cand);
6075 if (!cp)
6076 continue;
6078 iv_ca_set_cp (data, ivs, use, cp);
6079 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6080 true);
6081 iv_ca_set_no_cp (data, ivs, use);
6082 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6084 if (compare_costs (act_cost, best_cost) < 0)
6086 best_cost = act_cost;
6088 iv_ca_delta_free (&best_delta);
6089 best_delta = act_delta;
6091 else
6092 iv_ca_delta_free (&act_delta);
6095 if (infinite_cost_p (best_cost))
6097 for (i = 0; i < use->n_map_members; i++)
6099 cp = use->cost_map + i;
6100 cand = cp->cand;
6101 if (!cand)
6102 continue;
6104 /* Already tried this. */
6105 if (cand->important)
6107 if (originalp && cand->pos == IP_ORIGINAL)
6108 continue;
6109 if (!originalp && cand->iv->base_object == NULL_TREE)
6110 continue;
6113 if (iv_ca_cand_used_p (ivs, cand))
6114 continue;
6116 act_delta = NULL;
6117 iv_ca_set_cp (data, ivs, use, cp);
6118 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6119 iv_ca_set_no_cp (data, ivs, use);
6120 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6121 cp, act_delta);
6123 if (compare_costs (act_cost, best_cost) < 0)
6125 best_cost = act_cost;
6127 if (best_delta)
6128 iv_ca_delta_free (&best_delta);
6129 best_delta = act_delta;
6131 else
6132 iv_ca_delta_free (&act_delta);
6136 iv_ca_delta_commit (data, ivs, best_delta, true);
6137 iv_ca_delta_free (&best_delta);
6139 return !infinite_cost_p (best_cost);
6142 /* Finds an initial assignment of candidates to uses. */
6144 static struct iv_ca *
6145 get_initial_solution (struct ivopts_data *data, bool originalp)
6147 struct iv_ca *ivs = iv_ca_new (data);
6148 unsigned i;
6150 for (i = 0; i < n_iv_uses (data); i++)
6151 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6153 iv_ca_free (&ivs);
6154 return NULL;
6157 return ivs;
6160 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6161 points to a bool variable, this function tries to break local
6162 optimal fixed-point by replacing candidates in IVS if it's true. */
6164 static bool
6165 try_improve_iv_set (struct ivopts_data *data,
6166 struct iv_ca *ivs, bool *try_replace_p)
6168 unsigned i, n_ivs;
6169 comp_cost acost, best_cost = iv_ca_cost (ivs);
6170 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6171 struct iv_cand *cand;
6173 /* Try extending the set of induction variables by one. */
6174 for (i = 0; i < n_iv_cands (data); i++)
6176 cand = iv_cand (data, i);
6178 if (iv_ca_cand_used_p (ivs, cand))
6179 continue;
6181 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6182 if (!act_delta)
6183 continue;
6185 /* If we successfully added the candidate and the set is small enough,
6186 try optimizing it by removing other candidates. */
6187 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6189 iv_ca_delta_commit (data, ivs, act_delta, true);
6190 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6191 iv_ca_delta_commit (data, ivs, act_delta, false);
6192 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6195 if (compare_costs (acost, best_cost) < 0)
6197 best_cost = acost;
6198 iv_ca_delta_free (&best_delta);
6199 best_delta = act_delta;
6201 else
6202 iv_ca_delta_free (&act_delta);
6205 if (!best_delta)
6207 /* Try removing the candidates from the set instead. */
6208 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6210 if (!best_delta && *try_replace_p)
6212 *try_replace_p = false;
6213 /* So far candidate selecting algorithm tends to choose fewer IVs
6214 so that it can handle cases in which loops have many variables
6215 but the best choice is often to use only one general biv. One
6216 weakness is it can't handle opposite cases, in which different
6217 candidates should be chosen with respect to each use. To solve
6218 the problem, we replace candidates in a manner described by the
6219 comments of iv_ca_replace, thus give general algorithm a chance
6220 to break local optimal fixed-point in these cases. */
6221 best_cost = iv_ca_replace (data, ivs, &best_delta);
6224 if (!best_delta)
6225 return false;
6228 iv_ca_delta_commit (data, ivs, best_delta, true);
6229 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6230 iv_ca_delta_free (&best_delta);
6231 return true;
6234 /* Attempts to find the optimal set of induction variables. We do simple
6235 greedy heuristic -- we try to replace at most one candidate in the selected
6236 solution and remove the unused ivs while this improves the cost. */
6238 static struct iv_ca *
6239 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6241 struct iv_ca *set;
6242 bool try_replace_p = true;
6244 /* Get the initial solution. */
6245 set = get_initial_solution (data, originalp);
6246 if (!set)
6248 if (dump_file && (dump_flags & TDF_DETAILS))
6249 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6250 return NULL;
6253 if (dump_file && (dump_flags & TDF_DETAILS))
6255 fprintf (dump_file, "Initial set of candidates:\n");
6256 iv_ca_dump (data, dump_file, set);
6259 while (try_improve_iv_set (data, set, &try_replace_p))
6261 if (dump_file && (dump_flags & TDF_DETAILS))
6263 fprintf (dump_file, "Improved to:\n");
6264 iv_ca_dump (data, dump_file, set);
6268 return set;
6271 static struct iv_ca *
6272 find_optimal_iv_set (struct ivopts_data *data)
6274 unsigned i;
6275 struct iv_ca *set, *origset;
6276 struct iv_use *use;
6277 comp_cost cost, origcost;
6279 /* Determine the cost based on a strategy that starts with original IVs,
6280 and try again using a strategy that prefers candidates not based
6281 on any IVs. */
6282 origset = find_optimal_iv_set_1 (data, true);
6283 set = find_optimal_iv_set_1 (data, false);
6285 if (!origset && !set)
6286 return NULL;
6288 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6289 cost = set ? iv_ca_cost (set) : infinite_cost;
6291 if (dump_file && (dump_flags & TDF_DETAILS))
6293 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6294 origcost.cost, origcost.complexity);
6295 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6296 cost.cost, cost.complexity);
6299 /* Choose the one with the best cost. */
6300 if (compare_costs (origcost, cost) <= 0)
6302 if (set)
6303 iv_ca_free (&set);
6304 set = origset;
6306 else if (origset)
6307 iv_ca_free (&origset);
6309 for (i = 0; i < n_iv_uses (data); i++)
6311 use = iv_use (data, i);
6312 use->selected = iv_ca_cand_for_use (set, use)->cand;
6315 return set;
6318 /* Creates a new induction variable corresponding to CAND. */
6320 static void
6321 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6323 gimple_stmt_iterator incr_pos;
6324 tree base;
6325 bool after = false;
6327 if (!cand->iv)
6328 return;
6330 switch (cand->pos)
6332 case IP_NORMAL:
6333 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6334 break;
6336 case IP_END:
6337 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6338 after = true;
6339 break;
6341 case IP_AFTER_USE:
6342 after = true;
6343 /* fall through */
6344 case IP_BEFORE_USE:
6345 incr_pos = gsi_for_stmt (cand->incremented_at);
6346 break;
6348 case IP_ORIGINAL:
6349 /* Mark that the iv is preserved. */
6350 name_info (data, cand->var_before)->preserve_biv = true;
6351 name_info (data, cand->var_after)->preserve_biv = true;
6353 /* Rewrite the increment so that it uses var_before directly. */
6354 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6355 return;
6358 gimple_add_tmp_var (cand->var_before);
6360 base = unshare_expr (cand->iv->base);
6362 create_iv (base, unshare_expr (cand->iv->step),
6363 cand->var_before, data->current_loop,
6364 &incr_pos, after, &cand->var_before, &cand->var_after);
6367 /* Creates new induction variables described in SET. */
6369 static void
6370 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6372 unsigned i;
6373 struct iv_cand *cand;
6374 bitmap_iterator bi;
6376 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6378 cand = iv_cand (data, i);
6379 create_new_iv (data, cand);
6382 if (dump_file && (dump_flags & TDF_DETAILS))
6384 fprintf (dump_file, "Selected IV set for loop %d",
6385 data->current_loop->num);
6386 if (data->loop_loc != UNKNOWN_LOCATION)
6387 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6388 LOCATION_LINE (data->loop_loc));
6389 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6390 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6392 cand = iv_cand (data, i);
6393 dump_cand (dump_file, cand);
6395 fprintf (dump_file, "\n");
6399 /* Rewrites USE (definition of iv used in a nonlinear expression)
6400 using candidate CAND. */
6402 static void
6403 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6404 struct iv_use *use, struct iv_cand *cand)
6406 tree comp;
6407 tree op, tgt;
6408 gassign *ass;
6409 gimple_stmt_iterator bsi;
6411 /* An important special case -- if we are asked to express value of
6412 the original iv by itself, just exit; there is no need to
6413 introduce a new computation (that might also need casting the
6414 variable to unsigned and back). */
6415 if (cand->pos == IP_ORIGINAL
6416 && cand->incremented_at == use->stmt)
6418 enum tree_code stmt_code;
6420 gcc_assert (is_gimple_assign (use->stmt));
6421 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6423 /* Check whether we may leave the computation unchanged.
6424 This is the case only if it does not rely on other
6425 computations in the loop -- otherwise, the computation
6426 we rely upon may be removed in remove_unused_ivs,
6427 thus leading to ICE. */
6428 stmt_code = gimple_assign_rhs_code (use->stmt);
6429 if (stmt_code == PLUS_EXPR
6430 || stmt_code == MINUS_EXPR
6431 || stmt_code == POINTER_PLUS_EXPR)
6433 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6434 op = gimple_assign_rhs2 (use->stmt);
6435 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6436 op = gimple_assign_rhs1 (use->stmt);
6437 else
6438 op = NULL_TREE;
6440 else
6441 op = NULL_TREE;
6443 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6444 return;
6447 comp = get_computation (data->current_loop, use, cand);
6448 gcc_assert (comp != NULL_TREE);
6450 switch (gimple_code (use->stmt))
6452 case GIMPLE_PHI:
6453 tgt = PHI_RESULT (use->stmt);
6455 /* If we should keep the biv, do not replace it. */
6456 if (name_info (data, tgt)->preserve_biv)
6457 return;
6459 bsi = gsi_after_labels (gimple_bb (use->stmt));
6460 break;
6462 case GIMPLE_ASSIGN:
6463 tgt = gimple_assign_lhs (use->stmt);
6464 bsi = gsi_for_stmt (use->stmt);
6465 break;
6467 default:
6468 gcc_unreachable ();
6471 if (!valid_gimple_rhs_p (comp)
6472 || (gimple_code (use->stmt) != GIMPLE_PHI
6473 /* We can't allow re-allocating the stmt as it might be pointed
6474 to still. */
6475 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6476 >= gimple_num_ops (gsi_stmt (bsi)))))
6478 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6479 true, GSI_SAME_STMT);
6480 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6482 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6483 /* As this isn't a plain copy we have to reset alignment
6484 information. */
6485 if (SSA_NAME_PTR_INFO (comp))
6486 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6490 if (gimple_code (use->stmt) == GIMPLE_PHI)
6492 ass = gimple_build_assign (tgt, comp);
6493 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6495 bsi = gsi_for_stmt (use->stmt);
6496 remove_phi_node (&bsi, false);
6498 else
6500 gimple_assign_set_rhs_from_tree (&bsi, comp);
6501 use->stmt = gsi_stmt (bsi);
6505 /* Performs a peephole optimization to reorder the iv update statement with
6506 a mem ref to enable instruction combining in later phases. The mem ref uses
6507 the iv value before the update, so the reordering transformation requires
6508 adjustment of the offset. CAND is the selected IV_CAND.
6510 Example:
6512 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6513 iv2 = iv1 + 1;
6515 if (t < val) (1)
6516 goto L;
6517 goto Head;
6520 directly propagating t over to (1) will introduce overlapping live range
6521 thus increase register pressure. This peephole transform it into:
6524 iv2 = iv1 + 1;
6525 t = MEM_REF (base, iv2, 8, 8);
6526 if (t < val)
6527 goto L;
6528 goto Head;
6531 static void
6532 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6534 tree var_after;
6535 gimple iv_update, stmt;
6536 basic_block bb;
6537 gimple_stmt_iterator gsi, gsi_iv;
6539 if (cand->pos != IP_NORMAL)
6540 return;
6542 var_after = cand->var_after;
6543 iv_update = SSA_NAME_DEF_STMT (var_after);
6545 bb = gimple_bb (iv_update);
6546 gsi = gsi_last_nondebug_bb (bb);
6547 stmt = gsi_stmt (gsi);
6549 /* Only handle conditional statement for now. */
6550 if (gimple_code (stmt) != GIMPLE_COND)
6551 return;
6553 gsi_prev_nondebug (&gsi);
6554 stmt = gsi_stmt (gsi);
6555 if (stmt != iv_update)
6556 return;
6558 gsi_prev_nondebug (&gsi);
6559 if (gsi_end_p (gsi))
6560 return;
6562 stmt = gsi_stmt (gsi);
6563 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6564 return;
6566 if (stmt != use->stmt)
6567 return;
6569 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6570 return;
6572 if (dump_file && (dump_flags & TDF_DETAILS))
6574 fprintf (dump_file, "Reordering \n");
6575 print_gimple_stmt (dump_file, iv_update, 0, 0);
6576 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6577 fprintf (dump_file, "\n");
6580 gsi = gsi_for_stmt (use->stmt);
6581 gsi_iv = gsi_for_stmt (iv_update);
6582 gsi_move_before (&gsi_iv, &gsi);
6584 cand->pos = IP_BEFORE_USE;
6585 cand->incremented_at = use->stmt;
6588 /* Rewrites USE (address that is an iv) using candidate CAND. */
6590 static void
6591 rewrite_use_address (struct ivopts_data *data,
6592 struct iv_use *use, struct iv_cand *cand)
6594 aff_tree aff;
6595 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6596 tree base_hint = NULL_TREE;
6597 tree ref, iv;
6598 bool ok;
6600 adjust_iv_update_pos (cand, use);
6601 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6602 gcc_assert (ok);
6603 unshare_aff_combination (&aff);
6605 /* To avoid undefined overflow problems, all IV candidates use unsigned
6606 integer types. The drawback is that this makes it impossible for
6607 create_mem_ref to distinguish an IV that is based on a memory object
6608 from one that represents simply an offset.
6610 To work around this problem, we pass a hint to create_mem_ref that
6611 indicates which variable (if any) in aff is an IV based on a memory
6612 object. Note that we only consider the candidate. If this is not
6613 based on an object, the base of the reference is in some subexpression
6614 of the use -- but these will use pointer types, so they are recognized
6615 by the create_mem_ref heuristics anyway. */
6616 if (cand->iv->base_object)
6617 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6619 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6620 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6621 reference_alias_ptr_type (*use->op_p),
6622 iv, base_hint, data->speed);
6623 copy_ref_info (ref, *use->op_p);
6624 *use->op_p = ref;
6627 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6628 candidate CAND. */
6630 static void
6631 rewrite_use_compare (struct ivopts_data *data,
6632 struct iv_use *use, struct iv_cand *cand)
6634 tree comp, *var_p, op, bound;
6635 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6636 enum tree_code compare;
6637 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6638 bool ok;
6640 bound = cp->value;
6641 if (bound)
6643 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6644 tree var_type = TREE_TYPE (var);
6645 gimple_seq stmts;
6647 if (dump_file && (dump_flags & TDF_DETAILS))
6649 fprintf (dump_file, "Replacing exit test: ");
6650 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6652 compare = cp->comp;
6653 bound = unshare_expr (fold_convert (var_type, bound));
6654 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6655 if (stmts)
6656 gsi_insert_seq_on_edge_immediate (
6657 loop_preheader_edge (data->current_loop),
6658 stmts);
6660 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6661 gimple_cond_set_lhs (cond_stmt, var);
6662 gimple_cond_set_code (cond_stmt, compare);
6663 gimple_cond_set_rhs (cond_stmt, op);
6664 return;
6667 /* The induction variable elimination failed; just express the original
6668 giv. */
6669 comp = get_computation (data->current_loop, use, cand);
6670 gcc_assert (comp != NULL_TREE);
6672 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6673 gcc_assert (ok);
6675 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6676 true, GSI_SAME_STMT);
6679 /* Rewrites USE using candidate CAND. */
6681 static void
6682 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6684 switch (use->type)
6686 case USE_NONLINEAR_EXPR:
6687 rewrite_use_nonlinear_expr (data, use, cand);
6688 break;
6690 case USE_ADDRESS:
6691 rewrite_use_address (data, use, cand);
6692 break;
6694 case USE_COMPARE:
6695 rewrite_use_compare (data, use, cand);
6696 break;
6698 default:
6699 gcc_unreachable ();
6702 update_stmt (use->stmt);
6705 /* Compare routine for sorting the vector of iv_uses after dominance. */
6707 static int
6708 ivuse_cmp (const void *a, const void *b)
6710 const struct iv_use *usea = *((const struct iv_use * const *)a);
6711 const struct iv_use *useb = *((const struct iv_use * const *)b);
6712 basic_block bba = gimple_bb (usea->stmt);
6713 basic_block bbb = gimple_bb (useb->stmt);
6714 if (bba == bbb)
6716 if (usea->stmt == useb->stmt)
6717 return 0;
6718 if (gimple_uid (usea->stmt) > gimple_uid (useb->stmt))
6719 return 1;
6720 else
6721 return -1;
6723 else if (dominated_by_p (CDI_DOMINATORS, bba, bbb))
6724 return 1;
6725 else
6726 return -1;
6729 /* Rewrite the uses using the selected induction variables. */
6731 static void
6732 rewrite_uses (struct ivopts_data *data)
6734 unsigned i;
6735 struct iv_cand *cand;
6736 struct iv_use *use;
6738 /* Sort uses so that dominating uses are processed first. */
6739 data->iv_uses.qsort (ivuse_cmp);
6741 for (i = 0; i < n_iv_uses (data); i++)
6743 use = iv_use (data, i);
6744 cand = use->selected;
6745 gcc_assert (cand);
6747 rewrite_use (data, use, cand);
6751 /* Removes the ivs that are not used after rewriting. */
6753 static void
6754 remove_unused_ivs (struct ivopts_data *data)
6756 unsigned j;
6757 bitmap_iterator bi;
6758 bitmap toremove = BITMAP_ALLOC (NULL);
6760 /* Figure out an order in which to release SSA DEFs so that we don't
6761 release something that we'd have to propagate into a debug stmt
6762 afterwards. */
6763 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6765 struct version_info *info;
6767 info = ver_info (data, j);
6768 if (info->iv
6769 && !integer_zerop (info->iv->step)
6770 && !info->inv_id
6771 && !info->iv->have_use_for
6772 && !info->preserve_biv)
6774 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6776 tree def = info->iv->ssa_name;
6778 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6780 imm_use_iterator imm_iter;
6781 use_operand_p use_p;
6782 gimple stmt;
6783 int count = 0;
6785 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6787 if (!gimple_debug_bind_p (stmt))
6788 continue;
6790 /* We just want to determine whether to do nothing
6791 (count == 0), to substitute the computed
6792 expression into a single use of the SSA DEF by
6793 itself (count == 1), or to use a debug temp
6794 because the SSA DEF is used multiple times or as
6795 part of a larger expression (count > 1). */
6796 count++;
6797 if (gimple_debug_bind_get_value (stmt) != def)
6798 count++;
6800 if (count > 1)
6801 BREAK_FROM_IMM_USE_STMT (imm_iter);
6804 if (!count)
6805 continue;
6807 struct iv_use dummy_use;
6808 struct iv_cand *best_cand = NULL, *cand;
6809 unsigned i, best_pref = 0, cand_pref;
6811 memset (&dummy_use, 0, sizeof (dummy_use));
6812 dummy_use.iv = info->iv;
6813 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6815 cand = iv_use (data, i)->selected;
6816 if (cand == best_cand)
6817 continue;
6818 cand_pref = operand_equal_p (cand->iv->step,
6819 info->iv->step, 0)
6820 ? 4 : 0;
6821 cand_pref
6822 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6823 == TYPE_MODE (TREE_TYPE (info->iv->base))
6824 ? 2 : 0;
6825 cand_pref
6826 += TREE_CODE (cand->iv->base) == INTEGER_CST
6827 ? 1 : 0;
6828 if (best_cand == NULL || best_pref < cand_pref)
6830 best_cand = cand;
6831 best_pref = cand_pref;
6835 if (!best_cand)
6836 continue;
6838 tree comp = get_computation_at (data->current_loop,
6839 &dummy_use, best_cand,
6840 SSA_NAME_DEF_STMT (def));
6841 if (!comp)
6842 continue;
6844 if (count > 1)
6846 tree vexpr = make_node (DEBUG_EXPR_DECL);
6847 DECL_ARTIFICIAL (vexpr) = 1;
6848 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6849 if (SSA_NAME_VAR (def))
6850 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6851 else
6852 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6853 gdebug *def_temp
6854 = gimple_build_debug_bind (vexpr, comp, NULL);
6855 gimple_stmt_iterator gsi;
6857 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6858 gsi = gsi_after_labels (gimple_bb
6859 (SSA_NAME_DEF_STMT (def)));
6860 else
6861 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6863 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6864 comp = vexpr;
6867 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6869 if (!gimple_debug_bind_p (stmt))
6870 continue;
6872 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6873 SET_USE (use_p, comp);
6875 update_stmt (stmt);
6881 release_defs_bitset (toremove);
6883 BITMAP_FREE (toremove);
6886 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6887 for hash_map::traverse. */
6889 bool
6890 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6892 free (value);
6893 return true;
6896 /* Frees data allocated by the optimization of a single loop. */
6898 static void
6899 free_loop_data (struct ivopts_data *data)
6901 unsigned i, j;
6902 bitmap_iterator bi;
6903 tree obj;
6905 if (data->niters)
6907 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6908 delete data->niters;
6909 data->niters = NULL;
6912 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6914 struct version_info *info;
6916 info = ver_info (data, i);
6917 free (info->iv);
6918 info->iv = NULL;
6919 info->has_nonlin_use = false;
6920 info->preserve_biv = false;
6921 info->inv_id = 0;
6923 bitmap_clear (data->relevant);
6924 bitmap_clear (data->important_candidates);
6926 for (i = 0; i < n_iv_uses (data); i++)
6928 struct iv_use *use = iv_use (data, i);
6930 free (use->iv);
6931 BITMAP_FREE (use->related_cands);
6932 for (j = 0; j < use->n_map_members; j++)
6933 if (use->cost_map[j].depends_on)
6934 BITMAP_FREE (use->cost_map[j].depends_on);
6935 free (use->cost_map);
6936 free (use);
6938 data->iv_uses.truncate (0);
6940 for (i = 0; i < n_iv_cands (data); i++)
6942 struct iv_cand *cand = iv_cand (data, i);
6944 free (cand->iv);
6945 if (cand->depends_on)
6946 BITMAP_FREE (cand->depends_on);
6947 free (cand);
6949 data->iv_candidates.truncate (0);
6951 if (data->version_info_size < num_ssa_names)
6953 data->version_info_size = 2 * num_ssa_names;
6954 free (data->version_info);
6955 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6958 data->max_inv_id = 0;
6960 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6961 SET_DECL_RTL (obj, NULL_RTX);
6963 decl_rtl_to_reset.truncate (0);
6965 data->inv_expr_tab->empty ();
6966 data->inv_expr_id = 0;
6969 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6970 loop tree. */
6972 static void
6973 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6975 free_loop_data (data);
6976 free (data->version_info);
6977 BITMAP_FREE (data->relevant);
6978 BITMAP_FREE (data->important_candidates);
6980 decl_rtl_to_reset.release ();
6981 data->iv_uses.release ();
6982 data->iv_candidates.release ();
6983 delete data->inv_expr_tab;
6984 data->inv_expr_tab = NULL;
6985 free_affine_expand_cache (&data->name_expansion_cache);
6988 /* Returns true if the loop body BODY includes any function calls. */
6990 static bool
6991 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6993 gimple_stmt_iterator gsi;
6994 unsigned i;
6996 for (i = 0; i < num_nodes; i++)
6997 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6999 gimple stmt = gsi_stmt (gsi);
7000 if (is_gimple_call (stmt)
7001 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7002 return true;
7004 return false;
7007 /* Optimizes the LOOP. Returns true if anything changed. */
7009 static bool
7010 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7012 bool changed = false;
7013 struct iv_ca *iv_ca;
7014 edge exit = single_dom_exit (loop);
7015 basic_block *body;
7017 gcc_assert (!data->niters);
7018 data->current_loop = loop;
7019 data->loop_loc = find_loop_location (loop);
7020 data->speed = optimize_loop_for_speed_p (loop);
7022 if (dump_file && (dump_flags & TDF_DETAILS))
7024 fprintf (dump_file, "Processing loop %d", loop->num);
7025 if (data->loop_loc != UNKNOWN_LOCATION)
7026 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7027 LOCATION_LINE (data->loop_loc));
7028 fprintf (dump_file, "\n");
7030 if (exit)
7032 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7033 exit->src->index, exit->dest->index);
7034 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7035 fprintf (dump_file, "\n");
7038 fprintf (dump_file, "\n");
7041 body = get_loop_body (loop);
7042 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7043 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7044 free (body);
7046 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7048 /* For each ssa name determines whether it behaves as an induction variable
7049 in some loop. */
7050 if (!find_induction_variables (data))
7051 goto finish;
7053 /* Finds interesting uses (item 1). */
7054 find_interesting_uses (data);
7055 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7056 goto finish;
7058 /* Finds candidates for the induction variables (item 2). */
7059 find_iv_candidates (data);
7061 /* Calculates the costs (item 3, part 1). */
7062 determine_iv_costs (data);
7063 determine_use_iv_costs (data);
7064 determine_set_costs (data);
7066 /* Find the optimal set of induction variables (item 3, part 2). */
7067 iv_ca = find_optimal_iv_set (data);
7068 if (!iv_ca)
7069 goto finish;
7070 changed = true;
7072 /* Create the new induction variables (item 4, part 1). */
7073 create_new_ivs (data, iv_ca);
7074 iv_ca_free (&iv_ca);
7076 /* Rewrite the uses (item 4, part 2). */
7077 rewrite_uses (data);
7079 /* Remove the ivs that are unused after rewriting. */
7080 remove_unused_ivs (data);
7082 /* We have changed the structure of induction variables; it might happen
7083 that definitions in the scev database refer to some of them that were
7084 eliminated. */
7085 scev_reset ();
7087 finish:
7088 free_loop_data (data);
7090 return changed;
7093 /* Main entry point. Optimizes induction variables in loops. */
7095 void
7096 tree_ssa_iv_optimize (void)
7098 struct loop *loop;
7099 struct ivopts_data data;
7101 tree_ssa_iv_optimize_init (&data);
7103 /* Optimize the loops starting with the innermost ones. */
7104 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7106 if (dump_file && (dump_flags & TDF_DETAILS))
7107 flow_loop_dump (loop, dump_file, NULL, 1);
7109 tree_ssa_iv_optimize_loop (&data, loop);
7112 tree_ssa_iv_optimize_finalize (&data);