/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

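/* For illustration, consider a simple loop

     for (i = 0; i < n; i++)
       a[i] = 0;

   Here &a[i] is an address use, i < n is a compare use, and i itself is
   a biv.  Strength reduction may replace the scaled address computation
   a + i * sizeof (*a) by a pointer candidate that is simply incremented
   by sizeof (*a) each iteration, in effect producing

     for (p = a; p < a + n; p++)
       *p = 0;  */
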
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: add_cost and zero_cost defined in expmed.h conflict with local
   uses.  */
#include "expmed.h"
#undef add_cost
#undef zero_cost

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

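/* Illustrative note: the intent of the two fields is that the runtime
   cost is compared first and complexity serves as a tie-breaker, so of
   two computations with costs {4, 1} and {4, 3} the former is
   preferred.  */
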
static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
                struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
                                     exit, desc, true)
          && integer_zerop (desc->may_be_zero)
          && !contains_abnormal_ssa_name_p (desc->niter))
        niter = desc->niter;
      else
        niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns the tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

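/* Illustrative note: for EXPR = &a[i] the function returns &a, the
   address of the enclosing declaration; for a plain pointer ssa name p
   it returns p itself (the default case); and for an integer constant
   cast to a pointer type it returns NULL_TREE, meaning no base object
   could be determined.  */
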
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

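/* Illustrative note: for a loop header phi such as

     i_1 = PHI <0 (preheader), i_2 (latch)>

   with i_2 = i_1 + 4 in the loop body, determine_biv_step returns the
   constant 4; for a phi whose result does not evolve as a simple affine
   function of the iteration, it returns NULL_TREE.  */
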
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, in case the
     caller did not do so.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

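/* Illustrative note: for a loop condition such as i_3 < n_5, where i_3
   is an induction variable and n_5 is loop invariant, the function
   returns true with *CONTROL_VAR pointing at the i_3 operand and *BOUND
   at the n_5 operand; the swap above ensures this holds even when the
   iv appears on the right-hand side of the comparison.  */
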
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

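/* Illustrative note: for an access a[i] where the elements are 4 bytes
   wide and i is an iv with step 1, the index i is replaced by its
   initial value and 4 * 1 = 4 is accumulated into DTA->step, so the
   address as a whole is known to advance by 4 bytes per iteration.  */
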
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

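/* Illustrative note: constant_multiple_of (8 * n, n, &mul) succeeds
   with mul = 8 via the MULT_EXPR case, and constant_multiple_of
   (12, 4, &mul) succeeds with mul = 3 via the INTEGER_CST case; for
   TOP = 13 and BOT = 4 the division leaves a nonzero remainder, so
   the function returns false.  */
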
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

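/* Illustrative note: the check on STEP matters because an access can
   start out aligned yet lose alignment as the iv advances; e.g. a
   32-bit access with a step of 2 bytes is misaligned after the first
   iteration on a strict-alignment target, which is why the largest
   power-of-two factor of STEP is compared against the mode alignment
   above.  */
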
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

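/* Illustrative note: applied to the address &a[2], where the array
   elements are 4 bytes wide, strip_offset stores 8 to *OFFSET and
   returns &a; applied to p + 12 it stores 12 and returns p.  */
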
/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

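/* Illustrative note: this maps int to unsigned int and a pointer type
   to an unsigned integer type of the same width, while unsigned types
   are returned unchanged; candidates are thus computed in unsigned
   arithmetic, where wrap-around is well defined.  */
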
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

2205 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2206 and the position to POS. If USE is not NULL, the candidate is set as related to
2207 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2208 replacement of the final value of the iv by a direct computation. */
2210 static struct iv_cand *
2211 add_candidate_1 (struct ivopts_data *data,
2212 tree base, tree step, bool important, enum iv_position pos,
2213 struct iv_use *use, gimple incremented_at)
2215 unsigned i;
2216 struct iv_cand *cand = NULL;
2217 tree type, orig_type;
2219 if (base)
2221 orig_type = TREE_TYPE (base);
2222 type = generic_type_for (orig_type);
2223 if (type != orig_type)
2225 base = fold_convert (type, base);
2226 step = fold_convert (type, step);
2230 for (i = 0; i < n_iv_cands (data); i++)
2232 cand = iv_cand (data, i);
2234 if (cand->pos != pos)
2235 continue;
2237 if (cand->incremented_at != incremented_at
2238 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2239 && cand->ainc_use != use))
2240 continue;
2242 if (!cand->iv)
2244 if (!base && !step)
2245 break;
2247 continue;
2250 if (!base && !step)
2251 continue;
2253 if (operand_equal_p (base, cand->iv->base, 0)
2254 && operand_equal_p (step, cand->iv->step, 0)
2255 && (TYPE_PRECISION (TREE_TYPE (base))
2256 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2257 break;
2260 if (i == n_iv_cands (data))
2262 cand = XCNEW (struct iv_cand);
2263 cand->id = i;
2265 if (!base && !step)
2266 cand->iv = NULL;
2267 else
2268 cand->iv = alloc_iv (base, step);
2270 cand->pos = pos;
2271 if (pos != IP_ORIGINAL && cand->iv)
2273 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2274 cand->var_after = cand->var_before;
2276 cand->important = important;
2277 cand->incremented_at = incremented_at;
2278 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2280 if (step
2281 && TREE_CODE (step) != INTEGER_CST)
2283 fd_ivopts_data = data;
2284 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2287 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2288 cand->ainc_use = use;
2289 else
2290 cand->ainc_use = NULL;
2292 if (dump_file && (dump_flags & TDF_DETAILS))
2293 dump_cand (dump_file, cand);
2296 if (important && !cand->important)
2298 cand->important = true;
2299 if (dump_file && (dump_flags & TDF_DETAILS))
2300 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2303 if (use)
2305 bitmap_set_bit (use->related_cands, i);
2306 if (dump_file && (dump_flags & TDF_DETAILS))
2307 fprintf (dump_file, "Candidate %d is related to use %d\n",
2308 cand->id, use->id);
2311 return cand;
2314 /* Returns true if incrementing the induction variable at the end of LOOP
2315 is allowed.
2317 The purpose is to avoid splitting the latch edge with a biv increment, thus
2318 creating a jump, possibly confusing other optimization passes and leaving
2319 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2320 is not available (so we do not have a better alternative), or if the latch
2321 edge is already nonempty. */
2323 static bool
2324 allow_ip_end_pos_p (struct loop *loop)
2326 if (!ip_normal_pos (loop))
2327 return true;
2329 if (!empty_block_p (ip_end_pos (loop)))
2330 return true;
2332 return false;
2335 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2336 The important field is set to IMPORTANT. */
2338 static void
2339 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2340 bool important, struct iv_use *use)
2342 basic_block use_bb = gimple_bb (use->stmt);
2343 enum machine_mode mem_mode;
2344 unsigned HOST_WIDE_INT cstepi;
2346 /* If we insert the increment in any position other than the standard
2347 ones, we must ensure that it is incremented once per iteration.
2348 It must not be in an inner nested loop, or on one side of an if
2349 statement. */
2350 if (use_bb->loop_father != data->current_loop
2351 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2352 || stmt_could_throw_p (use->stmt)
2353 || !cst_and_fits_in_hwi (step))
2354 return;
2356 cstepi = int_cst_value (step);
2358 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2359 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2360 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2362 enum tree_code code = MINUS_EXPR;
2363 tree new_base;
2364 tree new_step = step;
2366 if (POINTER_TYPE_P (TREE_TYPE (base)))
2368 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2369 code = POINTER_PLUS_EXPR;
2371 else
2372 new_step = fold_convert (TREE_TYPE (base), new_step);
2373 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2374 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2375 use->stmt);
2377 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2378 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2380 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2381 use->stmt);
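/* A worked instance (target capabilities assumed): for a 4-byte access with
   step 4, the IP_BEFORE_USE candidate starts at BASE - 4, so a pre-increment
   form lands on the right address, while the IP_AFTER_USE candidate keeps
   BASE and relies on post-increment. Which of the four forms exist depends
   on HAVE_{PRE,POST}_{INCREMENT,DECREMENT}. */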
2385 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2386 and the position to POS. If USE is not NULL, the candidate is set as related to
2387 it. The candidate computation is scheduled on all available positions. */
2389 static void
2390 add_candidate (struct ivopts_data *data,
2391 tree base, tree step, bool important, struct iv_use *use)
2393 if (ip_normal_pos (data->current_loop))
2394 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2395 if (ip_end_pos (data->current_loop)
2396 && allow_ip_end_pos_p (data->current_loop))
2397 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2399 if (use != NULL && use->type == USE_ADDRESS)
2400 add_autoinc_candidates (data, base, step, important, use);
2403 /* Add a standard "0 + 1 * iteration" iv candidate for a
2404 type with SIZE bits. */
2406 static void
2407 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2408 unsigned int size)
2410 tree type = lang_hooks.types.type_for_size (size, true);
2411 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2412 true, NULL);
2415 /* Adds standard iv candidates. */
2417 static void
2418 add_standard_iv_candidates (struct ivopts_data *data)
2420 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2422 /* The same for a double-integer type if it is still fast enough. */
2423 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2424 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2428 /* Adds candidates based on the old induction variable IV. */
2430 static void
2431 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2433 gimple phi;
2434 tree def;
2435 struct iv_cand *cand;
2437 add_candidate (data, iv->base, iv->step, true, NULL);
2439 /* The same, but with initial value zero. */
2440 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2441 add_candidate (data, size_int (0), iv->step, true, NULL);
2442 else
2443 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2444 iv->step, true, NULL);
2446 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2447 if (gimple_code (phi) == GIMPLE_PHI)
2449 /* Additionally record the possibility of leaving the original iv
2450 untouched. */
2451 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2452 cand = add_candidate_1 (data,
2453 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2454 SSA_NAME_DEF_STMT (def));
2455 cand->var_before = iv->ssa_name;
2456 cand->var_after = def;
2460 /* Adds candidates based on the old induction variables. */
2462 static void
2463 add_old_ivs_candidates (struct ivopts_data *data)
2465 unsigned i;
2466 struct iv *iv;
2467 bitmap_iterator bi;
2469 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2471 iv = ver_info (data, i)->iv;
2472 if (iv && iv->biv_p && !integer_zerop (iv->step))
2473 add_old_iv_candidates (data, iv);
2477 /* Adds candidates based on the value of the induction variable IV and USE. */
2479 static void
2480 add_iv_value_candidates (struct ivopts_data *data,
2481 struct iv *iv, struct iv_use *use)
2483 unsigned HOST_WIDE_INT offset;
2484 tree base;
2485 tree basetype;
2487 add_candidate (data, iv->base, iv->step, false, use);
2489 /* The same, but with initial value zero. Make such a variable important,
2490 since it is generic enough that possibly many uses may be based
2491 on it. */
2492 basetype = TREE_TYPE (iv->base);
2493 if (POINTER_TYPE_P (basetype))
2494 basetype = sizetype;
2495 add_candidate (data, build_int_cst (basetype, 0),
2496 iv->step, true, use);
2498 /* Third, try removing the constant offset. Make sure to even
2499 add a candidate for &a[0] vs. (T *)&a. */
2500 base = strip_offset (iv->base, &offset);
2501 if (offset
2502 || base != iv->base)
2503 add_candidate (data, base, iv->step, false, use);
2506 /* Adds candidates based on the uses. */
2508 static void
2509 add_derived_ivs_candidates (struct ivopts_data *data)
2511 unsigned i;
2513 for (i = 0; i < n_iv_uses (data); i++)
2515 struct iv_use *use = iv_use (data, i);
2517 if (!use)
2518 continue;
2520 switch (use->type)
2522 case USE_NONLINEAR_EXPR:
2523 case USE_COMPARE:
2524 case USE_ADDRESS:
2525 /* Just add the ivs based on the value of the iv used here. */
2526 add_iv_value_candidates (data, use->iv, use);
2527 break;
2529 default:
2530 gcc_unreachable ();
2535 /* Record important candidates and add them to related_cands bitmaps
2536 if needed. */
2538 static void
2539 record_important_candidates (struct ivopts_data *data)
2541 unsigned i;
2542 struct iv_use *use;
2544 for (i = 0; i < n_iv_cands (data); i++)
2546 struct iv_cand *cand = iv_cand (data, i);
2548 if (cand->important)
2549 bitmap_set_bit (data->important_candidates, i);
2552 data->consider_all_candidates = (n_iv_cands (data)
2553 <= CONSIDER_ALL_CANDIDATES_BOUND);
2555 if (data->consider_all_candidates)
2557 /* We will not need "related_cands" bitmaps in this case,
2558 so release them to decrease peak memory consumption. */
2559 for (i = 0; i < n_iv_uses (data); i++)
2561 use = iv_use (data, i);
2562 BITMAP_FREE (use->related_cands);
2565 else
2567 /* Add important candidates to the related_cands bitmaps. */
2568 for (i = 0; i < n_iv_uses (data); i++)
2569 bitmap_ior_into (iv_use (data, i)->related_cands,
2570 data->important_candidates);
2574 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2575 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2576 we allocate a simple list for every use. */
2578 static void
2579 alloc_use_cost_map (struct ivopts_data *data)
2581 unsigned i, size, s, j;
2583 for (i = 0; i < n_iv_uses (data); i++)
2585 struct iv_use *use = iv_use (data, i);
2586 bitmap_iterator bi;
2588 if (data->consider_all_candidates)
2589 size = n_iv_cands (data);
2590 else
2592 s = 0;
2593 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2595 s++;
2598 /* Round up to a power of two, so that computing the modulus is fast. */
2599 for (size = 1; size < s; size <<= 1)
2600 continue;
2603 use->n_map_members = size;
2604 use->cost_map = XCNEWVEC (struct cost_pair, size);
2608 /* Returns a description of the computation cost of an expression whose
2609 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2611 static comp_cost
2612 new_cost (unsigned runtime, unsigned complexity)
2614 comp_cost cost;
2616 cost.cost = runtime;
2617 cost.complexity = complexity;
2619 return cost;
2622 /* Adds costs COST1 and COST2. */
2624 static comp_cost
2625 add_costs (comp_cost cost1, comp_cost cost2)
2627 cost1.cost += cost2.cost;
2628 cost1.complexity += cost2.complexity;
2630 return cost1;
2632 /* Subtracts cost COST2 from COST1. */
2634 static comp_cost
2635 sub_costs (comp_cost cost1, comp_cost cost2)
2637 cost1.cost -= cost2.cost;
2638 cost1.complexity -= cost2.complexity;
2640 return cost1;
2643 /* Returns a negative number if COST1 < COST2, a positive number if
2644 COST1 > COST2, and 0 if COST1 = COST2. */
2646 static int
2647 compare_costs (comp_cost cost1, comp_cost cost2)
2649 if (cost1.cost == cost2.cost)
2650 return cost1.complexity - cost2.complexity;
2652 return cost1.cost - cost2.cost;
2655 /* Returns true if COST is infinite. */
2657 static bool
2658 infinite_cost_p (comp_cost cost)
2660 return cost.cost == INFTY;
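/* A minimal sketch of how the comp_cost helpers compose (not built): the
   runtime cost dominates comparisons and complexity only breaks ties. */
#if 0
static void
comp_cost_example (void)
{
  comp_cost a = new_cost (4, 1);
  comp_cost b = new_cost (3, 5);
  comp_cost s = add_costs (a, b);            /* {7, 6} */

  gcc_assert (compare_costs (b, a) < 0);     /* 3 < 4; complexity ignored. */
  gcc_assert (compare_costs (sub_costs (s, b), a) == 0);
}
#endif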
2663 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2664 depends on invariants DEPENDS_ON and that the value used in expressing it
2665 is VALUE. */
2667 static void
2668 set_use_iv_cost (struct ivopts_data *data,
2669 struct iv_use *use, struct iv_cand *cand,
2670 comp_cost cost, bitmap depends_on, tree value,
2671 int inv_expr_id)
2673 unsigned i, s;
2675 if (infinite_cost_p (cost))
2677 BITMAP_FREE (depends_on);
2678 return;
2681 if (data->consider_all_candidates)
2683 use->cost_map[cand->id].cand = cand;
2684 use->cost_map[cand->id].cost = cost;
2685 use->cost_map[cand->id].depends_on = depends_on;
2686 use->cost_map[cand->id].value = value;
2687 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2688 return;
2691 /* n_map_members is a power of two, so this computes modulo. */
2692 s = cand->id & (use->n_map_members - 1);
2693 for (i = s; i < use->n_map_members; i++)
2694 if (!use->cost_map[i].cand)
2695 goto found;
2696 for (i = 0; i < s; i++)
2697 if (!use->cost_map[i].cand)
2698 goto found;
2700 gcc_unreachable ();
2702 found:
2703 use->cost_map[i].cand = cand;
2704 use->cost_map[i].cost = cost;
2705 use->cost_map[i].depends_on = depends_on;
2706 use->cost_map[i].value = value;
2707 use->cost_map[i].inv_expr_id = inv_expr_id;
2710 /* Gets cost of (USE, CANDIDATE) pair. */
2712 static struct cost_pair *
2713 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2714 struct iv_cand *cand)
2716 unsigned i, s;
2717 struct cost_pair *ret;
2719 if (!cand)
2720 return NULL;
2722 if (data->consider_all_candidates)
2724 ret = use->cost_map + cand->id;
2725 if (!ret->cand)
2726 return NULL;
2728 return ret;
2731 /* n_map_members is a power of two, so this computes modulo. */
2732 s = cand->id & (use->n_map_members - 1);
2733 for (i = s; i < use->n_map_members; i++)
2734 if (use->cost_map[i].cand == cand)
2735 return use->cost_map + i;
2737 for (i = 0; i < s; i++)
2738 if (use->cost_map[i].cand == cand)
2739 return use->cost_map + i;
2741 return NULL;
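/* A worked instance (ids assumed): with n_map_members == 8, candidates with
   ids 3, 11 and 19 all map to slot 3 (id & 7) and are placed by the linear
   probing above; a candidate that was never entered -- its cost was
   infinite, so set_use_iv_cost dropped it -- makes get_use_iv_cost return
   NULL. */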
2744 /* Returns an estimate of the cost of computing SEQ. */
2746 static unsigned
2747 seq_cost (rtx seq, bool speed)
2749 unsigned cost = 0;
2750 rtx set;
2752 for (; seq; seq = NEXT_INSN (seq))
2754 set = single_set (seq);
2755 if (set)
2756 cost += rtx_cost (SET_SRC (set), SET, speed);
2757 else
2758 cost++;
2761 return cost;
2764 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2765 static rtx
2766 produce_memory_decl_rtl (tree obj, int *regno)
2768 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2769 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2770 rtx x;
2772 gcc_assert (obj);
2773 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2775 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2776 x = gen_rtx_SYMBOL_REF (address_mode, name);
2777 SET_SYMBOL_REF_DECL (x, obj);
2778 x = gen_rtx_MEM (DECL_MODE (obj), x);
2779 set_mem_addr_space (x, as);
2780 targetm.encode_section_info (obj, x, true);
2782 else
2784 x = gen_raw_REG (address_mode, (*regno)++);
2785 x = gen_rtx_MEM (DECL_MODE (obj), x);
2786 set_mem_addr_space (x, as);
2789 return x;
2792 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2793 walk_tree. DATA contains the next fake register number. */
2795 static tree
2796 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2798 tree obj = NULL_TREE;
2799 rtx x = NULL_RTX;
2800 int *regno = (int *) data;
2802 switch (TREE_CODE (*expr_p))
2804 case ADDR_EXPR:
2805 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2806 handled_component_p (*expr_p);
2807 expr_p = &TREE_OPERAND (*expr_p, 0))
2808 continue;
2809 obj = *expr_p;
2810 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2811 x = produce_memory_decl_rtl (obj, regno);
2812 break;
2814 case SSA_NAME:
2815 *ws = 0;
2816 obj = SSA_NAME_VAR (*expr_p);
2817 if (!DECL_RTL_SET_P (obj))
2818 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2819 break;
2821 case VAR_DECL:
2822 case PARM_DECL:
2823 case RESULT_DECL:
2824 *ws = 0;
2825 obj = *expr_p;
2827 if (DECL_RTL_SET_P (obj))
2828 break;
2830 if (DECL_MODE (obj) == BLKmode)
2831 x = produce_memory_decl_rtl (obj, regno);
2832 else
2833 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2835 break;
2837 default:
2838 break;
2841 if (x)
2843 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2844 SET_DECL_RTL (obj, x);
2847 return NULL_TREE;
2850 /* Determines cost of the computation of EXPR. */
2852 static unsigned
2853 computation_cost (tree expr, bool speed)
2855 rtx seq, rslt;
2856 tree type = TREE_TYPE (expr);
2857 unsigned cost;
2858 /* Avoid using hard regs in ways which may be unsupported. */
2859 int regno = LAST_VIRTUAL_REGISTER + 1;
2860 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2861 enum node_frequency real_frequency = node->frequency;
2863 node->frequency = NODE_FREQUENCY_NORMAL;
2864 crtl->maybe_hot_insn_p = speed;
2865 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2866 start_sequence ();
2867 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2868 seq = get_insns ();
2869 end_sequence ();
2870 default_rtl_profile ();
2871 node->frequency = real_frequency;
2873 cost = seq_cost (seq, speed);
2874 if (MEM_P (rslt))
2875 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2876 TYPE_ADDR_SPACE (type), speed);
2877 else if (!REG_P (rslt))
2878 cost += rtx_cost (rslt, SET, speed);
2880 return cost;
2883 /* Returns variable containing the value of candidate CAND at statement AT. */
2885 static tree
2886 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2888 if (stmt_after_increment (loop, cand, stmt))
2889 return cand->var_after;
2890 else
2891 return cand->var_before;
2894 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2895 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2897 bool
2898 tree_int_cst_sign_bit (const_tree t)
2900 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2901 unsigned HOST_WIDE_INT w;
2903 if (bitno < HOST_BITS_PER_WIDE_INT)
2904 w = TREE_INT_CST_LOW (t);
2905 else
2907 w = TREE_INT_CST_HIGH (t);
2908 bitno -= HOST_BITS_PER_WIDE_INT;
2911 return (w >> bitno) & 1;
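/* A small check of the contract above (not built): for the 32-bit signed
   integer type the sign bit is bit 31 of the low word. */
#if 0
static void
sign_bit_example (void)
{
  gcc_assert (tree_int_cst_sign_bit (integer_minus_one_node));
  gcc_assert (!tree_int_cst_sign_bit (integer_zero_node));
}
#endif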
2914 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2915 same precision that is at least as wide as the precision of TYPE, stores
2916 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2917 type of A and B. */
2919 static tree
2920 determine_common_wider_type (tree *a, tree *b)
2922 tree wider_type = NULL;
2923 tree suba, subb;
2924 tree atype = TREE_TYPE (*a);
2926 if (CONVERT_EXPR_P (*a))
2928 suba = TREE_OPERAND (*a, 0);
2929 wider_type = TREE_TYPE (suba);
2930 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2931 return atype;
2933 else
2934 return atype;
2936 if (CONVERT_EXPR_P (*b))
2938 subb = TREE_OPERAND (*b, 0);
2939 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2940 return atype;
2942 else
2943 return atype;
2945 *a = suba;
2946 *b = subb;
2947 return wider_type;
2950 /* Determines the expression by which USE is expressed from induction variable
2951 CAND at statement AT in LOOP. The expression is stored in decomposed
2952 form in AFF. Returns false if USE cannot be expressed using CAND. */
2954 static bool
2955 get_computation_aff (struct loop *loop,
2956 struct iv_use *use, struct iv_cand *cand, gimple at,
2957 struct affine_tree_combination *aff)
2959 tree ubase = use->iv->base;
2960 tree ustep = use->iv->step;
2961 tree cbase = cand->iv->base;
2962 tree cstep = cand->iv->step, cstep_common;
2963 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2964 tree common_type, var;
2965 tree uutype;
2966 aff_tree cbase_aff, var_aff;
2967 double_int rat;
2969 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2971 /* We do not have a precision to express the values of USE. */
2972 return false;
2975 var = var_at_stmt (loop, cand, at);
2976 uutype = unsigned_type_for (utype);
2978 /* If the conversion is not noop, perform it. */
2979 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2981 cstep = fold_convert (uutype, cstep);
2982 cbase = fold_convert (uutype, cbase);
2983 var = fold_convert (uutype, var);
2986 if (!constant_multiple_of (ustep, cstep, &rat))
2987 return false;
2989 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2990 type, we achieve better folding by computing their difference in this
2991 wider type, and casting the result to UUTYPE. We do not need to worry about
2992 overflows, as all the arithmetic will in the end be performed in UUTYPE
2993 anyway. */
2994 common_type = determine_common_wider_type (&ubase, &cbase);
2996 /* use = ubase - ratio * cbase + ratio * var. */
2997 tree_to_aff_combination (ubase, common_type, aff);
2998 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2999 tree_to_aff_combination (var, uutype, &var_aff);
3001 /* We need to shift the value if we are after the increment. */
3002 if (stmt_after_increment (loop, cand, at))
3004 aff_tree cstep_aff;
3006 if (common_type != uutype)
3007 cstep_common = fold_convert (common_type, cstep);
3008 else
3009 cstep_common = cstep;
3011 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3012 aff_combination_add (&cbase_aff, &cstep_aff);
3015 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3016 aff_combination_add (aff, &cbase_aff);
3017 if (common_type != uutype)
3018 aff_combination_convert (aff, uutype);
3020 aff_combination_scale (&var_aff, rat);
3021 aff_combination_add (aff, &var_aff);
3023 return true;
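/* A worked instance of the decomposition above (values assumed): for a use
   with iv {base 8, step 4} and a candidate with iv {base 0, step 1},
   constant_multiple_of yields rat == 4, so
   use = 8 - 4 * 0 + 4 * var = 8 + 4 * var. */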
3026 /* Determines the expression by which USE is expressed from induction variable
3027 CAND at statement AT in LOOP. The computation is unshared. */
3029 static tree
3030 get_computation_at (struct loop *loop,
3031 struct iv_use *use, struct iv_cand *cand, gimple at)
3033 aff_tree aff;
3034 tree type = TREE_TYPE (use->iv->base);
3036 if (!get_computation_aff (loop, use, cand, at, &aff))
3037 return NULL_TREE;
3038 unshare_aff_combination (&aff);
3039 return fold_convert (type, aff_combination_to_tree (&aff));
3042 /* Determines the expression by which USE is expressed from induction variable
3043 CAND in LOOP. The computation is unshared. */
3045 static tree
3046 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3048 return get_computation_at (loop, use, cand, use->stmt);
3051 /* Adjust the cost COST for being in loop setup rather than loop body.
3052 If we're optimizing for space, the loop setup overhead is constant;
3053 if we're optimizing for speed, amortize it over the per-iteration cost. */
3054 static unsigned
3055 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3057 if (cost == INFTY)
3058 return cost;
3059 else if (optimize_loop_for_speed_p (data->current_loop))
3060 return cost / avg_loop_niter (data->current_loop);
3061 else
3062 return cost;
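/* A worked instance (trip count assumed): with an average of 10 iterations,
   a setup cost of 30 is charged as 3 per iteration when optimizing for
   speed, and in full when optimizing for size. */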
3065 /* Returns cost of addition in MODE. */
3067 static unsigned
3068 add_cost (enum machine_mode mode, bool speed)
3070 static unsigned costs[NUM_MACHINE_MODES];
3071 rtx seq;
3072 unsigned cost;
3074 if (costs[mode])
3075 return costs[mode];
3077 start_sequence ();
3078 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3079 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3080 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3081 NULL_RTX);
3082 seq = get_insns ();
3083 end_sequence ();
3085 cost = seq_cost (seq, speed);
3086 if (!cost)
3087 cost = 1;
3089 costs[mode] = cost;
3091 if (dump_file && (dump_flags & TDF_DETAILS))
3092 fprintf (dump_file, "Addition in %s costs %d\n",
3093 GET_MODE_NAME (mode), cost);
3094 return cost;
3097 /* Entry in a hashtable of already known costs for multiplication. */
3098 struct mbc_entry
3100 HOST_WIDE_INT cst; /* The constant to multiply by. */
3101 enum machine_mode mode; /* In mode. */
3102 unsigned cost; /* The cost. */
3105 /* Computes the hash value for ENTRY. */
3107 static hashval_t
3108 mbc_entry_hash (const void *entry)
3110 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3112 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3115 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3117 static int
3118 mbc_entry_eq (const void *entry1, const void *entry2)
3120 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3121 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3123 return (e1->mode == e2->mode
3124 && e1->cst == e2->cst);
3127 /* Returns cost of multiplication by constant CST in MODE. */
3129 unsigned
3130 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3132 static htab_t costs;
3133 struct mbc_entry **cached, act;
3134 rtx seq;
3135 unsigned cost;
3137 if (!costs)
3138 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3140 act.mode = mode;
3141 act.cst = cst;
3142 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3143 if (*cached)
3144 return (*cached)->cost;
3146 *cached = XNEW (struct mbc_entry);
3147 (*cached)->mode = mode;
3148 (*cached)->cst = cst;
3150 start_sequence ();
3151 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3152 gen_int_mode (cst, mode), NULL_RTX, 0);
3153 seq = get_insns ();
3154 end_sequence ();
3156 cost = seq_cost (seq, speed);
3158 if (dump_file && (dump_flags & TDF_DETAILS))
3159 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3160 (int) cst, GET_MODE_NAME (mode), cost);
3162 (*cached)->cost = cost;
3164 return cost;
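/* A usage sketch (not built; the exact costs are target-dependent): the
   table above is keyed on (mode, constant), so repeated queries are answered
   from the cache. */
#if 0
static void
multiply_by_cost_example (void)
{
  unsigned c1 = multiply_by_cost (4, SImode, true);
  unsigned c2 = multiply_by_cost (4, SImode, true);  /* cache hit */
  gcc_assert (c1 == c2);
}
#endif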
3167 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3168 validity for a memory reference accessing memory of mode MODE in
3169 address space AS. */
3171 DEF_VEC_P (sbitmap);
3172 DEF_VEC_ALLOC_P (sbitmap, heap);
3174 bool
3175 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3176 addr_space_t as)
3178 #define MAX_RATIO 128
3179 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3180 static VEC (sbitmap, heap) *valid_mult_list;
3181 sbitmap valid_mult;
3183 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3184 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3186 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3187 if (!valid_mult)
3189 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3190 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3191 rtx addr;
3192 HOST_WIDE_INT i;
3194 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3195 sbitmap_zero (valid_mult);
3196 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3197 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3199 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3200 if (memory_address_addr_space_p (mode, addr, as))
3201 SET_BIT (valid_mult, i + MAX_RATIO);
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3206 fprintf (dump_file, " allowed multipliers:");
3207 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3208 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3209 fprintf (dump_file, " %d", (int) i);
3210 fprintf (dump_file, "\n");
3211 fprintf (dump_file, "\n");
3214 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3217 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3218 return false;
3220 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
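/* A usage sketch (not built; the answer is target-dependent -- on i386, for
   instance, scale factors 1, 2, 4 and 8 are valid): results are cached per
   (mode, address space) as a bitmap over the range [-MAX_RATIO, MAX_RATIO]. */
#if 0
static void
multiplier_example (void)
{
  bool ok = multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC);
  (void) ok;
}
#endif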
3223 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3224 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3225 variable is omitted. Compute the cost for a memory reference that accesses
3226 a memory location of mode MEM_MODE in address space AS.
3228 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229 size of MEM_MODE / RATIO) is available. To make this determination, we
3230 look at the size of the increment to be made, which is given in CSTEP.
3231 CSTEP may be zero if the step is unknown.
3232 STMT_AFTER_INC is true iff the statement we're looking at is after the
3233 increment of the original biv.
3235 TODO -- there must be some better way. This all is quite crude. */
3237 typedef struct
3239 HOST_WIDE_INT min_offset, max_offset;
3240 unsigned costs[2][2][2][2];
3241 } *address_cost_data;
3243 DEF_VEC_P (address_cost_data);
3244 DEF_VEC_ALLOC_P (address_cost_data, heap);
3246 static comp_cost
3247 get_address_cost (bool symbol_present, bool var_present,
3248 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3249 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3250 addr_space_t as, bool speed,
3251 bool stmt_after_inc, bool *may_autoinc)
3253 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3254 static VEC(address_cost_data, heap) *address_cost_data_list;
3255 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3256 address_cost_data data;
3257 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3258 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3259 unsigned cost, acost, complexity;
3260 bool offset_p, ratio_p, autoinc;
3261 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3262 unsigned HOST_WIDE_INT mask;
3263 unsigned bits;
3265 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3266 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3267 data_index + 1);
3269 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3270 if (!data)
3272 HOST_WIDE_INT i;
3273 HOST_WIDE_INT rat, off = 0;
3274 int old_cse_not_expected, width;
3275 unsigned sym_p, var_p, off_p, rat_p, add_c;
3276 rtx seq, addr, base;
3277 rtx reg0, reg1;
3279 data = (address_cost_data) xcalloc (1, sizeof (*data));
3281 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3283 width = GET_MODE_BITSIZE (address_mode) - 1;
3284 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3285 width = HOST_BITS_PER_WIDE_INT - 1;
3286 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3288 for (i = width; i >= 0; i--)
3290 off = -((HOST_WIDE_INT) 1 << i);
3291 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3292 if (memory_address_addr_space_p (mem_mode, addr, as))
3293 break;
3295 data->min_offset = (i == -1 ? 0 : off);
3297 for (i = width; i >= 0; i--)
3299 off = ((HOST_WIDE_INT) 1 << i) - 1;
3300 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301 if (memory_address_addr_space_p (mem_mode, addr, as))
3302 break;
3304 if (i == -1)
3305 off = 0;
3306 data->max_offset = off;
3308 if (dump_file && (dump_flags & TDF_DETAILS))
3310 fprintf (dump_file, "get_address_cost:\n");
3311 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3312 GET_MODE_NAME (mem_mode),
3313 data->min_offset);
3314 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3315 GET_MODE_NAME (mem_mode),
3316 data->max_offset);
3319 rat = 1;
3320 for (i = 2; i <= MAX_RATIO; i++)
3321 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3323 rat = i;
3324 break;
3327 /* Compute the cost of various addressing modes. */
3328 acost = 0;
3329 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3330 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3332 if (HAVE_PRE_DECREMENT)
3334 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3335 has_predec[mem_mode]
3336 = memory_address_addr_space_p (mem_mode, addr, as);
3338 if (HAVE_POST_DECREMENT)
3340 addr = gen_rtx_POST_DEC (address_mode, reg0);
3341 has_postdec[mem_mode]
3342 = memory_address_addr_space_p (mem_mode, addr, as);
3344 if (HAVE_PRE_INCREMENT)
3346 addr = gen_rtx_PRE_INC (address_mode, reg0);
3347 has_preinc[mem_mode]
3348 = memory_address_addr_space_p (mem_mode, addr, as);
3350 if (HAVE_POST_INCREMENT)
3352 addr = gen_rtx_POST_INC (address_mode, reg0);
3353 has_postinc[mem_mode]
3354 = memory_address_addr_space_p (mem_mode, addr, as);
3356 for (i = 0; i < 16; i++)
3358 sym_p = i & 1;
3359 var_p = (i >> 1) & 1;
3360 off_p = (i >> 2) & 1;
3361 rat_p = (i >> 3) & 1;
3363 addr = reg0;
3364 if (rat_p)
3365 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3366 gen_int_mode (rat, address_mode));
3368 if (var_p)
3369 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3371 if (sym_p)
3373 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3374 /* ??? We can run into trouble with some backends by presenting
3375 it with symbols which haven't been properly passed through
3376 targetm.encode_section_info. By setting the local bit, we
3377 enhance the probability of things working. */
3378 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3380 if (off_p)
3381 base = gen_rtx_fmt_e (CONST, address_mode,
3382 gen_rtx_fmt_ee
3383 (PLUS, address_mode, base,
3384 gen_int_mode (off, address_mode)));
3386 else if (off_p)
3387 base = gen_int_mode (off, address_mode);
3388 else
3389 base = NULL_RTX;
3391 if (base)
3392 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3394 start_sequence ();
3395 /* To avoid splitting addressing modes, pretend that no cse will
3396 follow. */
3397 old_cse_not_expected = cse_not_expected;
3398 cse_not_expected = true;
3399 addr = memory_address_addr_space (mem_mode, addr, as);
3400 cse_not_expected = old_cse_not_expected;
3401 seq = get_insns ();
3402 end_sequence ();
3404 acost = seq_cost (seq, speed);
3405 acost += address_cost (addr, mem_mode, as, speed);
3407 if (!acost)
3408 acost = 1;
3409 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3412 /* On some targets, it is quite expensive to load a symbol into a register,
3413 which makes addresses that contain symbols look much more expensive.
3414 However, the symbol will have to be loaded in any case before the
3415 loop (and quite likely we have it in a register already), so it does not
3416 make much sense to penalize them too heavily. So make some final
3417 tweaks for the SYMBOL_PRESENT modes:
3419 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3420 a var is cheaper, use this mode with a small penalty.
3421 If VAR_PRESENT is true, test whether the mode with
3422 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3423 if this is the case, use it. */
3424 add_c = add_cost (address_mode, speed);
3425 for (i = 0; i < 8; i++)
3427 var_p = i & 1;
3428 off_p = (i >> 1) & 1;
3429 rat_p = (i >> 2) & 1;
3431 acost = data->costs[0][1][off_p][rat_p] + 1;
3432 if (var_p)
3433 acost += add_c;
3435 if (acost < data->costs[1][var_p][off_p][rat_p])
3436 data->costs[1][var_p][off_p][rat_p] = acost;
3439 if (dump_file && (dump_flags & TDF_DETAILS))
3441 fprintf (dump_file, "Address costs:\n");
3443 for (i = 0; i < 16; i++)
3445 sym_p = i & 1;
3446 var_p = (i >> 1) & 1;
3447 off_p = (i >> 2) & 1;
3448 rat_p = (i >> 3) & 1;
3450 fprintf (dump_file, " ");
3451 if (sym_p)
3452 fprintf (dump_file, "sym + ");
3453 if (var_p)
3454 fprintf (dump_file, "var + ");
3455 if (off_p)
3456 fprintf (dump_file, "cst + ");
3457 if (rat_p)
3458 fprintf (dump_file, "rat * ");
3460 acost = data->costs[sym_p][var_p][off_p][rat_p];
3461 fprintf (dump_file, "index costs %d\n", acost);
3463 if (has_predec[mem_mode] || has_postdec[mem_mode]
3464 || has_preinc[mem_mode] || has_postinc[mem_mode])
3465 fprintf (dump_file, " May include autoinc/dec\n");
3466 fprintf (dump_file, "\n");
3469 VEC_replace (address_cost_data, address_cost_data_list,
3470 data_index, data);
3473 bits = GET_MODE_BITSIZE (address_mode);
3474 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3475 offset &= mask;
3476 if ((offset >> (bits - 1) & 1))
3477 offset |= ~mask;
3478 s_offset = offset;
3480 autoinc = false;
3481 msize = GET_MODE_SIZE (mem_mode);
3482 autoinc_offset = offset;
3483 if (stmt_after_inc)
3484 autoinc_offset += ratio * cstep;
3485 if (symbol_present || var_present || ratio != 1)
3486 autoinc = false;
3487 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3488 && msize == cstep)
3489 || (has_postdec[mem_mode] && autoinc_offset == 0
3490 && msize == -cstep)
3491 || (has_preinc[mem_mode] && autoinc_offset == msize
3492 && msize == cstep)
3493 || (has_predec[mem_mode] && autoinc_offset == -msize
3494 && msize == -cstep))
3495 autoinc = true;
3497 cost = 0;
3498 offset_p = (s_offset != 0
3499 && data->min_offset <= s_offset
3500 && s_offset <= data->max_offset);
3501 ratio_p = (ratio != 1
3502 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3504 if (ratio != 1 && !ratio_p)
3505 cost += multiply_by_cost (ratio, address_mode, speed);
3507 if (s_offset && !offset_p && !symbol_present)
3508 cost += add_cost (address_mode, speed);
3510 if (may_autoinc)
3511 *may_autoinc = autoinc;
3512 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3513 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3514 return new_cost (cost + acost, complexity);
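/* A self-contained sketch (not built) of the sign-extension idiom used above
   to canonicalize OFFSET to the width of the address mode: mask to BITS
   bits, then propagate the sign bit. */
#if 0
static HOST_WIDE_INT
sext_to_bits_example (unsigned HOST_WIDE_INT offset, unsigned bits)
{
  unsigned HOST_WIDE_INT mask
    = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);

  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;
  /* E.g. bits == 8: 0xff maps to -1, 0x7f stays 127. */
  return (HOST_WIDE_INT) offset;
}
#endif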
3517 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3518 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3519 calculating the operands of EXPR. Returns true if successful, and returns
3520 the cost in COST. */
3522 static bool
3523 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3524 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3526 comp_cost res;
3527 tree op1 = TREE_OPERAND (expr, 1);
3528 tree cst = TREE_OPERAND (mult, 1);
3529 tree multop = TREE_OPERAND (mult, 0);
3530 int m = exact_log2 (int_cst_value (cst));
3531 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3532 int sa_cost;
3534 if (!(m >= 0 && m < maxm))
3535 return false;
3537 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3538 ? shiftadd_cost[speed][mode][m]
3539 : (mult == op1
3540 ? shiftsub1_cost[speed][mode][m]
3541 : shiftsub0_cost[speed][mode][m]));
3542 res = new_cost (sa_cost, 0);
3543 res = add_costs (res, mult == op1 ? cost0 : cost1);
3545 STRIP_NOPS (multop);
3546 if (!is_gimple_val (multop))
3547 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3549 *cost = res;
3550 return true;
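/* The shapes recognized above, as a worked instance (not built): in
   "a + b * 8" the multiplier is 2^3, so m == 3 and the shiftadd table
   applies; "a - b * 8" and "b * 8 - a" use the shiftsub tables. */
#if 0
static void
shiftadd_shape_example (void)
{
  gcc_assert (exact_log2 (8) == 3);   /* valid shift count */
  gcc_assert (exact_log2 (6) == -1);  /* not a power of two: rejected */
}
#endif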
3553 /* Estimates cost of forcing expression EXPR into a variable. */
3555 static comp_cost
3556 force_expr_to_var_cost (tree expr, bool speed)
3558 static bool costs_initialized = false;
3559 static unsigned integer_cost [2];
3560 static unsigned symbol_cost [2];
3561 static unsigned address_cost [2];
3562 tree op0, op1;
3563 comp_cost cost0, cost1, cost;
3564 enum machine_mode mode;
3566 if (!costs_initialized)
3568 tree type = build_pointer_type (integer_type_node);
3569 tree var, addr;
3570 rtx x;
3571 int i;
3573 var = create_tmp_var_raw (integer_type_node, "test_var");
3574 TREE_STATIC (var) = 1;
3575 x = produce_memory_decl_rtl (var, NULL);
3576 SET_DECL_RTL (var, x);
3578 addr = build1 (ADDR_EXPR, type, var);
3581 for (i = 0; i < 2; i++)
3583 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3584 2000), i);
3586 symbol_cost[i] = computation_cost (addr, i) + 1;
3588 address_cost[i]
3589 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3590 addr,
3591 build_int_cst (sizetype, 2000)), i) + 1;
3592 if (dump_file && (dump_flags & TDF_DETAILS))
3594 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3595 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3596 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3597 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3598 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3599 fprintf (dump_file, "\n");
3603 costs_initialized = true;
3606 STRIP_NOPS (expr);
3608 if (SSA_VAR_P (expr))
3609 return zero_cost;
3611 if (is_gimple_min_invariant (expr))
3613 if (TREE_CODE (expr) == INTEGER_CST)
3614 return new_cost (integer_cost [speed], 0);
3616 if (TREE_CODE (expr) == ADDR_EXPR)
3618 tree obj = TREE_OPERAND (expr, 0);
3620 if (TREE_CODE (obj) == VAR_DECL
3621 || TREE_CODE (obj) == PARM_DECL
3622 || TREE_CODE (obj) == RESULT_DECL)
3623 return new_cost (symbol_cost [speed], 0);
3626 return new_cost (address_cost [speed], 0);
3629 switch (TREE_CODE (expr))
3631 case POINTER_PLUS_EXPR:
3632 case PLUS_EXPR:
3633 case MINUS_EXPR:
3634 case MULT_EXPR:
3635 op0 = TREE_OPERAND (expr, 0);
3636 op1 = TREE_OPERAND (expr, 1);
3637 STRIP_NOPS (op0);
3638 STRIP_NOPS (op1);
3640 if (is_gimple_val (op0))
3641 cost0 = zero_cost;
3642 else
3643 cost0 = force_expr_to_var_cost (op0, speed);
3645 if (is_gimple_val (op1))
3646 cost1 = zero_cost;
3647 else
3648 cost1 = force_expr_to_var_cost (op1, speed);
3650 break;
3652 case NEGATE_EXPR:
3653 op0 = TREE_OPERAND (expr, 0);
3654 STRIP_NOPS (op0);
3655 op1 = NULL_TREE;
3657 if (is_gimple_val (op0))
3658 cost0 = zero_cost;
3659 else
3660 cost0 = force_expr_to_var_cost (op0, speed);
3662 cost1 = zero_cost;
3663 break;
3665 default:
3666 /* Just an arbitrary value, FIXME. */
3667 return new_cost (target_spill_cost[speed], 0);
3670 mode = TYPE_MODE (TREE_TYPE (expr));
3671 switch (TREE_CODE (expr))
3673 case POINTER_PLUS_EXPR:
3674 case PLUS_EXPR:
3675 case MINUS_EXPR:
3676 case NEGATE_EXPR:
3677 cost = new_cost (add_cost (mode, speed), 0);
3678 if (TREE_CODE (expr) != NEGATE_EXPR)
3680 tree mult = NULL_TREE;
3681 comp_cost sa_cost;
3682 if (TREE_CODE (op1) == MULT_EXPR)
3683 mult = op1;
3684 else if (TREE_CODE (op0) == MULT_EXPR)
3685 mult = op0;
3687 if (mult != NULL_TREE
3688 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3689 && get_shiftadd_cost (expr, mode, cost0, cost1, mult, speed,
3690 &sa_cost))
3691 return sa_cost;
3693 break;
3695 case MULT_EXPR:
3696 if (cst_and_fits_in_hwi (op0))
3697 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3698 else if (cst_and_fits_in_hwi (op1))
3699 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3700 else
3701 return new_cost (target_spill_cost [speed], 0);
3702 break;
3704 default:
3705 gcc_unreachable ();
3708 cost = add_costs (cost, cost0);
3709 cost = add_costs (cost, cost1);
3711 /* Bound the cost by target_spill_cost. The parts of complicated
3712 computations are often either loop invariant or can at least
3713 be shared between several iv uses, so letting this grow without
3714 limit would not give reasonable results. */
3715 if (cost.cost > (int) target_spill_cost [speed])
3716 cost.cost = target_spill_cost [speed];
3718 return cost;
3721 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3722 invariants the computation depends on. */
3724 static comp_cost
3725 force_var_cost (struct ivopts_data *data,
3726 tree expr, bitmap *depends_on)
3728 if (depends_on)
3730 fd_ivopts_data = data;
3731 walk_tree (&expr, find_depends, depends_on, NULL);
3734 return force_expr_to_var_cost (expr, data->speed);
3737 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3738 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3739 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3740 invariants the computation depends on. */
3742 static comp_cost
3743 split_address_cost (struct ivopts_data *data,
3744 tree addr, bool *symbol_present, bool *var_present,
3745 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3747 tree core;
3748 HOST_WIDE_INT bitsize;
3749 HOST_WIDE_INT bitpos;
3750 tree toffset;
3751 enum machine_mode mode;
3752 int unsignedp, volatilep;
3754 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3755 &unsignedp, &volatilep, false);
3757 if (toffset != 0
3758 || bitpos % BITS_PER_UNIT != 0
3759 || TREE_CODE (core) != VAR_DECL)
3761 *symbol_present = false;
3762 *var_present = true;
3763 fd_ivopts_data = data;
3764 walk_tree (&addr, find_depends, depends_on, NULL);
3765 return new_cost (target_spill_cost[data->speed], 0);
3768 *offset += bitpos / BITS_PER_UNIT;
3769 if (TREE_STATIC (core)
3770 || DECL_EXTERNAL (core))
3772 *symbol_present = true;
3773 *var_present = false;
3774 return zero_cost;
3777 *symbol_present = false;
3778 *var_present = true;
3779 return zero_cost;
3782 /* Estimates cost of expressing difference of addresses E1 - E2 as
3783 var + symbol + offset. The value of the offset is added to OFFSET;
3784 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3785 part is missing. DEPENDS_ON is a set of the invariants the computation
3786 depends on. */
3788 static comp_cost
3789 ptr_difference_cost (struct ivopts_data *data,
3790 tree e1, tree e2, bool *symbol_present, bool *var_present,
3791 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3793 HOST_WIDE_INT diff = 0;
3794 aff_tree aff_e1, aff_e2;
3795 tree type;
3797 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3799 if (ptr_difference_const (e1, e2, &diff))
3801 *offset += diff;
3802 *symbol_present = false;
3803 *var_present = false;
3804 return zero_cost;
3807 if (integer_zerop (e2))
3808 return split_address_cost (data, TREE_OPERAND (e1, 0),
3809 symbol_present, var_present, offset, depends_on);
3811 *symbol_present = false;
3812 *var_present = true;
3814 type = signed_type_for (TREE_TYPE (e1));
3815 tree_to_aff_combination (e1, type, &aff_e1);
3816 tree_to_aff_combination (e2, type, &aff_e2);
3817 aff_combination_scale (&aff_e2, double_int_minus_one);
3818 aff_combination_add (&aff_e1, &aff_e2);
3820 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3823 /* Estimates cost of expressing difference E1 - E2 as
3824 var + symbol + offset. The value of the offset is added to OFFSET;
3825 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3826 part is missing. DEPENDS_ON is a set of the invariants the computation
3827 depends on. */
3829 static comp_cost
3830 difference_cost (struct ivopts_data *data,
3831 tree e1, tree e2, bool *symbol_present, bool *var_present,
3832 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3834 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3835 unsigned HOST_WIDE_INT off1, off2;
3836 aff_tree aff_e1, aff_e2;
3837 tree type;
3839 e1 = strip_offset (e1, &off1);
3840 e2 = strip_offset (e2, &off2);
3841 *offset += off1 - off2;
3843 STRIP_NOPS (e1);
3844 STRIP_NOPS (e2);
3846 if (TREE_CODE (e1) == ADDR_EXPR)
3847 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3848 offset, depends_on);
3849 *symbol_present = false;
3851 if (operand_equal_p (e1, e2, 0))
3853 *var_present = false;
3854 return zero_cost;
3857 *var_present = true;
3859 if (integer_zerop (e2))
3860 return force_var_cost (data, e1, depends_on);
3862 if (integer_zerop (e1))
3864 comp_cost cost = force_var_cost (data, e2, depends_on);
3865 cost.cost += multiply_by_cost (-1, mode, data->speed);
3866 return cost;
3869 type = signed_type_for (TREE_TYPE (e1));
3870 tree_to_aff_combination (e1, type, &aff_e1);
3871 tree_to_aff_combination (e2, type, &aff_e2);
3872 aff_combination_scale (&aff_e2, double_int_minus_one);
3873 aff_combination_add (&aff_e1, &aff_e2);
3875 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3878 /* Returns true if AFF1 and AFF2 are identical. */
3880 static bool
3881 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3883 unsigned i;
3885 if (aff1->n != aff2->n)
3886 return false;
3888 for (i = 0; i < aff1->n; i++)
3890 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3891 return false;
3893 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3894 return false;
3896 return true;
3899 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3901 static int
3902 get_expr_id (struct ivopts_data *data, tree expr)
3904 struct iv_inv_expr_ent ent;
3905 struct iv_inv_expr_ent **slot;
3907 ent.expr = expr;
3908 ent.hash = iterative_hash_expr (expr, 0);
3909 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3910 &ent, INSERT);
3911 if (*slot)
3912 return (*slot)->id;
3914 *slot = XNEW (struct iv_inv_expr_ent);
3915 (*slot)->expr = expr;
3916 (*slot)->hash = ent.hash;
3917 (*slot)->id = data->inv_expr_id++;
3918 return (*slot)->id;
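/* A sketch of the intended property (not built; assumes the table's equality
   callback compares trees with operand_equal_p): structurally identical
   invariant expressions share one id, so their temporary is counted only
   once for register pressure. */
#if 0
static void
get_expr_id_example (struct ivopts_data *data, tree e1, tree e2)
{
  if (operand_equal_p (e1, e2, 0))
    gcc_assert (get_expr_id (data, e1) == get_expr_id (data, e2));
}
#endif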
3921 /* Returns the pseudo expr id if the expression UBASE - RATIO * CBASE
3922 requires a new compiler-generated temporary. Returns -1 otherwise.
3923 ADDRESS_P is a flag indicating if the expression is for address
3924 computation. */
3926 static int
3927 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3928 tree cbase, HOST_WIDE_INT ratio,
3929 bool address_p)
3931 aff_tree ubase_aff, cbase_aff;
3932 tree expr, ub, cb;
3934 STRIP_NOPS (ubase);
3935 STRIP_NOPS (cbase);
3936 ub = ubase;
3937 cb = cbase;
3939 if ((TREE_CODE (ubase) == INTEGER_CST)
3940 && (TREE_CODE (cbase) == INTEGER_CST))
3941 return -1;
3943 /* Strips the constant part. */
3944 if (TREE_CODE (ubase) == PLUS_EXPR
3945 || TREE_CODE (ubase) == MINUS_EXPR
3946 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3948 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3949 ubase = TREE_OPERAND (ubase, 0);
3952 /* Strips the constant part. */
3953 if (TREE_CODE (cbase) == PLUS_EXPR
3954 || TREE_CODE (cbase) == MINUS_EXPR
3955 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3957 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3958 cbase = TREE_OPERAND (cbase, 0);
3961 if (address_p)
3963 if (((TREE_CODE (ubase) == SSA_NAME)
3964 || (TREE_CODE (ubase) == ADDR_EXPR
3965 && is_gimple_min_invariant (ubase)))
3966 && (TREE_CODE (cbase) == INTEGER_CST))
3967 return -1;
3969 if (((TREE_CODE (cbase) == SSA_NAME)
3970 || (TREE_CODE (cbase) == ADDR_EXPR
3971 && is_gimple_min_invariant (cbase)))
3972 && (TREE_CODE (ubase) == INTEGER_CST))
3973 return -1;
3976 if (ratio == 1)
3978 if (operand_equal_p (ubase, cbase, 0))
3979 return -1;
3981 if (TREE_CODE (ubase) == ADDR_EXPR
3982 && TREE_CODE (cbase) == ADDR_EXPR)
3984 tree usym, csym;
3986 usym = TREE_OPERAND (ubase, 0);
3987 csym = TREE_OPERAND (cbase, 0);
3988 if (TREE_CODE (usym) == ARRAY_REF)
3990 tree ind = TREE_OPERAND (usym, 1);
3991 if (TREE_CODE (ind) == INTEGER_CST
3992 && host_integerp (ind, 0)
3993 && TREE_INT_CST_LOW (ind) == 0)
3994 usym = TREE_OPERAND (usym, 0);
3996 if (TREE_CODE (csym) == ARRAY_REF)
3998 tree ind = TREE_OPERAND (csym, 1);
3999 if (TREE_CODE (ind) == INTEGER_CST
4000 && host_integerp (ind, 0)
4001 && TREE_INT_CST_LOW (ind) == 0)
4002 csym = TREE_OPERAND (csym, 0);
4004 if (operand_equal_p (usym, csym, 0))
4005 return -1;
4007 /* Now do a more complex comparison. */
4008 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4009 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4010 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4011 return -1;
4014 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4015 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4017 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
4018 aff_combination_add (&ubase_aff, &cbase_aff);
4019 expr = aff_combination_to_tree (&ubase_aff);
4020 return get_expr_id (data, expr);
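/* Worked instances of the -1 (no new temporary) outcomes above, with assumed
   inputs: both bases integer constants; ratio == 1 with ubase == &a[0] and
   cbase == &a, which compare equal once the zero-index ARRAY_REF is peeled;
   or, for an address computation, an SSA-name base paired with a constant
   base. */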
4025 /* Determines the cost of the computation by which USE is expressed
4026 from induction variable CAND. If ADDRESS_P is true, we just need
4027 to create an address from it, otherwise we want to get it into a
4028 register. A set of invariants we depend on is stored in
4029 DEPENDS_ON. AT is the statement at which the value is computed.
4030 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4031 addressing is likely. */
4033 static comp_cost
4034 get_computation_cost_at (struct ivopts_data *data,
4035 struct iv_use *use, struct iv_cand *cand,
4036 bool address_p, bitmap *depends_on, gimple at,
4037 bool *can_autoinc,
4038 int *inv_expr_id)
4040 tree ubase = use->iv->base, ustep = use->iv->step;
4041 tree cbase, cstep;
4042 tree utype = TREE_TYPE (ubase), ctype;
4043 unsigned HOST_WIDE_INT cstepi, offset = 0;
4044 HOST_WIDE_INT ratio, aratio;
4045 bool var_present, symbol_present, stmt_is_after_inc;
4046 comp_cost cost;
4047 double_int rat;
4048 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4050 *depends_on = NULL;
4052 /* Only consider real candidates. */
4053 if (!cand->iv)
4054 return infinite_cost;
4056 cbase = cand->iv->base;
4057 cstep = cand->iv->step;
4058 ctype = TREE_TYPE (cbase);
4060 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4062 /* We do not have a precision to express the values of USE. */
4063 return infinite_cost;
4066 if (address_p)
4068 /* Do not try to express the address of an object with a computation based
4069 on the address of a different object. This may cause problems in rtl
4070 level alias analysis (which does not expect this to happen,
4071 as this is illegal in C), and would be unlikely to be useful
4072 anyway. */
4073 if (use->iv->base_object
4074 && cand->iv->base_object
4075 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4076 return infinite_cost;
4079 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4081 /* TODO -- add direct handling of this case. */
4082 goto fallback;
4085 /* CSTEPI is removed from the offset in case the statement is after the
4086 increment. If the step is not constant, we use zero instead.
4087 This is a bit imprecise (there is the extra addition), but
4088 redundancy elimination is likely to transform the code so that
4089 it uses the value of the variable before the increment anyway,
4090 so this is not all that unrealistic. */
4091 if (cst_and_fits_in_hwi (cstep))
4092 cstepi = int_cst_value (cstep);
4093 else
4094 cstepi = 0;
4096 if (!constant_multiple_of (ustep, cstep, &rat))
4097 return infinite_cost;
4099 if (double_int_fits_in_shwi_p (rat))
4100 ratio = double_int_to_shwi (rat);
4101 else
4102 return infinite_cost;
4104 STRIP_NOPS (cbase);
4105 ctype = TREE_TYPE (cbase);
4107 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4109 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4110 or ratio == 1, it is better to handle this like
4112 ubase - ratio * cbase + ratio * var
4114 (this also holds in the case ratio == -1, TODO). */
4116 if (cst_and_fits_in_hwi (cbase))
4118 offset = - ratio * int_cst_value (cbase);
4119 cost = difference_cost (data,
4120 ubase, build_int_cst (utype, 0),
4121 &symbol_present, &var_present, &offset,
4122 depends_on);
4123 cost.cost /= avg_loop_niter (data->current_loop);
4125 else if (ratio == 1)
4127 tree real_cbase = cbase;
4129 /* Check to see if any adjustment is needed. */
4130 if (cstepi == 0 && stmt_is_after_inc)
4132 aff_tree real_cbase_aff;
4133 aff_tree cstep_aff;
4135 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4136 &real_cbase_aff);
4137 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4139 aff_combination_add (&real_cbase_aff, &cstep_aff);
4140 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4143 cost = difference_cost (data,
4144 ubase, real_cbase,
4145 &symbol_present, &var_present, &offset,
4146 depends_on);
4147 cost.cost /= avg_loop_niter (data->current_loop);
4149 else if (address_p
4150 && !POINTER_TYPE_P (ctype)
4151 && multiplier_allowed_in_address_p
4152 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4153 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4155 cbase
4156 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4157 cost = difference_cost (data,
4158 ubase, cbase,
4159 &symbol_present, &var_present, &offset,
4160 depends_on);
4161 cost.cost /= avg_loop_niter (data->current_loop);
4163 else
4165 cost = force_var_cost (data, cbase, depends_on);
4166 cost = add_costs (cost,
4167 difference_cost (data,
4168 ubase, build_int_cst (utype, 0),
4169 &symbol_present, &var_present,
4170 &offset, depends_on));
4171 cost.cost /= avg_loop_niter (data->current_loop);
4172 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4175 if (inv_expr_id)
4177 *inv_expr_id =
4178 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4179 /* Clear depends_on. */
4180 if (*inv_expr_id != -1 && depends_on && *depends_on)
4181 bitmap_clear (*depends_on);
4184 /* If we are after the increment, the value of the candidate is higher by
4185 one iteration. */
4186 if (stmt_is_after_inc)
4187 offset -= ratio * cstepi;
4189 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4190 (symbol/var1/const parts may be omitted). If we are looking for an
4191 address, find the cost of addressing this. */
4192 if (address_p)
4193 return add_costs (cost,
4194 get_address_cost (symbol_present, var_present,
4195 offset, ratio, cstepi,
4196 TYPE_MODE (TREE_TYPE (utype)),
4197 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4198 speed, stmt_is_after_inc,
4199 can_autoinc));
4201 /* Otherwise estimate the costs for computing the expression. */
4202 if (!symbol_present && !var_present && !offset)
4204 if (ratio != 1)
4205 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4206 return cost;
4209 /* Symbol + offset should be compile-time computable, so consider that they
4210 are added once to the variable, if present. */
4211 if (var_present && (symbol_present || offset))
4212 cost.cost += adjust_setup_cost (data,
4213 add_cost (TYPE_MODE (ctype), speed));
4215 /* Having offset does not affect runtime cost in case it is added to
4216 symbol, but it increases complexity. */
4217 if (offset)
4218 cost.complexity++;
4220 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4222 aratio = ratio > 0 ? ratio : -ratio;
4223 if (aratio != 1)
4224 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4225 return cost;
4227 fallback:
4228 if (can_autoinc)
4229 *can_autoinc = false;
4232 /* Just get the expression, expand it and measure the cost. */
4233 tree comp = get_computation_at (data->current_loop, use, cand, at);
4235 if (!comp)
4236 return infinite_cost;
4238 if (address_p)
4239 comp = build_simple_mem_ref (comp);
4241 return new_cost (computation_cost (comp, speed), 0);
4245 /* Determines the cost of the computation by which USE is expressed
4246 from induction variable CAND. If ADDRESS_P is true, we just need
4247 to create an address from it, otherwise we want to get it into a
4248 register. A set of invariants we depend on is stored in
4249 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4250 autoinc addressing is likely. */
4252 static comp_cost
4253 get_computation_cost (struct ivopts_data *data,
4254 struct iv_use *use, struct iv_cand *cand,
4255 bool address_p, bitmap *depends_on,
4256 bool *can_autoinc, int *inv_expr_id)
4258 return get_computation_cost_at (data,
4259 use, cand, address_p, depends_on, use->stmt,
4260 can_autoinc, inv_expr_id);
4263 /* Determines cost of basing replacement of USE on CAND in a generic
4264 expression. */
4266 static bool
4267 determine_use_iv_cost_generic (struct ivopts_data *data,
4268 struct iv_use *use, struct iv_cand *cand)
4270 bitmap depends_on;
4271 comp_cost cost;
4272 int inv_expr_id = -1;
4274 /* The simple case first -- if we need to express the value of the preserved
4275 original biv, the cost is 0. This also prevents us from counting the
4276 cost of increment twice -- once at this use and once in the cost of
4277 the candidate. */
4278 if (cand->pos == IP_ORIGINAL
4279 && cand->incremented_at == use->stmt)
4281 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4282 return true;
4285 cost = get_computation_cost (data, use, cand, false, &depends_on,
4286 NULL, &inv_expr_id);
4288 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4289 inv_expr_id);
4291 return !infinite_cost_p (cost);
4294 /* Determines cost of basing replacement of USE on CAND in an address. */
4296 static bool
4297 determine_use_iv_cost_address (struct ivopts_data *data,
4298 struct iv_use *use, struct iv_cand *cand)
4300 bitmap depends_on;
4301 bool can_autoinc;
4302 int inv_expr_id = -1;
4303 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4304 &can_autoinc, &inv_expr_id);
4306 if (cand->ainc_use == use)
4308 if (can_autoinc)
4309 cost.cost -= cand->cost_step;
4310 /* If we generated the candidate solely for exploiting autoincrement
4311 opportunities, and it turns out it can't be used, set the cost to
4312 infinity to make sure we ignore it. */
4313 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4314 cost = infinite_cost;
4316 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4317 inv_expr_id);
4319 return !infinite_cost_p (cost);
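/* As an illustration: on targets with post-increment addressing, a
   candidate incremented right after the memory access lets the access
   absorb the step ("*p++" style), which is why cost_step is refunded
   above when autoincrement is possible.  The available addressing
   modes are, of course, target-specific.  */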
4322 /* Computes value of candidate CAND at position AT in iteration NITER, and
4323 stores it to VAL. */
4325 static void
4326 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4327 aff_tree *val)
4329 aff_tree step, delta, nit;
4330 struct iv *iv = cand->iv;
4331 tree type = TREE_TYPE (iv->base);
4332 tree steptype = type;
4333 if (POINTER_TYPE_P (type))
4334 steptype = sizetype;
4336 tree_to_aff_combination (iv->step, steptype, &step);
4337 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4338 aff_combination_convert (&nit, steptype);
4339 aff_combination_mult (&nit, &step, &delta);
4340 if (stmt_after_increment (loop, cand, at))
4341 aff_combination_add (&delta, &step);
4343 tree_to_aff_combination (iv->base, type, val);
4344 aff_combination_add (val, &delta);
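/* In other words, VAL is base + NITER * step, or base + (NITER + 1) * step
   when AT follows the increment.  E.g. a candidate with base 0 and step 4
   evaluates to 40 after 10 iterations, or to 44 past the increment
   (illustrative numbers).  */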
4347 /* Returns the period of induction variable IV. */
4349 static tree
4350 iv_period (struct iv *iv)
4352 tree step = iv->step, period, type;
4353 tree pow2div;
4355 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4357 type = unsigned_type_for (TREE_TYPE (step));
4358 /* The period of the iv is lcm (step, type_range)/step - 1,
4359 i.e., N*type_range/step - 1. Since type_range is a power
4360 of two, N == step >> num_of_ending_zeros_binary (step),
4361 so the final result is
4363 (type_range >> num_of_ending_zeros_binary (step)) - 1
4366 pow2div = num_ending_zeros (step);
4368 period = build_low_bits_mask (type,
4369 (TYPE_PRECISION (type)
4370 - tree_low_cst (pow2div, 1)));
4372 return period;
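/* A worked example: for an 8-bit unsigned type (type_range == 256) and
   step == 4, num_ending_zeros is 2 and the period is (256 >> 2) - 1 == 63,
   i.e. the iv runs through 64 distinct values before wrapping around.  */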
4375 /* Returns the comparison operator used when eliminating the iv USE. */
4377 static enum tree_code
4378 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4380 struct loop *loop = data->current_loop;
4381 basic_block ex_bb;
4382 edge exit;
4384 ex_bb = gimple_bb (use->stmt);
4385 exit = EDGE_SUCC (ex_bb, 0);
4386 if (flow_bb_inside_loop_p (loop, exit->dest))
4387 exit = EDGE_SUCC (ex_bb, 1);
4389 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
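/* E.g. for "while (i < n)", where the condition being true keeps us in
   the loop, the exit is the false edge and the test is replaced by
   "cand != bound"; if instead the true edge leaves the loop, we exit
   exactly when the candidate reaches the bound, hence "cand == bound".  */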
4392 /* Check whether it is possible to express the condition in USE by comparison
4393 of candidate CAND. If so, store the value compared against in BOUND. */
4395 static bool
4396 may_eliminate_iv (struct ivopts_data *data,
4397 struct iv_use *use, struct iv_cand *cand, tree *bound)
4399 basic_block ex_bb;
4400 edge exit;
4401 tree nit, period;
4402 struct loop *loop = data->current_loop;
4403 aff_tree bnd;
4404 struct tree_niter_desc *desc = NULL;
4406 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4407 return false;
4409 /* For now, this works only for exits that dominate the loop latch.
4410 TODO: extend to other conditions inside loop body. */
4411 ex_bb = gimple_bb (use->stmt);
4412 if (use->stmt != last_stmt (ex_bb)
4413 || gimple_code (use->stmt) != GIMPLE_COND
4414 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4415 return false;
4417 exit = EDGE_SUCC (ex_bb, 0);
4418 if (flow_bb_inside_loop_p (loop, exit->dest))
4419 exit = EDGE_SUCC (ex_bb, 1);
4420 if (flow_bb_inside_loop_p (loop, exit->dest))
4421 return false;
4423 nit = niter_for_exit (data, exit, &desc);
4424 if (!nit)
4425 return false;
4427 /* Determine whether we can use the variable to test the exit condition.
4428 This is the case iff the period of the induction variable is greater
4429 than the number of iterations for which the exit condition is true. */
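/* For example, an 8-bit candidate with step 1 has period 255, so it can
   replace an exit test that holds for at most 255 iterations (254 if the
   use follows the increment), but not one that may hold for 300
   iterations, since the candidate would wrap around first.  */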
4430 period = iv_period (cand->iv);
4432 /* If the number of iterations is constant, compare against it directly. */
4433 if (TREE_CODE (nit) == INTEGER_CST)
4435 /* See cand_value_at. */
4436 if (stmt_after_increment (loop, cand, use->stmt))
4438 if (!tree_int_cst_lt (nit, period))
4439 return false;
4441 else
4443 if (tree_int_cst_lt (period, nit))
4444 return false;
4448 /* If not, and if this is the only possible exit of the loop, see whether
4449 we can get a conservative estimate on the number of iterations of the
4450 entire loop and compare against that instead. */
4451 else
4453 double_int period_value, max_niter;
4455 max_niter = desc->max;
4456 if (stmt_after_increment (loop, cand, use->stmt))
4457 max_niter = double_int_add (max_niter, double_int_one);
4458 period_value = tree_to_double_int (period);
4459 if (double_int_ucmp (max_niter, period_value) > 0)
4461 /* See if we can take advantage of inferred loop bound information. */
4462 if (loop_only_exit_p (loop, exit))
4464 if (!estimated_loop_iterations (loop, true, &max_niter))
4465 return false;
4466 /* The loop bound is already adjusted by adding 1. */
4467 if (double_int_ucmp (max_niter, period_value) > 0)
4468 return false;
4470 else
4471 return false;
4475 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4477 *bound = aff_combination_to_tree (&bnd);
4478 /* It is unlikely that computing the number of iterations using division
4479 would be more profitable than keeping the original induction variable. */
4480 if (expression_expensive_p (*bound))
4481 return false;
4482 return true;
4485 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4486 be copied if it is used in the loop body and DATA->body_includes_call. */
4488 static int
4489 parm_decl_cost (struct ivopts_data *data, tree bound)
4491 tree sbound = bound;
4492 STRIP_NOPS (sbound);
4494 if (TREE_CODE (sbound) == SSA_NAME
4495 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4496 && gimple_nop_p (SSA_NAME_DEF_STMT (sbound))
4497 && data->body_includes_call)
4498 return COSTS_N_INSNS (1);
4500 return 0;
4503 /* Determines cost of basing replacement of USE on CAND in a condition. */
4505 static bool
4506 determine_use_iv_cost_condition (struct ivopts_data *data,
4507 struct iv_use *use, struct iv_cand *cand)
4509 tree bound = NULL_TREE;
4510 struct iv *cmp_iv;
4511 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4512 comp_cost elim_cost, express_cost, cost, bound_cost;
4513 bool ok;
4514 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4515 tree *control_var, *bound_cst;
4517 /* Only consider real candidates. */
4518 if (!cand->iv)
4520 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4521 return false;
4524 /* Try iv elimination. */
4525 if (may_eliminate_iv (data, use, cand, &bound))
4527 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4528 if (elim_cost.cost == 0)
4529 elim_cost.cost = parm_decl_cost (data, bound);
4530 else if (TREE_CODE (bound) == INTEGER_CST)
4531 elim_cost.cost = 0;
4532 /* If we replace a loop condition 'i < n' with 'p < base + n',
4533 depends_on_elim will have 'base' and 'n' set, which implies
4534 that both 'base' and 'n' will be live during the loop. More likely,
4535 'base + n' will be loop invariant, resulting in only one live value
4536 during the loop. So in that case we clear depends_on_elim and set
4537 elim_inv_expr_id instead. */
4538 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4540 elim_inv_expr_id = get_expr_id (data, bound);
4541 bitmap_clear (depends_on_elim);
4543 /* The bound is a loop invariant, so it will only be computed
4544 once. */
4545 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4547 else
4548 elim_cost = infinite_cost;
4550 /* Try expressing the original giv. If it is compared with an invariant,
4551 note that we cannot get rid of it. */
4552 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4553 NULL, &cmp_iv);
4554 gcc_assert (ok);
4556 /* When the condition is a comparison of the candidate IV against
4557 zero, prefer this IV.
4559 TODO: The constant that we're subtracting from the cost should
4560 be target-dependent. This information should be added to the
4561 target costs for each backend. */
4562 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4563 && integer_zerop (*bound_cst)
4564 && (operand_equal_p (*control_var, cand->var_after, 0)
4565 || operand_equal_p (*control_var, cand->var_before, 0)))
4566 elim_cost.cost -= 1;
4568 express_cost = get_computation_cost (data, use, cand, false,
4569 &depends_on_express, NULL,
4570 &express_inv_expr_id);
4571 fd_ivopts_data = data;
4572 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4574 /* Count the cost of the original bound as well. */
4575 bound_cost = force_var_cost (data, *bound_cst, NULL);
4576 if (bound_cost.cost == 0)
4577 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4578 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4579 bound_cost.cost = 0;
4580 express_cost.cost += bound_cost.cost;
4582 /* Choose the better approach, preferring the eliminated IV. */
4583 if (compare_costs (elim_cost, express_cost) <= 0)
4585 cost = elim_cost;
4586 depends_on = depends_on_elim;
4587 depends_on_elim = NULL;
4588 inv_expr_id = elim_inv_expr_id;
4590 else
4592 cost = express_cost;
4593 depends_on = depends_on_express;
4594 depends_on_express = NULL;
4595 bound = NULL_TREE;
4596 inv_expr_id = express_inv_expr_id;
4599 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4601 if (depends_on_elim)
4602 BITMAP_FREE (depends_on_elim);
4603 if (depends_on_express)
4604 BITMAP_FREE (depends_on_express);
4606 return !infinite_cost_p (cost);
4609 /* Determines cost of basing replacement of USE on CAND. Returns false
4610 if USE cannot be based on CAND. */
4612 static bool
4613 determine_use_iv_cost (struct ivopts_data *data,
4614 struct iv_use *use, struct iv_cand *cand)
4616 switch (use->type)
4618 case USE_NONLINEAR_EXPR:
4619 return determine_use_iv_cost_generic (data, use, cand);
4621 case USE_ADDRESS:
4622 return determine_use_iv_cost_address (data, use, cand);
4624 case USE_COMPARE:
4625 return determine_use_iv_cost_condition (data, use, cand);
4627 default:
4628 gcc_unreachable ();
4632 /* Return true if get_computation_cost indicates that autoincrement is
4633 a possibility for the pair of USE and CAND, false otherwise. */
4635 static bool
4636 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4637 struct iv_cand *cand)
4639 bitmap depends_on;
4640 bool can_autoinc;
4641 comp_cost cost;
4643 if (use->type != USE_ADDRESS)
4644 return false;
4646 cost = get_computation_cost (data, use, cand, true, &depends_on,
4647 &can_autoinc, NULL);
4649 BITMAP_FREE (depends_on);
4651 return !infinite_cost_p (cost) && can_autoinc;
4654 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4655 use that allows autoincrement, and set their AINC_USE if possible. */
4657 static void
4658 set_autoinc_for_original_candidates (struct ivopts_data *data)
4660 unsigned i, j;
4662 for (i = 0; i < n_iv_cands (data); i++)
4664 struct iv_cand *cand = iv_cand (data, i);
4665 struct iv_use *closest = NULL;
4666 if (cand->pos != IP_ORIGINAL)
4667 continue;
4668 for (j = 0; j < n_iv_uses (data); j++)
4670 struct iv_use *use = iv_use (data, j);
4671 unsigned uid = gimple_uid (use->stmt);
4672 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4673 || uid > gimple_uid (cand->incremented_at))
4674 continue;
4675 if (closest == NULL || uid > gimple_uid (closest->stmt))
4676 closest = use;
4678 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4679 continue;
4680 cand->ainc_use = closest;
4684 /* Finds the candidates for the induction variables. */
4686 static void
4687 find_iv_candidates (struct ivopts_data *data)
4689 /* Add commonly used ivs. */
4690 add_standard_iv_candidates (data);
4692 /* Add old induction variables. */
4693 add_old_ivs_candidates (data);
4695 /* Add induction variables derived from uses. */
4696 add_derived_ivs_candidates (data);
4698 set_autoinc_for_original_candidates (data);
4700 /* Record the important candidates. */
4701 record_important_candidates (data);
4704 /* Determines costs of basing the use of the iv on an iv candidate. */
4706 static void
4707 determine_use_iv_costs (struct ivopts_data *data)
4709 unsigned i, j;
4710 struct iv_use *use;
4711 struct iv_cand *cand;
4712 bitmap to_clear = BITMAP_ALLOC (NULL);
4714 alloc_use_cost_map (data);
4716 for (i = 0; i < n_iv_uses (data); i++)
4718 use = iv_use (data, i);
4720 if (data->consider_all_candidates)
4722 for (j = 0; j < n_iv_cands (data); j++)
4724 cand = iv_cand (data, j);
4725 determine_use_iv_cost (data, use, cand);
4728 else
4730 bitmap_iterator bi;
4732 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4734 cand = iv_cand (data, j);
4735 if (!determine_use_iv_cost (data, use, cand))
4736 bitmap_set_bit (to_clear, j);
4739 /* Remove the candidates for which the cost is infinite from
4740 the list of related candidates. */
4741 bitmap_and_compl_into (use->related_cands, to_clear);
4742 bitmap_clear (to_clear);
4746 BITMAP_FREE (to_clear);
4748 if (dump_file && (dump_flags & TDF_DETAILS))
4750 fprintf (dump_file, "Use-candidate costs:\n");
4752 for (i = 0; i < n_iv_uses (data); i++)
4754 use = iv_use (data, i);
4756 fprintf (dump_file, "Use %d:\n", i);
4757 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4758 for (j = 0; j < use->n_map_members; j++)
4760 if (!use->cost_map[j].cand
4761 || infinite_cost_p (use->cost_map[j].cost))
4762 continue;
4764 fprintf (dump_file, " %d\t%d\t%d\t",
4765 use->cost_map[j].cand->id,
4766 use->cost_map[j].cost.cost,
4767 use->cost_map[j].cost.complexity);
4768 if (use->cost_map[j].depends_on)
4769 bitmap_print (dump_file,
4770 use->cost_map[j].depends_on, "","");
4771 if (use->cost_map[j].inv_expr_id != -1)
4772 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4773 fprintf (dump_file, "\n");
4776 fprintf (dump_file, "\n");
4778 fprintf (dump_file, "\n");
4782 /* Determines cost of the candidate CAND. */
4784 static void
4785 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4787 comp_cost cost_base;
4788 unsigned cost, cost_step;
4789 tree base;
4791 if (!cand->iv)
4793 cand->cost = 0;
4794 return;
4797 /* There are two costs associated with the candidate -- its increment
4798 and its initialization. The latter is almost negligible for any loop
4799 that rolls enough, so we give it only very little weight. */
4801 base = cand->iv->base;
4802 cost_base = force_var_cost (data, base, NULL);
4803 /* It is exceptional for the iv register to happen to be initialized with
4804 the proper value at no cost. In general, there will at least be a regcopy
4805 or a const set. */
4806 if (cost_base.cost == 0)
4807 cost_base.cost = COSTS_N_INSNS (1);
4808 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4810 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4812 /* Prefer the original ivs unless we may gain something by replacing them.
4813 The reason is to make debugging simpler, so this is not relevant for
4814 artificial ivs created by other optimization passes. */
4815 if (cand->pos != IP_ORIGINAL
4816 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4817 cost++;
4819 /* Prefer not to insert statements into latch unless there are some
4820 already (so that we do not create unnecessary jumps). */
4821 if (cand->pos == IP_END
4822 && empty_block_p (ip_end_pos (data->current_loop)))
4823 cost++;
4825 cand->cost = cost;
4826 cand->cost_step = cost_step;
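/* Illustratively (all numbers made up): with an add costing 4 and a base
   needing one setup instruction, a loop that rolls many times gives
   cost ~ 4 plus a small amortized setup share; the extra 1 added to
   non-original (or artificial) ivs above merely breaks ties in favor
   of the user-visible variable.  */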
4829 /* Determines costs of computation of the candidates. */
4831 static void
4832 determine_iv_costs (struct ivopts_data *data)
4834 unsigned i;
4836 if (dump_file && (dump_flags & TDF_DETAILS))
4838 fprintf (dump_file, "Candidate costs:\n");
4839 fprintf (dump_file, " cand\tcost\n");
4842 for (i = 0; i < n_iv_cands (data); i++)
4844 struct iv_cand *cand = iv_cand (data, i);
4846 determine_iv_cost (data, cand);
4848 if (dump_file && (dump_flags & TDF_DETAILS))
4849 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4852 if (dump_file && (dump_flags & TDF_DETAILS))
4853 fprintf (dump_file, "\n");
4856 /* Calculates cost for having SIZE induction variables. */
4858 static unsigned
4859 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4861 /* We add size to the cost, so that we prefer eliminating ivs
4862 if possible. */
4863 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4864 data->body_includes_call);
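/* For instance, requesting one more iv when the loop already keeps
   roughly target_avail_regs values live can push the estimate into the
   range where spill costs are charged, while the "+ size" term alone
   already makes a smaller set win between otherwise equal solutions.
   The exact estimate is target-dependent.  */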
4867 /* For each size of the induction variable set determine the penalty. */
4869 static void
4870 determine_set_costs (struct ivopts_data *data)
4872 unsigned j, n;
4873 gimple phi;
4874 gimple_stmt_iterator psi;
4875 tree op;
4876 struct loop *loop = data->current_loop;
4877 bitmap_iterator bi;
4879 if (dump_file && (dump_flags & TDF_DETAILS))
4881 fprintf (dump_file, "Global costs:\n");
4882 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4883 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4884 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4885 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4888 n = 0;
4889 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4891 phi = gsi_stmt (psi);
4892 op = PHI_RESULT (phi);
4894 if (!is_gimple_reg (op))
4895 continue;
4897 if (get_iv (data, op))
4898 continue;
4900 n++;
4903 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4905 struct version_info *info = ver_info (data, j);
4907 if (info->inv_id && info->has_nonlin_use)
4908 n++;
4911 data->regs_used = n;
4912 if (dump_file && (dump_flags & TDF_DETAILS))
4913 fprintf (dump_file, " regs_used %d\n", n);
4915 if (dump_file && (dump_flags & TDF_DETAILS))
4917 fprintf (dump_file, " cost for size:\n");
4918 fprintf (dump_file, " ivs\tcost\n");
4919 for (j = 0; j <= 2 * target_avail_regs; j++)
4920 fprintf (dump_file, " %d\t%d\n", j,
4921 ivopts_global_cost_for_size (data, j));
4922 fprintf (dump_file, "\n");
4926 /* Returns true if A is a cheaper cost pair than B. */
4928 static bool
4929 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4931 int cmp;
4933 if (!a)
4934 return false;
4936 if (!b)
4937 return true;
4939 cmp = compare_costs (a->cost, b->cost);
4940 if (cmp < 0)
4941 return true;
4943 if (cmp > 0)
4944 return false;
4946 /* In case the costs are the same, prefer the cheaper candidate. */
4947 if (a->cand->cost < b->cand->cost)
4948 return true;
4950 return false;
4954 /* Returns the candidate by which USE is expressed in IVS. */
4956 static struct cost_pair *
4957 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4959 return ivs->cand_for_use[use->id];
4962 /* Computes the cost field of the IVS structure. */
4964 static void
4965 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4967 comp_cost cost = ivs->cand_use_cost;
4969 cost.cost += ivs->cand_cost;
4971 cost.cost += ivopts_global_cost_for_size (data,
4972 ivs->n_regs + ivs->num_used_inv_expr);
4974 ivs->cost = cost;
4977 /* Remove the invariants in set INVS from set IVS. */
4979 static void
4980 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4982 bitmap_iterator bi;
4983 unsigned iid;
4985 if (!invs)
4986 return;
4988 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4990 ivs->n_invariant_uses[iid]--;
4991 if (ivs->n_invariant_uses[iid] == 0)
4992 ivs->n_regs--;
4996 /* Set USE not to be expressed by any candidate in IVS. */
4998 static void
4999 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5000 struct iv_use *use)
5002 unsigned uid = use->id, cid;
5003 struct cost_pair *cp;
5005 cp = ivs->cand_for_use[uid];
5006 if (!cp)
5007 return;
5008 cid = cp->cand->id;
5010 ivs->bad_uses++;
5011 ivs->cand_for_use[uid] = NULL;
5012 ivs->n_cand_uses[cid]--;
5014 if (ivs->n_cand_uses[cid] == 0)
5016 bitmap_clear_bit (ivs->cands, cid);
5017 /* Do not count the pseudocandidates. */
5018 if (cp->cand->iv)
5019 ivs->n_regs--;
5020 ivs->n_cands--;
5021 ivs->cand_cost -= cp->cand->cost;
5023 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5026 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5028 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5030 if (cp->inv_expr_id != -1)
5032 ivs->used_inv_expr[cp->inv_expr_id]--;
5033 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5034 ivs->num_used_inv_expr--;
5036 iv_ca_recount_cost (data, ivs);
5039 /* Add invariants in set INVS to set IVS. */
5041 static void
5042 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5044 bitmap_iterator bi;
5045 unsigned iid;
5047 if (!invs)
5048 return;
5050 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5052 ivs->n_invariant_uses[iid]++;
5053 if (ivs->n_invariant_uses[iid] == 1)
5054 ivs->n_regs++;
5058 /* Set cost pair for USE in set IVS to CP. */
5060 static void
5061 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5062 struct iv_use *use, struct cost_pair *cp)
5064 unsigned uid = use->id, cid;
5066 if (ivs->cand_for_use[uid] == cp)
5067 return;
5069 if (ivs->cand_for_use[uid])
5070 iv_ca_set_no_cp (data, ivs, use);
5072 if (cp)
5074 cid = cp->cand->id;
5076 ivs->bad_uses--;
5077 ivs->cand_for_use[uid] = cp;
5078 ivs->n_cand_uses[cid]++;
5079 if (ivs->n_cand_uses[cid] == 1)
5081 bitmap_set_bit (ivs->cands, cid);
5082 /* Do not count the pseudocandidates. */
5083 if (cp->cand->iv)
5084 ivs->n_regs++;
5085 ivs->n_cands++;
5086 ivs->cand_cost += cp->cand->cost;
5088 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5091 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5092 iv_ca_set_add_invariants (ivs, cp->depends_on);
5094 if (cp->inv_expr_id != -1)
5096 ivs->used_inv_expr[cp->inv_expr_id]++;
5097 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5098 ivs->num_used_inv_expr++;
5100 iv_ca_recount_cost (data, ivs);
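/* Together, iv_ca_set_cp and iv_ca_set_no_cp maintain the invariant
   that IVS->n_regs equals the number of distinct real (non-pseudo)
   candidates currently used plus the number of invariants with a
   nonzero use count, and that IVS->cost always matches the current
   assignment.  */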
5104 /* Extend set IVS by expressing USE by some of the candidates in it
5105 if possible. All important candidates will be considered
5106 if IMPORTANT_CANDIDATES is true. */
5108 static void
5109 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5110 struct iv_use *use, bool important_candidates)
5112 struct cost_pair *best_cp = NULL, *cp;
5113 bitmap_iterator bi;
5114 bitmap cands;
5115 unsigned i;
5117 gcc_assert (ivs->upto >= use->id);
5119 if (ivs->upto == use->id)
5121 ivs->upto++;
5122 ivs->bad_uses++;
5125 cands = (important_candidates ? data->important_candidates : ivs->cands);
5126 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5128 struct iv_cand *cand = iv_cand (data, i);
5130 cp = get_use_iv_cost (data, use, cand);
5132 if (cheaper_cost_pair (cp, best_cp))
5133 best_cp = cp;
5136 iv_ca_set_cp (data, ivs, use, best_cp);
5139 /* Get cost for assignment IVS. */
5141 static comp_cost
5142 iv_ca_cost (struct iv_ca *ivs)
5144 /* This was a conditional expression but it triggered a bug in
5145 Sun C 5.5. */
5146 if (ivs->bad_uses)
5147 return infinite_cost;
5148 else
5149 return ivs->cost;
5152 /* Returns true if all dependences of CP are among invariants in IVS. */
5154 static bool
5155 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5157 unsigned i;
5158 bitmap_iterator bi;
5160 if (!cp->depends_on)
5161 return true;
5163 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5165 if (ivs->n_invariant_uses[i] == 0)
5166 return false;
5169 return true;
5172 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5173 it before NEXT_CHANGE. */
5175 static struct iv_ca_delta *
5176 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5177 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5179 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5181 change->use = use;
5182 change->old_cp = old_cp;
5183 change->new_cp = new_cp;
5184 change->next_change = next_change;
5186 return change;
5189 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5190 are rewritten. */
5192 static struct iv_ca_delta *
5193 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5195 struct iv_ca_delta *last;
5197 if (!l2)
5198 return l1;
5200 if (!l1)
5201 return l2;
5203 for (last = l1; last->next_change; last = last->next_change)
5204 continue;
5205 last->next_change = l2;
5207 return l1;
5210 /* Reverse the list of changes DELTA, forming the inverse to it. */
5212 static struct iv_ca_delta *
5213 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5215 struct iv_ca_delta *act, *next, *prev = NULL;
5216 struct cost_pair *tmp;
5218 for (act = delta; act; act = next)
5220 next = act->next_change;
5221 act->next_change = prev;
5222 prev = act;
5224 tmp = act->old_cp;
5225 act->old_cp = act->new_cp;
5226 act->new_cp = tmp;
5229 return prev;
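/* For instance, the list [use1: a -> b], [use2: c -> d] becomes
   [use2: d -> c], [use1: b -> a], so committing the reversed list
   "forward" exactly undoes the original sequence of changes.  */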
5232 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5233 reverted instead. */
5235 static void
5236 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5237 struct iv_ca_delta *delta, bool forward)
5239 struct cost_pair *from, *to;
5240 struct iv_ca_delta *act;
5242 if (!forward)
5243 delta = iv_ca_delta_reverse (delta);
5245 for (act = delta; act; act = act->next_change)
5247 from = act->old_cp;
5248 to = act->new_cp;
5249 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5250 iv_ca_set_cp (data, ivs, act->use, to);
5253 if (!forward)
5254 iv_ca_delta_reverse (delta);
5257 /* Returns true if CAND is used in IVS. */
5259 static bool
5260 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5262 return ivs->n_cand_uses[cand->id] > 0;
5265 /* Returns number of induction variable candidates in the set IVS. */
5267 static unsigned
5268 iv_ca_n_cands (struct iv_ca *ivs)
5270 return ivs->n_cands;
5273 /* Free the list of changes DELTA. */
5275 static void
5276 iv_ca_delta_free (struct iv_ca_delta **delta)
5278 struct iv_ca_delta *act, *next;
5280 for (act = *delta; act; act = next)
5282 next = act->next_change;
5283 free (act);
5286 *delta = NULL;
5289 /* Allocates a new iv candidate assignment. */
5291 static struct iv_ca *
5292 iv_ca_new (struct ivopts_data *data)
5294 struct iv_ca *nw = XNEW (struct iv_ca);
5296 nw->upto = 0;
5297 nw->bad_uses = 0;
5298 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5299 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5300 nw->cands = BITMAP_ALLOC (NULL);
5301 nw->n_cands = 0;
5302 nw->n_regs = 0;
5303 nw->cand_use_cost = zero_cost;
5304 nw->cand_cost = 0;
5305 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5306 nw->cost = zero_cost;
5307 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5308 nw->num_used_inv_expr = 0;
5310 return nw;
5313 /* Free memory occupied by the set IVS. */
5315 static void
5316 iv_ca_free (struct iv_ca **ivs)
5318 free ((*ivs)->cand_for_use);
5319 free ((*ivs)->n_cand_uses);
5320 BITMAP_FREE ((*ivs)->cands);
5321 free ((*ivs)->n_invariant_uses);
5322 free ((*ivs)->used_inv_expr);
5323 free (*ivs);
5324 *ivs = NULL;
5327 /* Dumps IVS to FILE. */
5329 static void
5330 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5332 const char *pref = " invariants ";
5333 unsigned i;
5334 comp_cost cost = iv_ca_cost (ivs);
5336 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5337 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5338 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5339 bitmap_print (file, ivs->cands, " candidates: ","\n");
5341 for (i = 0; i < ivs->upto; i++)
5343 struct iv_use *use = iv_use (data, i);
5344 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5345 if (cp)
5346 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5347 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5348 else
5349 fprintf (file, " use:%d --> ??\n", use->id);
5352 for (i = 1; i <= data->max_inv_id; i++)
5353 if (ivs->n_invariant_uses[i])
5355 fprintf (file, "%s%d", pref, i);
5356 pref = ", ";
5358 fprintf (file, "\n\n");
5361 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5362 of the new set, and store the differences in DELTA. The number of ivs in
5363 the new set is stored in N_IVS. MIN_NCAND is a flag; when it is true, the
5364 function tries to find a solution with a minimal number of iv candidates. */
5366 static comp_cost
5367 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5368 struct iv_cand *cand, struct iv_ca_delta **delta,
5369 unsigned *n_ivs, bool min_ncand)
5371 unsigned i;
5372 comp_cost cost;
5373 struct iv_use *use;
5374 struct cost_pair *old_cp, *new_cp;
5376 *delta = NULL;
5377 for (i = 0; i < ivs->upto; i++)
5379 use = iv_use (data, i);
5380 old_cp = iv_ca_cand_for_use (ivs, use);
5382 if (old_cp
5383 && old_cp->cand == cand)
5384 continue;
5386 new_cp = get_use_iv_cost (data, use, cand);
5387 if (!new_cp)
5388 continue;
5390 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5391 continue;
5393 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5394 continue;
5396 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5399 iv_ca_delta_commit (data, ivs, *delta, true);
5400 cost = iv_ca_cost (ivs);
5401 if (n_ivs)
5402 *n_ivs = iv_ca_n_cands (ivs);
5403 iv_ca_delta_commit (data, ivs, *delta, false);
5405 return cost;
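/* Note the trial pattern above: the delta is committed just long enough
   to measure the resulting cost and is then reverted; callers such as
   try_improve_iv_set commit it for real only when the cost improves.  */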
5408 /* Try narrowing set IVS by removing CAND. Return the cost of
5409 the new set and store the differences in DELTA. */
5411 static comp_cost
5412 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5413 struct iv_cand *cand, struct iv_ca_delta **delta)
5415 unsigned i, ci;
5416 struct iv_use *use;
5417 struct cost_pair *old_cp, *new_cp, *cp;
5418 bitmap_iterator bi;
5419 struct iv_cand *cnd;
5420 comp_cost cost;
5422 *delta = NULL;
5423 for (i = 0; i < n_iv_uses (data); i++)
5425 use = iv_use (data, i);
5427 old_cp = iv_ca_cand_for_use (ivs, use);
5428 if (old_cp->cand != cand)
5429 continue;
5431 new_cp = NULL;
5433 if (data->consider_all_candidates)
5435 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5437 if (ci == cand->id)
5438 continue;
5440 cnd = iv_cand (data, ci);
5442 cp = get_use_iv_cost (data, use, cnd);
5443 if (!cp)
5444 continue;
5446 if (!iv_ca_has_deps (ivs, cp))
5447 continue;
5449 if (!cheaper_cost_pair (cp, new_cp))
5450 continue;
5452 new_cp = cp;
5455 else
5457 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5459 if (ci == cand->id)
5460 continue;
5462 cnd = iv_cand (data, ci);
5464 cp = get_use_iv_cost (data, use, cnd);
5465 if (!cp)
5466 continue;
5467 if (!iv_ca_has_deps (ivs, cp))
5468 continue;
5470 if (!cheaper_cost_pair (cp, new_cp))
5471 continue;
5473 new_cp = cp;
5477 if (!new_cp)
5479 iv_ca_delta_free (delta);
5480 return infinite_cost;
5483 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5486 iv_ca_delta_commit (data, ivs, *delta, true);
5487 cost = iv_ca_cost (ivs);
5488 iv_ca_delta_commit (data, ivs, *delta, false);
5490 return cost;
5493 /* Try optimizing the set of candidates IVS by removing candidates other
5494 than EXCEPT_CAND from it. Return the cost of the new set, and store
5495 the differences in DELTA. */
5497 static comp_cost
5498 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5499 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5501 bitmap_iterator bi;
5502 struct iv_ca_delta *act_delta, *best_delta;
5503 unsigned i;
5504 comp_cost best_cost, acost;
5505 struct iv_cand *cand;
5507 best_delta = NULL;
5508 best_cost = iv_ca_cost (ivs);
5510 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5512 cand = iv_cand (data, i);
5514 if (cand == except_cand)
5515 continue;
5517 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5519 if (compare_costs (acost, best_cost) < 0)
5521 best_cost = acost;
5522 iv_ca_delta_free (&best_delta);
5523 best_delta = act_delta;
5525 else
5526 iv_ca_delta_free (&act_delta);
5529 if (!best_delta)
5531 *delta = NULL;
5532 return best_cost;
5535 /* Recurse to possibly remove other unnecessary ivs. */
5536 iv_ca_delta_commit (data, ivs, best_delta, true);
5537 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5538 iv_ca_delta_commit (data, ivs, best_delta, false);
5539 *delta = iv_ca_delta_join (best_delta, *delta);
5540 return best_cost;
5543 /* Tries to extend the set IVS in the best possible way in order
5544 to express the USE. If ORIGINALP is true, prefer candidates from
5545 the original set of IVs, otherwise favor important candidates not
5546 based on any memory object. */
5548 static bool
5549 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5550 struct iv_use *use, bool originalp)
5552 comp_cost best_cost, act_cost;
5553 unsigned i;
5554 bitmap_iterator bi;
5555 struct iv_cand *cand;
5556 struct iv_ca_delta *best_delta = NULL, *act_delta;
5557 struct cost_pair *cp;
5559 iv_ca_add_use (data, ivs, use, false);
5560 best_cost = iv_ca_cost (ivs);
5562 cp = iv_ca_cand_for_use (ivs, use);
5563 if (!cp)
5565 ivs->upto--;
5566 ivs->bad_uses--;
5567 iv_ca_add_use (data, ivs, use, true);
5568 best_cost = iv_ca_cost (ivs);
5569 cp = iv_ca_cand_for_use (ivs, use);
5571 if (cp)
5573 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5574 iv_ca_set_no_cp (data, ivs, use);
5577 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5578 first try important candidates not based on any memory object. Only if
5579 this fails, try the specific ones. Rationale -- in loops with many
5580 variables the best choice often is to use just one generic biv. If we
5581 added here many ivs specific to the uses, the optimization algorithm later
5582 would be likely to get stuck in a local minimum, thus causing us to create
5583 too many ivs. The approach from few ivs to more seems more likely to be
5584 successful -- starting from few ivs, replacing an expensive use by a
5585 specific iv should always be a win. */
5586 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5588 cand = iv_cand (data, i);
5590 if (originalp && cand->pos != IP_ORIGINAL)
5591 continue;
5593 if (!originalp && cand->iv->base_object != NULL_TREE)
5594 continue;
5596 if (iv_ca_cand_used_p (ivs, cand))
5597 continue;
5599 cp = get_use_iv_cost (data, use, cand);
5600 if (!cp)
5601 continue;
5603 iv_ca_set_cp (data, ivs, use, cp);
5604 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5605 true);
5606 iv_ca_set_no_cp (data, ivs, use);
5607 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5609 if (compare_costs (act_cost, best_cost) < 0)
5611 best_cost = act_cost;
5613 iv_ca_delta_free (&best_delta);
5614 best_delta = act_delta;
5616 else
5617 iv_ca_delta_free (&act_delta);
5620 if (infinite_cost_p (best_cost))
5622 for (i = 0; i < use->n_map_members; i++)
5624 cp = use->cost_map + i;
5625 cand = cp->cand;
5626 if (!cand)
5627 continue;
5629 /* Already tried this. */
5630 if (cand->important)
5632 if (originalp && cand->pos == IP_ORIGINAL)
5633 continue;
5634 if (!originalp && cand->iv->base_object == NULL_TREE)
5635 continue;
5638 if (iv_ca_cand_used_p (ivs, cand))
5639 continue;
5641 act_delta = NULL;
5642 iv_ca_set_cp (data, ivs, use, cp);
5643 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5644 iv_ca_set_no_cp (data, ivs, use);
5645 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5646 cp, act_delta);
5648 if (compare_costs (act_cost, best_cost) < 0)
5650 best_cost = act_cost;
5652 if (best_delta)
5653 iv_ca_delta_free (&best_delta);
5654 best_delta = act_delta;
5656 else
5657 iv_ca_delta_free (&act_delta);
5661 iv_ca_delta_commit (data, ivs, best_delta, true);
5662 iv_ca_delta_free (&best_delta);
5664 return !infinite_cost_p (best_cost);
5667 /* Finds an initial assignment of candidates to uses. */
5669 static struct iv_ca *
5670 get_initial_solution (struct ivopts_data *data, bool originalp)
5672 struct iv_ca *ivs = iv_ca_new (data);
5673 unsigned i;
5675 for (i = 0; i < n_iv_uses (data); i++)
5676 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5678 iv_ca_free (&ivs);
5679 return NULL;
5682 return ivs;
5685 /* Tries to improve set of induction variables IVS. */
5687 static bool
5688 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5690 unsigned i, n_ivs;
5691 comp_cost acost, best_cost = iv_ca_cost (ivs);
5692 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5693 struct iv_cand *cand;
5695 /* Try extending the set of induction variables by one. */
5696 for (i = 0; i < n_iv_cands (data); i++)
5698 cand = iv_cand (data, i);
5700 if (iv_ca_cand_used_p (ivs, cand))
5701 continue;
5703 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5704 if (!act_delta)
5705 continue;
5707 /* If we successfully added the candidate and the set is small enough,
5708 try optimizing it by removing other candidates. */
5709 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5711 iv_ca_delta_commit (data, ivs, act_delta, true);
5712 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5713 iv_ca_delta_commit (data, ivs, act_delta, false);
5714 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5717 if (compare_costs (acost, best_cost) < 0)
5719 best_cost = acost;
5720 iv_ca_delta_free (&best_delta);
5721 best_delta = act_delta;
5723 else
5724 iv_ca_delta_free (&act_delta);
5727 if (!best_delta)
5729 /* Try removing the candidates from the set instead. */
5730 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5732 /* Nothing more we can do. */
5733 if (!best_delta)
5734 return false;
5737 iv_ca_delta_commit (data, ivs, best_delta, true);
5738 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5739 iv_ca_delta_free (&best_delta);
5740 return true;
5743 /* Attempts to find the optimal set of induction variables. We use a simple
5744 greedy heuristic: we try to replace at most one candidate in the selected
5745 solution and remove the unused ivs while this improves the cost. */
5747 static struct iv_ca *
5748 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5750 struct iv_ca *set;
5752 /* Get the initial solution. */
5753 set = get_initial_solution (data, originalp);
5754 if (!set)
5756 if (dump_file && (dump_flags & TDF_DETAILS))
5757 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5758 return NULL;
5761 if (dump_file && (dump_flags & TDF_DETAILS))
5763 fprintf (dump_file, "Initial set of candidates:\n");
5764 iv_ca_dump (data, dump_file, set);
5767 while (try_improve_iv_set (data, set))
5769 if (dump_file && (dump_flags & TDF_DETAILS))
5771 fprintf (dump_file, "Improved to:\n");
5772 iv_ca_dump (data, dump_file, set);
5776 return set;
5779 static struct iv_ca *
5780 find_optimal_iv_set (struct ivopts_data *data)
5782 unsigned i;
5783 struct iv_ca *set, *origset;
5784 struct iv_use *use;
5785 comp_cost cost, origcost;
5787 /* Determine the cost based on a strategy that starts with original IVs,
5788 and then try again using a strategy that prefers candidates not based
5789 on any memory object. */
5790 origset = find_optimal_iv_set_1 (data, true);
5791 set = find_optimal_iv_set_1 (data, false);
5793 if (!origset && !set)
5794 return NULL;
5796 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5797 cost = set ? iv_ca_cost (set) : infinite_cost;
5799 if (dump_file && (dump_flags & TDF_DETAILS))
5801 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5802 origcost.cost, origcost.complexity);
5803 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5804 cost.cost, cost.complexity);
5807 /* Choose the one with the best cost. */
5808 if (compare_costs (origcost, cost) <= 0)
5810 if (set)
5811 iv_ca_free (&set);
5812 set = origset;
5814 else if (origset)
5815 iv_ca_free (&origset);
5817 for (i = 0; i < n_iv_uses (data); i++)
5819 use = iv_use (data, i);
5820 use->selected = iv_ca_cand_for_use (set, use)->cand;
5823 return set;
5826 /* Creates a new induction variable corresponding to CAND. */
5828 static void
5829 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5831 gimple_stmt_iterator incr_pos;
5832 tree base;
5833 bool after = false;
5835 if (!cand->iv)
5836 return;
5838 switch (cand->pos)
5840 case IP_NORMAL:
5841 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5842 break;
5844 case IP_END:
5845 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5846 after = true;
5847 break;
5849 case IP_AFTER_USE:
5850 after = true;
5851 /* fall through */
5852 case IP_BEFORE_USE:
5853 incr_pos = gsi_for_stmt (cand->incremented_at);
5854 break;
5856 case IP_ORIGINAL:
5857 /* Mark that the iv is preserved. */
5858 name_info (data, cand->var_before)->preserve_biv = true;
5859 name_info (data, cand->var_after)->preserve_biv = true;
5861 /* Rewrite the increment so that it uses var_before directly. */
5862 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5863 return;
5866 gimple_add_tmp_var (cand->var_before);
5867 add_referenced_var (cand->var_before);
5869 base = unshare_expr (cand->iv->base);
5871 create_iv (base, unshare_expr (cand->iv->step),
5872 cand->var_before, data->current_loop,
5873 &incr_pos, after, &cand->var_before, &cand->var_after);
5876 /* Creates new induction variables described in SET. */
5878 static void
5879 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5881 unsigned i;
5882 struct iv_cand *cand;
5883 bitmap_iterator bi;
5885 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5887 cand = iv_cand (data, i);
5888 create_new_iv (data, cand);
5891 if (dump_file && (dump_flags & TDF_DETAILS))
5893 fprintf (dump_file, "\nSelected IV set: \n");
5894 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5896 cand = iv_cand (data, i);
5897 dump_cand (dump_file, cand);
5899 fprintf (dump_file, "\n");
5903 /* Rewrites USE (definition of iv used in a nonlinear expression)
5904 using candidate CAND. */
5906 static void
5907 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5908 struct iv_use *use, struct iv_cand *cand)
5910 tree comp;
5911 tree op, tgt;
5912 gimple ass;
5913 gimple_stmt_iterator bsi;
5915 /* An important special case -- if we are asked to express the value of
5916 the original iv by itself, just exit; there is no need to
5917 introduce a new computation (that might also need casting the
5918 variable to unsigned and back). */
5919 if (cand->pos == IP_ORIGINAL
5920 && cand->incremented_at == use->stmt)
5922 tree step, ctype, utype;
5923 enum tree_code incr_code = PLUS_EXPR, old_code;
5925 gcc_assert (is_gimple_assign (use->stmt));
5926 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5928 step = cand->iv->step;
5929 ctype = TREE_TYPE (step);
5930 utype = TREE_TYPE (cand->var_after);
5931 if (TREE_CODE (step) == NEGATE_EXPR)
5933 incr_code = MINUS_EXPR;
5934 step = TREE_OPERAND (step, 0);
5937 /* Check whether we may leave the computation unchanged.
5938 This is the case only if it does not rely on other
5939 computations in the loop -- otherwise, the computation
5940 we rely upon may be removed in remove_unused_ivs,
5941 thus leading to ICE. */
5942 old_code = gimple_assign_rhs_code (use->stmt);
5943 if (old_code == PLUS_EXPR
5944 || old_code == MINUS_EXPR
5945 || old_code == POINTER_PLUS_EXPR)
5947 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5948 op = gimple_assign_rhs2 (use->stmt);
5949 else if (old_code != MINUS_EXPR
5950 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5951 op = gimple_assign_rhs1 (use->stmt);
5952 else
5953 op = NULL_TREE;
5955 else
5956 op = NULL_TREE;
5958 if (op
5959 && (TREE_CODE (op) == INTEGER_CST
5960 || operand_equal_p (op, step, 0)))
5961 return;
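/* E.g. an original increment "i.1 = i.0 + 4" whose other operand is the
   constant step can be left alone; an increment like "i.1 = i.0 + n_7",
   where n_7 may be computed from ivs about to be removed, must be
   rewritten in terms of the candidate (the names are illustrative).  */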
5963 /* Otherwise, add the necessary computations to express
5964 the iv. */
5965 op = fold_convert (ctype, cand->var_before);
5966 comp = fold_convert (utype,
5967 build2 (incr_code, ctype, op,
5968 unshare_expr (step)));
5970 else
5972 comp = get_computation (data->current_loop, use, cand);
5973 gcc_assert (comp != NULL_TREE);
5976 switch (gimple_code (use->stmt))
5978 case GIMPLE_PHI:
5979 tgt = PHI_RESULT (use->stmt);
5981 /* If we should keep the biv, do not replace it. */
5982 if (name_info (data, tgt)->preserve_biv)
5983 return;
5985 bsi = gsi_after_labels (gimple_bb (use->stmt));
5986 break;
5988 case GIMPLE_ASSIGN:
5989 tgt = gimple_assign_lhs (use->stmt);
5990 bsi = gsi_for_stmt (use->stmt);
5991 break;
5993 default:
5994 gcc_unreachable ();
5997 if (!valid_gimple_rhs_p (comp)
5998 || (gimple_code (use->stmt) != GIMPLE_PHI
5999 /* We can't allow re-allocating the stmt as it might be pointed
6000 to still. */
6001 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6002 >= gimple_num_ops (gsi_stmt (bsi)))))
6004 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6005 true, GSI_SAME_STMT);
6006 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6008 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6009 /* As this isn't a plain copy we have to reset alignment
6010 information. */
6011 if (SSA_NAME_PTR_INFO (comp))
6013 SSA_NAME_PTR_INFO (comp)->align = 1;
6014 SSA_NAME_PTR_INFO (comp)->misalign = 0;
6019 if (gimple_code (use->stmt) == GIMPLE_PHI)
6021 ass = gimple_build_assign (tgt, comp);
6022 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6024 bsi = gsi_for_stmt (use->stmt);
6025 remove_phi_node (&bsi, false);
6027 else
6029 gimple_assign_set_rhs_from_tree (&bsi, comp);
6030 use->stmt = gsi_stmt (bsi);
6034 /* Copies the reference information from OLD_REF to NEW_REF. */
6036 static void
6037 copy_ref_info (tree new_ref, tree old_ref)
6039 tree new_ptr_base = NULL_TREE;
6041 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
6042 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
6044 new_ptr_base = TREE_OPERAND (new_ref, 0);
6046 /* We can transfer points-to information from an old pointer
6047 or decl base to the new one. */
6048 if (new_ptr_base
6049 && TREE_CODE (new_ptr_base) == SSA_NAME
6050 && !SSA_NAME_PTR_INFO (new_ptr_base))
6052 tree base = get_base_address (old_ref);
6053 if (!base)
6055 else if ((TREE_CODE (base) == MEM_REF
6056 || TREE_CODE (base) == TARGET_MEM_REF)
6057 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
6058 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
6060 struct ptr_info_def *new_pi;
6061 duplicate_ssa_name_ptr_info
6062 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
6063 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
6064 /* We have to be careful about transferring alignment information. */
6065 if (TREE_CODE (old_ref) == MEM_REF
6066 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
6067 && (TMR_INDEX2 (new_ref)
6068 || (TMR_STEP (new_ref)
6069 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
6070 < new_pi->align)))))
6072 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
6073 mem_ref_offset (new_ref)).low;
6074 new_pi->misalign &= (new_pi->align - 1);
6076 else
6078 new_pi->align = 1;
6079 new_pi->misalign = 0;
6082 else if (TREE_CODE (base) == VAR_DECL
6083 || TREE_CODE (base) == PARM_DECL
6084 || TREE_CODE (base) == RESULT_DECL)
6086 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
6087 pt_solution_set_var (&pi->pt, base);
6092 /* Performs a peephole optimization to reorder the iv update statement with
6093 a mem ref to enable instruction combining in later phases. The mem ref uses
6094 the iv value before the update, so the reordering transformation requires
6095 adjustment of the offset. CAND is the selected IV_CAND.
6097 Example:
6099 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6100 iv2 = iv1 + 1;
6102 if (t < val) (1)
6103 goto L;
6104 goto Head;
6107 directly propagating t over to (1) will introduce an overlapping live
6108 range and thus increase register pressure. This peephole transforms it into:
6111 iv2 = iv1 + 1;
6112 t = MEM_REF (base, iv2, 8, 8);
6113 if (t < val)
6114 goto L;
6115 goto Head;
6118 static void
6119 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6121 tree var_after;
6122 gimple iv_update, stmt;
6123 basic_block bb;
6124 gimple_stmt_iterator gsi, gsi_iv;
6126 if (cand->pos != IP_NORMAL)
6127 return;
6129 var_after = cand->var_after;
6130 iv_update = SSA_NAME_DEF_STMT (var_after);
6132 bb = gimple_bb (iv_update);
6133 gsi = gsi_last_nondebug_bb (bb);
6134 stmt = gsi_stmt (gsi);
6136 /* Only handle conditional statement for now. */
6137 if (gimple_code (stmt) != GIMPLE_COND)
6138 return;
6140 gsi_prev_nondebug (&gsi);
6141 stmt = gsi_stmt (gsi);
6142 if (stmt != iv_update)
6143 return;
6145 gsi_prev_nondebug (&gsi);
6146 if (gsi_end_p (gsi))
6147 return;
6149 stmt = gsi_stmt (gsi);
6150 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6151 return;
6153 if (stmt != use->stmt)
6154 return;
6156 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6157 return;
6159 if (dump_file && (dump_flags & TDF_DETAILS))
6161 fprintf (dump_file, "Reordering \n");
6162 print_gimple_stmt (dump_file, iv_update, 0, 0);
6163 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6164 fprintf (dump_file, "\n");
6167 gsi = gsi_for_stmt (use->stmt);
6168 gsi_iv = gsi_for_stmt (iv_update);
6169 gsi_move_before (&gsi_iv, &gsi);
6171 cand->pos = IP_BEFORE_USE;
6172 cand->incremented_at = use->stmt;
6175 /* Rewrites USE (address that is an iv) using candidate CAND. */
6177 static void
6178 rewrite_use_address (struct ivopts_data *data,
6179 struct iv_use *use, struct iv_cand *cand)
6181 aff_tree aff;
6182 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6183 tree base_hint = NULL_TREE;
6184 tree ref, iv;
6185 bool ok;
6187 adjust_iv_update_pos (cand, use);
6188 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6189 gcc_assert (ok);
6190 unshare_aff_combination (&aff);
6192 /* To avoid undefined overflow problems, all IV candidates use unsigned
6193 integer types. The drawback is that this makes it impossible for
6194 create_mem_ref to distinguish an IV that is based on a memory object
6195 from one that represents simply an offset.
6197 To work around this problem, we pass a hint to create_mem_ref that
6198 indicates which variable (if any) in aff is an IV based on a memory
6199 object. Note that we only consider the candidate. If this is not
6200 based on an object, the base of the reference is in some subexpression
6201 of the use -- but these will use pointer types, so they are recognized
6202 by the create_mem_ref heuristics anyway. */
6203 if (cand->iv->base_object)
6204 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6206 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6207 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6208 reference_alias_ptr_type (*use->op_p),
6209 iv, base_hint, data->speed);
6210 copy_ref_info (ref, *use->op_p);
6211 *use->op_p = ref;
6214 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6215 candidate CAND. */
6217 static void
6218 rewrite_use_compare (struct ivopts_data *data,
6219 struct iv_use *use, struct iv_cand *cand)
6221 tree comp, *var_p, op, bound;
6222 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6223 enum tree_code compare;
6224 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6225 bool ok;
6227 bound = cp->value;
6228 if (bound)
6230 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6231 tree var_type = TREE_TYPE (var);
6232 gimple_seq stmts;
6234 if (dump_file && (dump_flags & TDF_DETAILS))
6236 fprintf (dump_file, "Replacing exit test: ");
6237 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6239 compare = iv_elimination_compare (data, use);
6240 bound = unshare_expr (fold_convert (var_type, bound));
6241 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6242 if (stmts)
6243 gsi_insert_seq_on_edge_immediate (
6244 loop_preheader_edge (data->current_loop),
6245 stmts);
6247 gimple_cond_set_lhs (use->stmt, var);
6248 gimple_cond_set_code (use->stmt, compare);
6249 gimple_cond_set_rhs (use->stmt, op);
6250 return;
6253 /* The induction variable elimination failed; just express the original
6254 giv. */
6255 comp = get_computation (data->current_loop, use, cand);
6256 gcc_assert (comp != NULL_TREE);
6258 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6259 gcc_assert (ok);
6261 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6262 true, GSI_SAME_STMT);
6265 /* Rewrites USE using candidate CAND. */
6267 static void
6268 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6270 switch (use->type)
6272 case USE_NONLINEAR_EXPR:
6273 rewrite_use_nonlinear_expr (data, use, cand);
6274 break;
6276 case USE_ADDRESS:
6277 rewrite_use_address (data, use, cand);
6278 break;
6280 case USE_COMPARE:
6281 rewrite_use_compare (data, use, cand);
6282 break;
6284 default:
6285 gcc_unreachable ();
6288 update_stmt (use->stmt);
6291 /* Rewrite the uses using the selected induction variables. */
6293 static void
6294 rewrite_uses (struct ivopts_data *data)
6296 unsigned i;
6297 struct iv_cand *cand;
6298 struct iv_use *use;
6300 for (i = 0; i < n_iv_uses (data); i++)
6302 use = iv_use (data, i);
6303 cand = use->selected;
6304 gcc_assert (cand);
6306 rewrite_use (data, use, cand);
6310 /* Removes the ivs that are not used after rewriting. */
6312 static void
6313 remove_unused_ivs (struct ivopts_data *data)
6315 unsigned j;
6316 bitmap_iterator bi;
6317 bitmap toremove = BITMAP_ALLOC (NULL);
6319 /* Figure out an order in which to release SSA DEFs so that we don't
6320 release something that we'd have to propagate into a debug stmt
6321 afterwards. */
6322 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6324 struct version_info *info;
6326 info = ver_info (data, j);
6327 if (info->iv
6328 && !integer_zerop (info->iv->step)
6329 && !info->inv_id
6330 && !info->iv->have_use_for
6331 && !info->preserve_biv)
6332 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6335 release_defs_bitset (toremove);
6337 BITMAP_FREE (toremove);
6340 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6341 for pointer_map_traverse. */
static bool
free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
		      void *data ATTRIBUTE_UNUSED)
{
  struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;

  free (niter);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}

/* Finalizes data structures used by the iv optimization pass.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}

/* Returns true if the loop body BODY (an array of NUM_NODES basic blocks)
   includes a function call, not counting inexpensive builtins.  */

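/* Whether the body contains a call feeds the cost model: calls clobber the
   call-used registers, so the register pressure estimate for a candidate
   set presumably has to be more pessimistic in their presence (see the use
   of body_includes_call when the set costs are determined).  */
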
static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}

/* Optimizes the LOOP.  Returns true if anything changed.  */

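/* For instance (a hypothetical example), a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   may end up using a single pointer-like induction variable: the access to
   a[i] is rewritten through it, and the exit test compares it against the
   address just past the accessed array section, so the original counter
   can be eliminated altogether.  */
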
static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}