gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
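/* An illustrative example (not part of the original sources): on a target
   with cheap pointer addressing, a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   can have the address computation &a[i] strength-reduced into a pointer
   that is simply bumped by the element size on each iteration:

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   Choosing between such representations is exactly the job of the cost
   model described above.  */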
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The average number of loop iterations assumed when no estimate is
   available.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
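/* Illustrative sketch (not part of the original sources) of the two most
   common positions: with IP_NORMAL the candidate's increment lands just
   before the exit test,

     loop:
       ...
       ivtmp = ivtmp + step;
       if (ivtmp < bound) goto loop;

   whereas with IP_END it is emitted at the end of the latch block, i.e.
   after the exit condition has already been evaluated.  */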
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns a tree describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns a tree describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
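/* Illustrative behavior (hypothetical inputs, not from the sources): for
   EXPR == &a[i] the result is the address of the array a; for a
   POINTER_PLUS_EXPR such as p + 4 the recursion returns the pointer p
   itself (converted to void *); and for an integer constant the result is
   NULL_TREE, since the value does not point into any object.  */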
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
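/* Illustrative behavior (hypothetical inputs, not from the sources):
   for TOP == i * 12 and BOT == i the MULT_EXPR case yields cst == 12;
   for TOP == (i + 1) * 4 and BOT == i + 1 the result is cst == 4; and
   for TOP == i + 1 with BOT == i the call fails, because the addend 1
   is not a constant multiple of the non-constant BOT.  */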
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
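/* Illustrative behavior (hypothetical inputs, not from the sources): for
   EXPR == p + 16 this returns p and sets *OFFSET to 16; for EXPR == &a[4]
   with 4-byte array elements it returns the address of a and sets *OFFSET
   to 16.  Splitting constant displacements off like this lets different
   uses share a single induction variable candidate.  */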
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
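/* Illustrative behavior (hypothetical inputs, not from the sources): for
   TYPE == int this returns unsigned int, unsigned types are returned
   unchanged, and pointer types are replaced by an unsigned integer type
   of the same precision, so that stepping a candidate wraps modulo
   2^precision instead of invoking undefined signed overflow.  */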
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2300 /* Returns true if incrementing the induction variable at the end of the LOOP
2301 is allowed.
2303 The purpose is to avoid splitting the latch edge with a biv increment, thus
2304 creating a jump, possibly confusing other optimization passes and leaving
2305 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2306 is not available (so we do not have a better alternative), or if the latch
2307 edge is already nonempty. */
2309 static bool
2310 allow_ip_end_pos_p (struct loop *loop)
2312 if (!ip_normal_pos (loop))
2313 return true;
2315 if (!empty_block_p (ip_end_pos (loop)))
2316 return true;
2318 return false;
2321 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2322 Important field is set to IMPORTANT. */
2324 static void
2325 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2326 bool important, struct iv_use *use)
2328 basic_block use_bb = gimple_bb (use->stmt);
2329 enum machine_mode mem_mode;
2330 unsigned HOST_WIDE_INT cstepi;
2332 /* If we insert the increment in any position other than the standard
2333 ones, we must ensure that it is incremented once per iteration.
2334 It must not be in an inner nested loop, or one side of an if
2335 statement. */
2336 if (use_bb->loop_father != data->current_loop
2337 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2338 || stmt_could_throw_p (use->stmt)
2339 || !cst_and_fits_in_hwi (step))
2340 return;
2342 cstepi = int_cst_value (step);
2344 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2345 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2346 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2348 enum tree_code code = MINUS_EXPR;
2349 tree new_base;
2350 tree new_step = step;
2352 if (POINTER_TYPE_P (TREE_TYPE (base)))
2354 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2355 code = POINTER_PLUS_EXPR;
2357 else
2358 new_step = fold_convert (TREE_TYPE (base), new_step);
2359 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2360 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2361 use->stmt);
2363 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2364 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2366 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2367 use->stmt);
2371 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2372 position to POS. If USE is not NULL, the candidate is set as related to
2373 it. The candidate computation is scheduled on all available positions. */
2375 static void
2376 add_candidate (struct ivopts_data *data,
2377 tree base, tree step, bool important, struct iv_use *use)
2379 if (ip_normal_pos (data->current_loop))
2380 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2381 if (ip_end_pos (data->current_loop)
2382 && allow_ip_end_pos_p (data->current_loop))
2383 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2385 if (use != NULL && use->type == USE_ADDRESS)
2386 add_autoinc_candidates (data, base, step, important, use);
2389 /* Add a standard "0 + 1 * iteration" iv candidate for a
2390 type with SIZE bits. */
2392 static void
2393 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2394 unsigned int size)
2396 tree type = lang_hooks.types.type_for_size (size, true);
2397 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2398 true, NULL);
2401 /* Adds standard iv candidates. */
2403 static void
2404 add_standard_iv_candidates (struct ivopts_data *data)
2406 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2408 /* The same for a double-integer type if it is still fast enough. */
2409 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2410 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
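/* Annotation, not part of the original file: on a typical 64-bit target
   with 32-bit int, the calls above add the candidate {base 0, step 1} in
   an unsigned 32-bit type, and, since BITS_PER_WORD (64) >= 2 *
   INT_TYPE_SIZE (32), the same candidate in an unsigned 64-bit type.  */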
2414 /* Adds candidates based on the old induction variable IV. */
2416 static void
2417 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2419 gimple phi;
2420 tree def;
2421 struct iv_cand *cand;
2423 add_candidate (data, iv->base, iv->step, true, NULL);
2425 /* The same, but with initial value zero. */
2426 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2427 add_candidate (data, size_int (0), iv->step, true, NULL);
2428 else
2429 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2430 iv->step, true, NULL);
2432 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2433 if (gimple_code (phi) == GIMPLE_PHI)
2435 /* Additionally record the possibility of leaving the original iv
2436 untouched. */
2437 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2438 cand = add_candidate_1 (data,
2439 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2440 SSA_NAME_DEF_STMT (def));
2441 cand->var_before = iv->ssa_name;
2442 cand->var_after = def;
2446 /* Adds candidates based on the old induction variables. */
2448 static void
2449 add_old_ivs_candidates (struct ivopts_data *data)
2451 unsigned i;
2452 struct iv *iv;
2453 bitmap_iterator bi;
2455 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2457 iv = ver_info (data, i)->iv;
2458 if (iv && iv->biv_p && !integer_zerop (iv->step))
2459 add_old_iv_candidates (data, iv);
2463 /* Adds candidates based on the value of the induction variable IV and the use USE. */
2465 static void
2466 add_iv_value_candidates (struct ivopts_data *data,
2467 struct iv *iv, struct iv_use *use)
2469 unsigned HOST_WIDE_INT offset;
2470 tree base;
2471 tree basetype;
2473 add_candidate (data, iv->base, iv->step, false, use);
2475 /* The same, but with initial value zero. Make such variable important,
2476 since it is generic enough so that possibly many uses may be based
2477 on it. */
2478 basetype = TREE_TYPE (iv->base);
2479 if (POINTER_TYPE_P (basetype))
2480 basetype = sizetype;
2481 add_candidate (data, build_int_cst (basetype, 0),
2482 iv->step, true, use);
2484 /* Third, try removing the constant offset. Make sure to even
2485 add a candidate for &a[0] vs. (T *)&a. */
2486 base = strip_offset (iv->base, &offset);
2487 if (offset
2488 || base != iv->base)
2489 add_candidate (data, base, iv->step, false, use);
2492 /* Adds candidates based on the uses. */
2494 static void
2495 add_derived_ivs_candidates (struct ivopts_data *data)
2497 unsigned i;
2499 for (i = 0; i < n_iv_uses (data); i++)
2501 struct iv_use *use = iv_use (data, i);
2503 if (!use)
2504 continue;
2506 switch (use->type)
2508 case USE_NONLINEAR_EXPR:
2509 case USE_COMPARE:
2510 case USE_ADDRESS:
2511 /* Just add the ivs based on the value of the iv used here. */
2512 add_iv_value_candidates (data, use->iv, use);
2513 break;
2515 default:
2516 gcc_unreachable ();
2521 /* Record important candidates and add them to related_cands bitmaps
2522 if needed. */
2524 static void
2525 record_important_candidates (struct ivopts_data *data)
2527 unsigned i;
2528 struct iv_use *use;
2530 for (i = 0; i < n_iv_cands (data); i++)
2532 struct iv_cand *cand = iv_cand (data, i);
2534 if (cand->important)
2535 bitmap_set_bit (data->important_candidates, i);
2538 data->consider_all_candidates = (n_iv_cands (data)
2539 <= CONSIDER_ALL_CANDIDATES_BOUND);
2541 if (data->consider_all_candidates)
2543 /* We will not need "related_cands" bitmaps in this case,
2544 so release them to decrease peak memory consumption. */
2545 for (i = 0; i < n_iv_uses (data); i++)
2547 use = iv_use (data, i);
2548 BITMAP_FREE (use->related_cands);
2551 else
2553 /* Add important candidates to the related_cands bitmaps. */
2554 for (i = 0; i < n_iv_uses (data); i++)
2555 bitmap_ior_into (iv_use (data, i)->related_cands,
2556 data->important_candidates);
2560 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2561 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2562 we allocate a simple list for every use. */
2564 static void
2565 alloc_use_cost_map (struct ivopts_data *data)
2567 unsigned i, size, s, j;
2569 for (i = 0; i < n_iv_uses (data); i++)
2571 struct iv_use *use = iv_use (data, i);
2572 bitmap_iterator bi;
2574 if (data->consider_all_candidates)
2575 size = n_iv_cands (data);
2576 else
2578 s = 0;
2579 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2581 s++;
2584 /* Round up to the nearest power of two, so that taking the modulus by it is fast. */
2585 for (size = 1; size < s; size <<= 1)
2586 continue;
2589 use->n_map_members = size;
2590 use->cost_map = XCNEWVEC (struct cost_pair, size);
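/* Annotation, not part of the original file: a minimal standalone sketch
   of the rounding and masking scheme used for the per-use cost maps.  It
   is guarded by "#if 0" so that it is never compiled as part of this
   file; extracted on its own it is a complete C program.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned s = 5;        /* e.g. five related candidates for a use */
  unsigned size;

  /* Round up to the nearest power of two, as alloc_use_cost_map does.  */
  for (size = 1; size < s; size <<= 1)
    continue;

  /* With a power-of-two SIZE, "id & (size - 1)" equals "id % size",
     which is how set_use_iv_cost below picks the starting slot.  */
  printf ("size = %u, 13 mod size = %u\n", size, 13u & (size - 1));
  return 0;
}
#endif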
2594 /* Returns a description of the computation cost of an expression whose runtime
2595 cost is RUNTIME and whose complexity is COMPLEXITY. */
2597 static comp_cost
2598 new_cost (unsigned runtime, unsigned complexity)
2600 comp_cost cost;
2602 cost.cost = runtime;
2603 cost.complexity = complexity;
2605 return cost;
2608 /* Adds costs COST1 and COST2. */
2610 static comp_cost
2611 add_costs (comp_cost cost1, comp_cost cost2)
2613 cost1.cost += cost2.cost;
2614 cost1.complexity += cost2.complexity;
2616 return cost1;
2618 /* Subtracts cost COST2 from COST1. */
2620 static comp_cost
2621 sub_costs (comp_cost cost1, comp_cost cost2)
2623 cost1.cost -= cost2.cost;
2624 cost1.complexity -= cost2.complexity;
2626 return cost1;
2629 /* Returns a negative number if COST1 < COST2, a positive number if
2630 COST1 > COST2, and 0 if COST1 = COST2. */
2632 static int
2633 compare_costs (comp_cost cost1, comp_cost cost2)
2635 if (cost1.cost == cost2.cost)
2636 return cost1.complexity - cost2.complexity;
2638 return cost1.cost - cost2.cost;
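/* Annotation, not part of the original file: a minimal standalone sketch
   of the two-field cost discipline used by compare_costs above -- runtime
   cost dominates, complexity only breaks ties.  Guarded by "#if 0" so it
   is never compiled here.  */
#if 0
#include <stdio.h>

struct cost { int cost; int complexity; };

static int
compare (struct cost c1, struct cost c2)
{
  if (c1.cost == c2.cost)
    return c1.complexity - c2.complexity;
  return c1.cost - c2.cost;
}

int
main (void)
{
  struct cost a = { 4, 1 }, b = { 4, 2 };
  /* Prints a negative number: equal runtime cost, A is less complex.  */
  printf ("%d\n", compare (a, b));
  return 0;
}
#endif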
2641 /* Returns true if COST is infinite. */
2643 static bool
2644 infinite_cost_p (comp_cost cost)
2646 return cost.cost == INFTY;
2649 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2650 on invariants DEPENDS_ON and that the value used in expressing it
2651 is VALUE. */
2653 static void
2654 set_use_iv_cost (struct ivopts_data *data,
2655 struct iv_use *use, struct iv_cand *cand,
2656 comp_cost cost, bitmap depends_on, tree value,
2657 int inv_expr_id)
2659 unsigned i, s;
2661 if (infinite_cost_p (cost))
2663 BITMAP_FREE (depends_on);
2664 return;
2667 if (data->consider_all_candidates)
2669 use->cost_map[cand->id].cand = cand;
2670 use->cost_map[cand->id].cost = cost;
2671 use->cost_map[cand->id].depends_on = depends_on;
2672 use->cost_map[cand->id].value = value;
2673 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2674 return;
2677 /* n_map_members is a power of two, so this computes modulo. */
2678 s = cand->id & (use->n_map_members - 1);
2679 for (i = s; i < use->n_map_members; i++)
2680 if (!use->cost_map[i].cand)
2681 goto found;
2682 for (i = 0; i < s; i++)
2683 if (!use->cost_map[i].cand)
2684 goto found;
2686 gcc_unreachable ();
2688 found:
2689 use->cost_map[i].cand = cand;
2690 use->cost_map[i].cost = cost;
2691 use->cost_map[i].depends_on = depends_on;
2692 use->cost_map[i].value = value;
2693 use->cost_map[i].inv_expr_id = inv_expr_id;
2696 /* Gets cost of (USE, CANDIDATE) pair. */
2698 static struct cost_pair *
2699 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2700 struct iv_cand *cand)
2702 unsigned i, s;
2703 struct cost_pair *ret;
2705 if (!cand)
2706 return NULL;
2708 if (data->consider_all_candidates)
2710 ret = use->cost_map + cand->id;
2711 if (!ret->cand)
2712 return NULL;
2714 return ret;
2717 /* n_map_members is a power of two, so this computes modulo. */
2718 s = cand->id & (use->n_map_members - 1);
2719 for (i = s; i < use->n_map_members; i++)
2720 if (use->cost_map[i].cand == cand)
2721 return use->cost_map + i;
2723 for (i = 0; i < s; i++)
2724 if (use->cost_map[i].cand == cand)
2725 return use->cost_map + i;
2727 return NULL;
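/* Annotation, not part of the original file: set_use_iv_cost and
   get_use_iv_cost above form a tiny open-addressing hash table.  Probing
   starts at cand->id & (n_map_members - 1) and walks forward, wrapping
   around to the start of the table.  At most n_map_members pairs are ever
   recorded for a use, so the linear probe always finds a free slot, which
   is why falling off both loops in set_use_iv_cost is gcc_unreachable.  */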
2730 /* Returns an estimate of the cost of computing SEQ. */
2732 static unsigned
2733 seq_cost (rtx seq, bool speed)
2735 unsigned cost = 0;
2736 rtx set;
2738 for (; seq; seq = NEXT_INSN (seq))
2740 set = single_set (seq);
2741 if (set)
2742 cost += rtx_cost (set, SET, speed);
2743 else
2744 cost++;
2747 return cost;
2750 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2751 static rtx
2752 produce_memory_decl_rtl (tree obj, int *regno)
2754 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2755 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2756 rtx x;
2758 gcc_assert (obj);
2759 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2761 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2762 x = gen_rtx_SYMBOL_REF (address_mode, name);
2763 SET_SYMBOL_REF_DECL (x, obj);
2764 x = gen_rtx_MEM (DECL_MODE (obj), x);
2765 set_mem_addr_space (x, as);
2766 targetm.encode_section_info (obj, x, true);
2768 else
2770 x = gen_raw_REG (address_mode, (*regno)++);
2771 x = gen_rtx_MEM (DECL_MODE (obj), x);
2772 set_mem_addr_space (x, as);
2775 return x;
2778 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2779 walk_tree. DATA contains the actual fake register number. */
2781 static tree
2782 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2784 tree obj = NULL_TREE;
2785 rtx x = NULL_RTX;
2786 int *regno = (int *) data;
2788 switch (TREE_CODE (*expr_p))
2790 case ADDR_EXPR:
2791 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2792 handled_component_p (*expr_p);
2793 expr_p = &TREE_OPERAND (*expr_p, 0))
2794 continue;
2795 obj = *expr_p;
2796 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2797 x = produce_memory_decl_rtl (obj, regno);
2798 break;
2800 case SSA_NAME:
2801 *ws = 0;
2802 obj = SSA_NAME_VAR (*expr_p);
2803 if (!DECL_RTL_SET_P (obj))
2804 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2805 break;
2807 case VAR_DECL:
2808 case PARM_DECL:
2809 case RESULT_DECL:
2810 *ws = 0;
2811 obj = *expr_p;
2813 if (DECL_RTL_SET_P (obj))
2814 break;
2816 if (DECL_MODE (obj) == BLKmode)
2817 x = produce_memory_decl_rtl (obj, regno);
2818 else
2819 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2821 break;
2823 default:
2824 break;
2827 if (x)
2829 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2830 SET_DECL_RTL (obj, x);
2833 return NULL_TREE;
2836 /* Determines cost of the computation of EXPR. */
2838 static unsigned
2839 computation_cost (tree expr, bool speed)
2841 rtx seq, rslt;
2842 tree type = TREE_TYPE (expr);
2843 unsigned cost;
2844 /* Avoid using hard regs in ways which may be unsupported. */
2845 int regno = LAST_VIRTUAL_REGISTER + 1;
2846 struct cgraph_node *node = cgraph_node (current_function_decl);
2847 enum node_frequency real_frequency = node->frequency;
2849 node->frequency = NODE_FREQUENCY_NORMAL;
2850 crtl->maybe_hot_insn_p = speed;
2851 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2852 start_sequence ();
2853 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2854 seq = get_insns ();
2855 end_sequence ();
2856 default_rtl_profile ();
2857 node->frequency = real_frequency;
2859 cost = seq_cost (seq, speed);
2860 if (MEM_P (rslt))
2861 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2862 TYPE_ADDR_SPACE (type), speed);
2864 return cost;
2867 /* Returns variable containing the value of candidate CAND at statement AT. */
2869 static tree
2870 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2872 if (stmt_after_increment (loop, cand, stmt))
2873 return cand->var_after;
2874 else
2875 return cand->var_before;
2878 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2879 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2881 int
2882 tree_int_cst_sign_bit (const_tree t)
2884 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2885 unsigned HOST_WIDE_INT w;
2887 if (bitno < HOST_BITS_PER_WIDE_INT)
2888 w = TREE_INT_CST_LOW (t);
2889 else
2891 w = TREE_INT_CST_HIGH (t);
2892 bitno -= HOST_BITS_PER_WIDE_INT;
2895 return (w >> bitno) & 1;
2898 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2899 same precision that is at least as wide as the precision of TYPE, stores
2900 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2901 type of A and B. */
2903 static tree
2904 determine_common_wider_type (tree *a, tree *b)
2906 tree wider_type = NULL;
2907 tree suba, subb;
2908 tree atype = TREE_TYPE (*a);
2910 if (CONVERT_EXPR_P (*a))
2912 suba = TREE_OPERAND (*a, 0);
2913 wider_type = TREE_TYPE (suba);
2914 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2915 return atype;
2917 else
2918 return atype;
2920 if (CONVERT_EXPR_P (*b))
2922 subb = TREE_OPERAND (*b, 0);
2923 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2924 return atype;
2926 else
2927 return atype;
2929 *a = suba;
2930 *b = subb;
2931 return wider_type;
2934 /* Determines the expression by which USE is expressed from induction variable
2935 CAND at statement AT in LOOP. The expression is stored in a decomposed
2936 form into AFF. Returns false if USE cannot be expressed using CAND. */
2938 static bool
2939 get_computation_aff (struct loop *loop,
2940 struct iv_use *use, struct iv_cand *cand, gimple at,
2941 struct affine_tree_combination *aff)
2943 tree ubase = use->iv->base;
2944 tree ustep = use->iv->step;
2945 tree cbase = cand->iv->base;
2946 tree cstep = cand->iv->step, cstep_common;
2947 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2948 tree common_type, var;
2949 tree uutype;
2950 aff_tree cbase_aff, var_aff;
2951 double_int rat;
2953 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2955 /* We do not have enough precision to express the values of the use. */
2956 return false;
2959 var = var_at_stmt (loop, cand, at);
2960 uutype = unsigned_type_for (utype);
2962 /* If the conversion is not noop, perform it. */
2963 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2965 cstep = fold_convert (uutype, cstep);
2966 cbase = fold_convert (uutype, cbase);
2967 var = fold_convert (uutype, var);
2970 if (!constant_multiple_of (ustep, cstep, &rat))
2971 return false;
2973 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2974 type, we achieve better folding by computing their difference in this
2975 wider type, and cast the result to UUTYPE. We do not need to worry about
2976 overflows, as all the arithmetic will in the end be performed in UUTYPE
2977 anyway. */
2978 common_type = determine_common_wider_type (&ubase, &cbase);
2980 /* use = ubase - ratio * cbase + ratio * var. */
2981 tree_to_aff_combination (ubase, common_type, aff);
2982 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2983 tree_to_aff_combination (var, uutype, &var_aff);
2985 /* We need to shift the value if we are after the increment. */
2986 if (stmt_after_increment (loop, cand, at))
2988 aff_tree cstep_aff;
2990 if (common_type != uutype)
2991 cstep_common = fold_convert (common_type, cstep);
2992 else
2993 cstep_common = cstep;
2995 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2996 aff_combination_add (&cbase_aff, &cstep_aff);
2999 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3000 aff_combination_add (aff, &cbase_aff);
3001 if (common_type != uutype)
3002 aff_combination_convert (aff, uutype);
3004 aff_combination_scale (&var_aff, rat);
3005 aff_combination_add (aff, &var_aff);
3007 return true;
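/* Annotation, not part of the original file: a worked instance of the
   "use = ubase - ratio * cbase + ratio * var" decomposition above.  For a
   hypothetical use iv {base 4, step 8} and candidate iv {base 0, step 4},
   constant_multiple_of yields ratio 2 and the use is rebuilt as
   4 - 2 * 0 + 2 * var, i.e. the affine combination 2 * var + 4.  */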
3010 /* Determines the expression by which USE is expressed from induction variable
3011 CAND at statement AT in LOOP. The computation is unshared. */
3013 static tree
3014 get_computation_at (struct loop *loop,
3015 struct iv_use *use, struct iv_cand *cand, gimple at)
3017 aff_tree aff;
3018 tree type = TREE_TYPE (use->iv->base);
3020 if (!get_computation_aff (loop, use, cand, at, &aff))
3021 return NULL_TREE;
3022 unshare_aff_combination (&aff);
3023 return fold_convert (type, aff_combination_to_tree (&aff));
3026 /* Determines the expression by which USE is expressed from induction variable
3027 CAND in LOOP. The computation is unshared. */
3029 static tree
3030 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3032 return get_computation_at (loop, use, cand, use->stmt);
3035 /* Adjust the cost COST for being in loop setup rather than loop body.
3036 If we're optimizing for space, the loop setup overhead is constant;
3037 if we're optimizing for speed, amortize it over the per-iteration cost. */
3038 static unsigned
3039 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3041 if (cost == INFTY)
3042 return cost;
3043 else if (optimize_loop_for_speed_p (data->current_loop))
3044 return cost / avg_loop_niter (data->current_loop);
3045 else
3046 return cost;
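/* Annotation, not part of the original file: e.g. a setup cost of 10 in a
   loop optimized for speed with an average of 5 iterations is accounted
   as 10 / 5 = 2 per iteration, while when optimizing for size it is
   charged in full.  */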
3049 /* Returns cost of addition in MODE. */
3051 static unsigned
3052 add_cost (enum machine_mode mode, bool speed)
3054 static unsigned costs[NUM_MACHINE_MODES];
3055 rtx seq;
3056 unsigned cost;
3058 if (costs[mode])
3059 return costs[mode];
3061 start_sequence ();
3062 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3063 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3064 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3065 NULL_RTX);
3066 seq = get_insns ();
3067 end_sequence ();
3069 cost = seq_cost (seq, speed);
3070 if (!cost)
3071 cost = 1;
3073 costs[mode] = cost;
3075 if (dump_file && (dump_flags & TDF_DETAILS))
3076 fprintf (dump_file, "Addition in %s costs %d\n",
3077 GET_MODE_NAME (mode), cost);
3078 return cost;
3081 /* Entry in a hashtable of already known costs for multiplication. */
3082 struct mbc_entry
3084 HOST_WIDE_INT cst; /* The constant to multiply by. */
3085 enum machine_mode mode; /* In mode. */
3086 unsigned cost; /* The cost. */
3089 /* Computes the hash value for ENTRY. */
3091 static hashval_t
3092 mbc_entry_hash (const void *entry)
3094 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3096 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3099 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3101 static int
3102 mbc_entry_eq (const void *entry1, const void *entry2)
3104 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3105 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3107 return (e1->mode == e2->mode
3108 && e1->cst == e2->cst);
3111 /* Returns cost of multiplication by constant CST in MODE. */
3113 unsigned
3114 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3116 static htab_t costs;
3117 struct mbc_entry **cached, act;
3118 rtx seq;
3119 unsigned cost;
3121 if (!costs)
3122 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3124 act.mode = mode;
3125 act.cst = cst;
3126 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3127 if (*cached)
3128 return (*cached)->cost;
3130 *cached = XNEW (struct mbc_entry);
3131 (*cached)->mode = mode;
3132 (*cached)->cst = cst;
3134 start_sequence ();
3135 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3136 gen_int_mode (cst, mode), NULL_RTX, 0);
3137 seq = get_insns ();
3138 end_sequence ();
3140 cost = seq_cost (seq, speed);
3142 if (dump_file && (dump_flags & TDF_DETAILS))
3143 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3144 (int) cst, GET_MODE_NAME (mode), cost);
3146 (*cached)->cost = cost;
3148 return cost;
3151 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3152 validity for a memory reference accessing memory of mode MODE in
3153 address space AS. */
3155 DEF_VEC_P (sbitmap);
3156 DEF_VEC_ALLOC_P (sbitmap, heap);
3158 bool
3159 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3160 addr_space_t as)
3162 #define MAX_RATIO 128
3163 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3164 static VEC (sbitmap, heap) *valid_mult_list;
3165 sbitmap valid_mult;
3167 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3168 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3170 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3171 if (!valid_mult)
3173 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3174 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3175 rtx addr;
3176 HOST_WIDE_INT i;
3178 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3179 sbitmap_zero (valid_mult);
3180 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3181 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3183 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3184 if (memory_address_addr_space_p (mode, addr, as))
3185 SET_BIT (valid_mult, i + MAX_RATIO);
3188 if (dump_file && (dump_flags & TDF_DETAILS))
3190 fprintf (dump_file, " allowed multipliers:");
3191 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3192 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3193 fprintf (dump_file, " %d", (int) i);
3194 fprintf (dump_file, "\n");
3195 fprintf (dump_file, "\n");
3198 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3201 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3202 return false;
3204 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
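/* Annotation, not part of the original file: which ratios survive the
   probing above is entirely target-dependent; on an x86-like target one
   would typically expect index scales 1, 2, 4 and 8 to be accepted for
   integer modes, whereas many RISC targets accept only 1.  */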
3207 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3208 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3209 variable is omitted. Compute the cost for a memory reference that accesses
3210 a memory location of mode MEM_MODE in address space AS.
3212 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3213 size of MEM_MODE / RATIO) is available. To make this determination, we
3214 look at the size of the increment to be made, which is given in CSTEP.
3215 CSTEP may be zero if the step is unknown.
3216 STMT_AFTER_INC is true iff the statement we're looking at is after the
3217 increment of the original biv.
3219 TODO -- there must be some better way. All of this is quite crude. */
3221 typedef struct
3223 HOST_WIDE_INT min_offset, max_offset;
3224 unsigned costs[2][2][2][2];
3225 } *address_cost_data;
3227 DEF_VEC_P (address_cost_data);
3228 DEF_VEC_ALLOC_P (address_cost_data, heap);
3230 static comp_cost
3231 get_address_cost (bool symbol_present, bool var_present,
3232 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3233 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3234 addr_space_t as, bool speed,
3235 bool stmt_after_inc, bool *may_autoinc)
3237 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3238 static VEC(address_cost_data, heap) *address_cost_data_list;
3239 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3240 address_cost_data data;
3241 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3242 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3243 unsigned cost, acost, complexity;
3244 bool offset_p, ratio_p, autoinc;
3245 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3246 unsigned HOST_WIDE_INT mask;
3247 unsigned bits;
3249 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3250 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3251 data_index + 1);
3253 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3254 if (!data)
3256 HOST_WIDE_INT i;
3257 HOST_WIDE_INT rat, off = 0;
3258 int old_cse_not_expected, width;
3259 unsigned sym_p, var_p, off_p, rat_p, add_c;
3260 rtx seq, addr, base;
3261 rtx reg0, reg1;
3263 data = (address_cost_data) xcalloc (1, sizeof (*data));
3265 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3267 width = GET_MODE_BITSIZE (address_mode) - 1;
3268 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3269 width = HOST_BITS_PER_WIDE_INT - 1;
3270 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3272 for (i = width; i >= 0; i--)
3274 off = -((HOST_WIDE_INT) 1 << i);
3275 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3276 if (memory_address_addr_space_p (mem_mode, addr, as))
3277 break;
3279 data->min_offset = (i == -1 ? 0 : off);
3281 for (i = width; i >= 0; i--)
3283 off = ((HOST_WIDE_INT) 1 << i) - 1;
3284 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3285 if (memory_address_addr_space_p (mem_mode, addr, as))
3286 break;
3288 if (i == -1)
3289 off = 0;
3290 data->max_offset = off;
3292 if (dump_file && (dump_flags & TDF_DETAILS))
3294 fprintf (dump_file, "get_address_cost:\n");
3295 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3296 GET_MODE_NAME (mem_mode),
3297 data->min_offset);
3298 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3299 GET_MODE_NAME (mem_mode),
3300 data->max_offset);
3303 rat = 1;
3304 for (i = 2; i <= MAX_RATIO; i++)
3305 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3307 rat = i;
3308 break;
3311 /* Compute the cost of various addressing modes. */
3312 acost = 0;
3313 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3314 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3316 if (HAVE_PRE_DECREMENT)
3318 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3319 has_predec[mem_mode]
3320 = memory_address_addr_space_p (mem_mode, addr, as);
3322 if (HAVE_POST_DECREMENT)
3324 addr = gen_rtx_POST_DEC (address_mode, reg0);
3325 has_postdec[mem_mode]
3326 = memory_address_addr_space_p (mem_mode, addr, as);
3328 if (HAVE_PRE_INCREMENT)
3330 addr = gen_rtx_PRE_INC (address_mode, reg0);
3331 has_preinc[mem_mode]
3332 = memory_address_addr_space_p (mem_mode, addr, as);
3334 if (HAVE_POST_INCREMENT)
3336 addr = gen_rtx_POST_INC (address_mode, reg0);
3337 has_postinc[mem_mode]
3338 = memory_address_addr_space_p (mem_mode, addr, as);
3340 for (i = 0; i < 16; i++)
3342 sym_p = i & 1;
3343 var_p = (i >> 1) & 1;
3344 off_p = (i >> 2) & 1;
3345 rat_p = (i >> 3) & 1;
3347 addr = reg0;
3348 if (rat_p)
3349 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3350 gen_int_mode (rat, address_mode));
3352 if (var_p)
3353 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3355 if (sym_p)
3357 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3358 /* ??? We can run into trouble with some backends by presenting
3359 it with symbols which haven't been properly passed through
3360 targetm.encode_section_info. By setting the local bit, we
3361 enhance the probability of things working. */
3362 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3364 if (off_p)
3365 base = gen_rtx_fmt_e (CONST, address_mode,
3366 gen_rtx_fmt_ee
3367 (PLUS, address_mode, base,
3368 gen_int_mode (off, address_mode)));
3370 else if (off_p)
3371 base = gen_int_mode (off, address_mode);
3372 else
3373 base = NULL_RTX;
3375 if (base)
3376 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3378 start_sequence ();
3379 /* To avoid splitting addressing modes, pretend that no cse will
3380 follow. */
3381 old_cse_not_expected = cse_not_expected;
3382 cse_not_expected = true;
3383 addr = memory_address_addr_space (mem_mode, addr, as);
3384 cse_not_expected = old_cse_not_expected;
3385 seq = get_insns ();
3386 end_sequence ();
3388 acost = seq_cost (seq, speed);
3389 acost += address_cost (addr, mem_mode, as, speed);
3391 if (!acost)
3392 acost = 1;
3393 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3396 /* On some targets, it is quite expensive to load symbol to a register,
3397 which makes addresses that contain symbols look much more expensive.
3398 However, the symbol will have to be loaded in any case before the
3399 loop (and quite likely we have it in register already), so it does not
3400 make much sense to penalize them too heavily. So make some final
3401 tweaks for the SYMBOL_PRESENT modes:
3403 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3404 var is cheaper, use this mode with small penalty.
3405 If VAR_PRESENT is true, try whether the mode with
3406 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3407 if this is the case, use it. */
3408 add_c = add_cost (address_mode, speed);
3409 for (i = 0; i < 8; i++)
3411 var_p = i & 1;
3412 off_p = (i >> 1) & 1;
3413 rat_p = (i >> 2) & 1;
3415 acost = data->costs[0][1][off_p][rat_p] + 1;
3416 if (var_p)
3417 acost += add_c;
3419 if (acost < data->costs[1][var_p][off_p][rat_p])
3420 data->costs[1][var_p][off_p][rat_p] = acost;
3423 if (dump_file && (dump_flags & TDF_DETAILS))
3425 fprintf (dump_file, "Address costs:\n");
3427 for (i = 0; i < 16; i++)
3429 sym_p = i & 1;
3430 var_p = (i >> 1) & 1;
3431 off_p = (i >> 2) & 1;
3432 rat_p = (i >> 3) & 1;
3434 fprintf (dump_file, " ");
3435 if (sym_p)
3436 fprintf (dump_file, "sym + ");
3437 if (var_p)
3438 fprintf (dump_file, "var + ");
3439 if (off_p)
3440 fprintf (dump_file, "cst + ");
3441 if (rat_p)
3442 fprintf (dump_file, "rat * ");
3444 acost = data->costs[sym_p][var_p][off_p][rat_p];
3445 fprintf (dump_file, "index costs %d\n", acost);
3447 if (has_predec[mem_mode] || has_postdec[mem_mode]
3448 || has_preinc[mem_mode] || has_postinc[mem_mode])
3449 fprintf (dump_file, " May include autoinc/dec\n");
3450 fprintf (dump_file, "\n");
3453 VEC_replace (address_cost_data, address_cost_data_list,
3454 data_index, data);
3457 bits = GET_MODE_BITSIZE (address_mode);
3458 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3459 offset &= mask;
3460 if ((offset >> (bits - 1) & 1))
3461 offset |= ~mask;
3462 s_offset = offset;
3464 autoinc = false;
3465 msize = GET_MODE_SIZE (mem_mode);
3466 autoinc_offset = offset;
3467 if (stmt_after_inc)
3468 autoinc_offset += ratio * cstep;
3469 if (symbol_present || var_present || ratio != 1)
3470 autoinc = false;
3471 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3472 && msize == cstep)
3473 || (has_postdec[mem_mode] && autoinc_offset == 0
3474 && msize == -cstep)
3475 || (has_preinc[mem_mode] && autoinc_offset == msize
3476 && msize == cstep)
3477 || (has_predec[mem_mode] && autoinc_offset == -msize
3478 && msize == -cstep))
3479 autoinc = true;
3481 cost = 0;
3482 offset_p = (s_offset != 0
3483 && data->min_offset <= s_offset
3484 && s_offset <= data->max_offset);
3485 ratio_p = (ratio != 1
3486 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3488 if (ratio != 1 && !ratio_p)
3489 cost += multiply_by_cost (ratio, address_mode, speed);
3491 if (s_offset && !offset_p && !symbol_present)
3492 cost += add_cost (address_mode, speed);
3494 if (may_autoinc)
3495 *may_autoinc = autoinc;
3496 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3497 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3498 return new_cost (cost + acost, complexity);
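/* Annotation, not part of the original file: the 16-iteration loop above
   enumerates every combination of the four address parts -- symbol, base
   register, constant offset and scaled index -- so that the final lookup
   data->costs[symbol_present][var_present][offset_p][ratio_p] can read
   off the precomputed cost of the addressing form directly.  */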
3501 /* Estimates cost of forcing expression EXPR into a variable. */
3503 static comp_cost
3504 force_expr_to_var_cost (tree expr, bool speed)
3506 static bool costs_initialized = false;
3507 static unsigned integer_cost [2];
3508 static unsigned symbol_cost [2];
3509 static unsigned address_cost [2];
3510 tree op0, op1;
3511 comp_cost cost0, cost1, cost;
3512 enum machine_mode mode;
3514 if (!costs_initialized)
3516 tree type = build_pointer_type (integer_type_node);
3517 tree var, addr;
3518 rtx x;
3519 int i;
3521 var = create_tmp_var_raw (integer_type_node, "test_var");
3522 TREE_STATIC (var) = 1;
3523 x = produce_memory_decl_rtl (var, NULL);
3524 SET_DECL_RTL (var, x);
3526 addr = build1 (ADDR_EXPR, type, var);
3529 for (i = 0; i < 2; i++)
3531 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3532 2000), i);
3534 symbol_cost[i] = computation_cost (addr, i) + 1;
3536 address_cost[i]
3537 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3538 addr,
3539 build_int_cst (sizetype, 2000)), i) + 1;
3540 if (dump_file && (dump_flags & TDF_DETAILS))
3542 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3543 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3544 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3545 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3546 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3547 fprintf (dump_file, "\n");
3551 costs_initialized = true;
3554 STRIP_NOPS (expr);
3556 if (SSA_VAR_P (expr))
3557 return zero_cost;
3559 if (is_gimple_min_invariant (expr))
3561 if (TREE_CODE (expr) == INTEGER_CST)
3562 return new_cost (integer_cost [speed], 0);
3564 if (TREE_CODE (expr) == ADDR_EXPR)
3566 tree obj = TREE_OPERAND (expr, 0);
3568 if (TREE_CODE (obj) == VAR_DECL
3569 || TREE_CODE (obj) == PARM_DECL
3570 || TREE_CODE (obj) == RESULT_DECL)
3571 return new_cost (symbol_cost [speed], 0);
3574 return new_cost (address_cost [speed], 0);
3577 switch (TREE_CODE (expr))
3579 case POINTER_PLUS_EXPR:
3580 case PLUS_EXPR:
3581 case MINUS_EXPR:
3582 case MULT_EXPR:
3583 op0 = TREE_OPERAND (expr, 0);
3584 op1 = TREE_OPERAND (expr, 1);
3585 STRIP_NOPS (op0);
3586 STRIP_NOPS (op1);
3588 if (is_gimple_val (op0))
3589 cost0 = zero_cost;
3590 else
3591 cost0 = force_expr_to_var_cost (op0, speed);
3593 if (is_gimple_val (op1))
3594 cost1 = zero_cost;
3595 else
3596 cost1 = force_expr_to_var_cost (op1, speed);
3598 break;
3600 case NEGATE_EXPR:
3601 op0 = TREE_OPERAND (expr, 0);
3602 STRIP_NOPS (op0);
3603 op1 = NULL_TREE;
3605 if (is_gimple_val (op0))
3606 cost0 = zero_cost;
3607 else
3608 cost0 = force_expr_to_var_cost (op0, speed);
3610 cost1 = zero_cost;
3611 break;
3613 default:
3614 /* Just an arbitrary value, FIXME. */
3615 return new_cost (target_spill_cost[speed], 0);
3618 mode = TYPE_MODE (TREE_TYPE (expr));
3619 switch (TREE_CODE (expr))
3621 case POINTER_PLUS_EXPR:
3622 case PLUS_EXPR:
3623 case MINUS_EXPR:
3624 case NEGATE_EXPR:
3625 cost = new_cost (add_cost (mode, speed), 0);
3626 break;
3628 case MULT_EXPR:
3629 if (cst_and_fits_in_hwi (op0))
3630 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3631 else if (cst_and_fits_in_hwi (op1))
3632 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3633 else
3634 return new_cost (target_spill_cost [speed], 0);
3635 break;
3637 default:
3638 gcc_unreachable ();
3641 cost = add_costs (cost, cost0);
3642 cost = add_costs (cost, cost1);
3644 /* Bound the cost by target_spill_cost. The parts of complicated
3645 computations often are either loop invariant or at least can
3646 be shared between several iv uses, so letting this grow without
3647 limits would not give reasonable results. */
3648 if (cost.cost > (int) target_spill_cost [speed])
3649 cost.cost = target_spill_cost [speed];
3651 return cost;
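/* Annotation, not part of the original file: as a hypothetical example,
   forcing "a + 4 * b" (a, b SSA names) into a variable costs
   multiply_by_cost (4, ...) for the inner MULT_EXPR plus add_cost for the
   PLUS_EXPR, the SSA operands themselves being free; the sum is then
   clamped to target_spill_cost as described above.  */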
3654 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3655 invariants the computation depends on. */
3657 static comp_cost
3658 force_var_cost (struct ivopts_data *data,
3659 tree expr, bitmap *depends_on)
3661 if (depends_on)
3663 fd_ivopts_data = data;
3664 walk_tree (&expr, find_depends, depends_on, NULL);
3667 return force_expr_to_var_cost (expr, data->speed);
3670 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3671 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3672 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3673 invariants the computation depends on. */
3675 static comp_cost
3676 split_address_cost (struct ivopts_data *data,
3677 tree addr, bool *symbol_present, bool *var_present,
3678 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3680 tree core;
3681 HOST_WIDE_INT bitsize;
3682 HOST_WIDE_INT bitpos;
3683 tree toffset;
3684 enum machine_mode mode;
3685 int unsignedp, volatilep;
3687 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3688 &unsignedp, &volatilep, false);
3690 if (toffset != 0
3691 || bitpos % BITS_PER_UNIT != 0
3692 || TREE_CODE (core) != VAR_DECL)
3694 *symbol_present = false;
3695 *var_present = true;
3696 fd_ivopts_data = data;
3697 walk_tree (&addr, find_depends, depends_on, NULL);
3698 return new_cost (target_spill_cost[data->speed], 0);
3701 *offset += bitpos / BITS_PER_UNIT;
3702 if (TREE_STATIC (core)
3703 || DECL_EXTERNAL (core))
3705 *symbol_present = true;
3706 *var_present = false;
3707 return zero_cost;
3710 *symbol_present = false;
3711 *var_present = true;
3712 return zero_cost;
3715 /* Estimates cost of expressing difference of addresses E1 - E2 as
3716 var + symbol + offset. The value of offset is added to OFFSET,
3717 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3718 part is missing. DEPENDS_ON is a set of the invariants the computation
3719 depends on. */
3721 static comp_cost
3722 ptr_difference_cost (struct ivopts_data *data,
3723 tree e1, tree e2, bool *symbol_present, bool *var_present,
3724 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3726 HOST_WIDE_INT diff = 0;
3727 aff_tree aff_e1, aff_e2;
3728 tree type;
3730 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3732 if (ptr_difference_const (e1, e2, &diff))
3734 *offset += diff;
3735 *symbol_present = false;
3736 *var_present = false;
3737 return zero_cost;
3740 if (integer_zerop (e2))
3741 return split_address_cost (data, TREE_OPERAND (e1, 0),
3742 symbol_present, var_present, offset, depends_on);
3744 *symbol_present = false;
3745 *var_present = true;
3747 type = signed_type_for (TREE_TYPE (e1));
3748 tree_to_aff_combination (e1, type, &aff_e1);
3749 tree_to_aff_combination (e2, type, &aff_e2);
3750 aff_combination_scale (&aff_e2, double_int_minus_one);
3751 aff_combination_add (&aff_e1, &aff_e2);
3753 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3756 /* Estimates cost of expressing difference E1 - E2 as
3757 var + symbol + offset. The value of offset is added to OFFSET,
3758 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3759 part is missing. DEPENDS_ON is a set of the invariants the computation
3760 depends on. */
3762 static comp_cost
3763 difference_cost (struct ivopts_data *data,
3764 tree e1, tree e2, bool *symbol_present, bool *var_present,
3765 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3767 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3768 unsigned HOST_WIDE_INT off1, off2;
3769 aff_tree aff_e1, aff_e2;
3770 tree type;
3772 e1 = strip_offset (e1, &off1);
3773 e2 = strip_offset (e2, &off2);
3774 *offset += off1 - off2;
3776 STRIP_NOPS (e1);
3777 STRIP_NOPS (e2);
3779 if (TREE_CODE (e1) == ADDR_EXPR)
3780 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3781 offset, depends_on);
3782 *symbol_present = false;
3784 if (operand_equal_p (e1, e2, 0))
3786 *var_present = false;
3787 return zero_cost;
3790 *var_present = true;
3792 if (integer_zerop (e2))
3793 return force_var_cost (data, e1, depends_on);
3795 if (integer_zerop (e1))
3797 comp_cost cost = force_var_cost (data, e2, depends_on);
3798 cost.cost += multiply_by_cost (-1, mode, data->speed);
3799 return cost;
3802 type = signed_type_for (TREE_TYPE (e1));
3803 tree_to_aff_combination (e1, type, &aff_e1);
3804 tree_to_aff_combination (e2, type, &aff_e2);
3805 aff_combination_scale (&aff_e2, double_int_minus_one);
3806 aff_combination_add (&aff_e1, &aff_e2);
3808 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3811 /* Returns true if AFF1 and AFF2 are identical. */
3813 static bool
3814 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3816 unsigned i;
3818 if (aff1->n != aff2->n)
3819 return false;
3821 for (i = 0; i < aff1->n; i++)
3823 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3824 return false;
3826 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3827 return false;
3829 return true;
3832 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3833 requires a new compiler-generated temporary. Returns -1 otherwise.
3834 ADDRESS_P is a flag indicating if the expression is for address
3835 computation. */
3837 static int
3838 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3839 tree cbase, HOST_WIDE_INT ratio,
3840 bool address_p)
3842 aff_tree ubase_aff, cbase_aff;
3843 tree expr, ub, cb;
3844 struct iv_inv_expr_ent ent;
3845 struct iv_inv_expr_ent **slot;
3847 STRIP_NOPS (ubase);
3848 STRIP_NOPS (cbase);
3849 ub = ubase;
3850 cb = cbase;
3852 if ((TREE_CODE (ubase) == INTEGER_CST)
3853 && (TREE_CODE (cbase) == INTEGER_CST))
3854 return -1;
3856 /* Strips the constant part. */
3857 if (TREE_CODE (ubase) == PLUS_EXPR
3858 || TREE_CODE (ubase) == MINUS_EXPR
3859 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3861 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3862 ubase = TREE_OPERAND (ubase, 0);
3865 /* Strips the constant part. */
3866 if (TREE_CODE (cbase) == PLUS_EXPR
3867 || TREE_CODE (cbase) == MINUS_EXPR
3868 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3870 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3871 cbase = TREE_OPERAND (cbase, 0);
3874 if (address_p)
3876 if (((TREE_CODE (ubase) == SSA_NAME)
3877 || (TREE_CODE (ubase) == ADDR_EXPR
3878 && is_gimple_min_invariant (ubase)))
3879 && (TREE_CODE (cbase) == INTEGER_CST))
3880 return -1;
3882 if (((TREE_CODE (cbase) == SSA_NAME)
3883 || (TREE_CODE (cbase) == ADDR_EXPR
3884 && is_gimple_min_invariant (cbase)))
3885 && (TREE_CODE (ubase) == INTEGER_CST))
3886 return -1;
3889 if (ratio == 1)
3891 if (operand_equal_p (ubase, cbase, 0))
3892 return -1;
3894 if (TREE_CODE (ubase) == ADDR_EXPR
3895 && TREE_CODE (cbase) == ADDR_EXPR)
3897 tree usym, csym;
3899 usym = TREE_OPERAND (ubase, 0);
3900 csym = TREE_OPERAND (cbase, 0);
3901 if (TREE_CODE (usym) == ARRAY_REF)
3903 tree ind = TREE_OPERAND (usym, 1);
3904 if (TREE_CODE (ind) == INTEGER_CST
3905 && host_integerp (ind, 0)
3906 && TREE_INT_CST_LOW (ind) == 0)
3907 usym = TREE_OPERAND (usym, 0);
3909 if (TREE_CODE (csym) == ARRAY_REF)
3911 tree ind = TREE_OPERAND (csym, 1);
3912 if (TREE_CODE (ind) == INTEGER_CST
3913 && host_integerp (ind, 0)
3914 && TREE_INT_CST_LOW (ind) == 0)
3915 csym = TREE_OPERAND (csym, 0);
3917 if (operand_equal_p (usym, csym, 0))
3918 return -1;
3920 /* Now do a more complex comparison. */
3921 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3922 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3923 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3924 return -1;
3927 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3928 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3930 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3931 aff_combination_add (&ubase_aff, &cbase_aff);
3932 expr = aff_combination_to_tree (&ubase_aff);
3933 ent.expr = expr;
3934 ent.hash = iterative_hash_expr (expr, 0);
3935 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3936 &ent, INSERT);
3937 if (*slot)
3938 return (*slot)->id;
3940 *slot = XNEW (struct iv_inv_expr_ent);
3941 (*slot)->expr = expr;
3942 (*slot)->hash = ent.hash;
3943 (*slot)->id = data->inv_expr_id++;
3944 return (*slot)->id;
3949 /* Determines the cost of the computation by which USE is expressed
3950 from induction variable CAND. If ADDRESS_P is true, we just need
3951 to create an address from it, otherwise we want to get it into a
3952 register. A set of invariants we depend on is stored in
3953 DEPENDS_ON. AT is the statement at which the value is computed.
3954 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3955 addressing is likely. */
3957 static comp_cost
3958 get_computation_cost_at (struct ivopts_data *data,
3959 struct iv_use *use, struct iv_cand *cand,
3960 bool address_p, bitmap *depends_on, gimple at,
3961 bool *can_autoinc,
3962 int *inv_expr_id)
3964 tree ubase = use->iv->base, ustep = use->iv->step;
3965 tree cbase, cstep;
3966 tree utype = TREE_TYPE (ubase), ctype;
3967 unsigned HOST_WIDE_INT cstepi, offset = 0;
3968 HOST_WIDE_INT ratio, aratio;
3969 bool var_present, symbol_present, stmt_is_after_inc;
3970 comp_cost cost;
3971 double_int rat;
3972 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3974 *depends_on = NULL;
3976 /* Only consider real candidates. */
3977 if (!cand->iv)
3978 return infinite_cost;
3980 cbase = cand->iv->base;
3981 cstep = cand->iv->step;
3982 ctype = TREE_TYPE (cbase);
3984 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3986 /* We do not have enough precision to express the values of the use. */
3987 return infinite_cost;
3990 if (address_p)
3992 /* Do not try to express address of an object with computation based
3993 on address of a different object. This may cause problems in rtl
3994 level alias analysis (that does not expect this to be happening,
3995 as this is illegal in C), and would be unlikely to be useful
3996 anyway. */
3997 if (use->iv->base_object
3998 && cand->iv->base_object
3999 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4000 return infinite_cost;
4003 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4005 /* TODO -- add direct handling of this case. */
4006 goto fallback;
4009 /* CSTEPI is removed from the offset in case the statement is after the
4010 increment. If the step is not constant, we use zero instead.
4011 This is a bit imprecise (there is the extra addition), but
4012 redundancy elimination is likely to transform the code so that
4013 it uses the value of the variable before the increment anyway,
4014 so this is not too unrealistic. */
4015 if (cst_and_fits_in_hwi (cstep))
4016 cstepi = int_cst_value (cstep);
4017 else
4018 cstepi = 0;
4020 if (!constant_multiple_of (ustep, cstep, &rat))
4021 return infinite_cost;
4023 if (double_int_fits_in_shwi_p (rat))
4024 ratio = double_int_to_shwi (rat);
4025 else
4026 return infinite_cost;
4028 STRIP_NOPS (cbase);
4029 ctype = TREE_TYPE (cbase);
4031 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4033 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4034 or ratio == 1, it is better to handle this like
4036 ubase - ratio * cbase + ratio * var
4038 (this also holds in the case ratio == -1, TODO). */
4040 if (cst_and_fits_in_hwi (cbase))
4042 offset = - ratio * int_cst_value (cbase);
4043 cost = difference_cost (data,
4044 ubase, build_int_cst (utype, 0),
4045 &symbol_present, &var_present, &offset,
4046 depends_on);
4047 cost.cost /= avg_loop_niter (data->current_loop);
4049 else if (ratio == 1)
4051 tree real_cbase = cbase;
4053 /* Check to see if any adjustment is needed. */
4054 if (cstepi == 0 && stmt_is_after_inc)
4056 aff_tree real_cbase_aff;
4057 aff_tree cstep_aff;
4059 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4060 &real_cbase_aff);
4061 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4063 aff_combination_add (&real_cbase_aff, &cstep_aff);
4064 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4067 cost = difference_cost (data,
4068 ubase, real_cbase,
4069 &symbol_present, &var_present, &offset,
4070 depends_on);
4071 cost.cost /= avg_loop_niter (data->current_loop);
4073 else if (address_p
4074 && !POINTER_TYPE_P (ctype)
4075 && multiplier_allowed_in_address_p
4076 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4077 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4079 cbase
4080 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4081 cost = difference_cost (data,
4082 ubase, cbase,
4083 &symbol_present, &var_present, &offset,
4084 depends_on);
4085 cost.cost /= avg_loop_niter (data->current_loop);
4087 else
4089 cost = force_var_cost (data, cbase, depends_on);
4090 cost = add_costs (cost,
4091 difference_cost (data,
4092 ubase, build_int_cst (utype, 0),
4093 &symbol_present, &var_present,
4094 &offset, depends_on));
4095 cost.cost /= avg_loop_niter (data->current_loop);
4096 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4099 if (inv_expr_id)
4101 *inv_expr_id =
4102 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4103 /* Clear depends on. */
4104 if (*inv_expr_id != -1 && depends_on && *depends_on)
4105 bitmap_clear (*depends_on);
4108 /* If we are after the increment, the value of the candidate is higher by
4109 one iteration. */
4110 if (stmt_is_after_inc)
4111 offset -= ratio * cstepi;
4113 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4114 (symbol/var1/const parts may be omitted). If we are looking for an
4115 address, find the cost of addressing this. */
4116 if (address_p)
4117 return add_costs (cost,
4118 get_address_cost (symbol_present, var_present,
4119 offset, ratio, cstepi,
4120 TYPE_MODE (TREE_TYPE (utype)),
4121 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4122 speed, stmt_is_after_inc,
4123 can_autoinc));
4125 /* Otherwise estimate the costs for computing the expression. */
4126 if (!symbol_present && !var_present && !offset)
4128 if (ratio != 1)
4129 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4130 return cost;
4133 /* Symbol + offset should be compile-time computable so consider that they
4134 are added once to the variable, if present. */
4135 if (var_present && (symbol_present || offset))
4136 cost.cost += adjust_setup_cost (data,
4137 add_cost (TYPE_MODE (ctype), speed));
4139 /* Having offset does not affect runtime cost in case it is added to
4140 symbol, but it increases complexity. */
4141 if (offset)
4142 cost.complexity++;
4144 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4146 aratio = ratio > 0 ? ratio : -ratio;
4147 if (aratio != 1)
4148 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4149 return cost;
4151 fallback:
4152 if (can_autoinc)
4153 *can_autoinc = false;
4156 /* Just get the expression, expand it and measure the cost. */
4157 tree comp = get_computation_at (data->current_loop, use, cand, at);
4159 if (!comp)
4160 return infinite_cost;
4162 if (address_p)
4163 comp = build_simple_mem_ref (comp);
4165 return new_cost (computation_cost (comp, speed), 0);
4169 /* Determines the cost of the computation by which USE is expressed
4170 from induction variable CAND. If ADDRESS_P is true, we just need
4171 to create an address from it, otherwise we want to get it into a
4172 register. A set of invariants we depend on is stored in
4173 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4174 autoinc addressing is likely. */
4176 static comp_cost
4177 get_computation_cost (struct ivopts_data *data,
4178 struct iv_use *use, struct iv_cand *cand,
4179 bool address_p, bitmap *depends_on,
4180 bool *can_autoinc, int *inv_expr_id)
4182 return get_computation_cost_at (data,
4183 use, cand, address_p, depends_on, use->stmt,
4184 can_autoinc, inv_expr_id);
4187 /* Determines cost of basing replacement of USE on CAND in a generic
4188 expression. */
4190 static bool
4191 determine_use_iv_cost_generic (struct ivopts_data *data,
4192 struct iv_use *use, struct iv_cand *cand)
4194 bitmap depends_on;
4195 comp_cost cost;
4196 int inv_expr_id = -1;
4198 /* The simple case first -- if we need to express value of the preserved
4199 original biv, the cost is 0. This also prevents us from counting the
4200 cost of increment twice -- once at this use and once in the cost of
4201 the candidate. */
4202 if (cand->pos == IP_ORIGINAL
4203 && cand->incremented_at == use->stmt)
4205 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4206 return true;
4209 cost = get_computation_cost (data, use, cand, false, &depends_on,
4210 NULL, &inv_expr_id);
4212 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4213 inv_expr_id);
4215 return !infinite_cost_p (cost);
4218 /* Determines cost of basing replacement of USE on CAND in an address. */
4220 static bool
4221 determine_use_iv_cost_address (struct ivopts_data *data,
4222 struct iv_use *use, struct iv_cand *cand)
4224 bitmap depends_on;
4225 bool can_autoinc;
4226 int inv_expr_id = -1;
4227 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4228 &can_autoinc, &inv_expr_id);
4230 if (cand->ainc_use == use)
4232 if (can_autoinc)
4233 cost.cost -= cand->cost_step;
4234 /* If we generated the candidate solely for exploiting autoincrement
4235 opportunities, and it turns out it can't be used, set the cost to
4236 infinity to make sure we ignore it. */
4237 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4238 cost = infinite_cost;
4240 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4241 inv_expr_id);
4243 return !infinite_cost_p (cost);
4246 /* Computes value of candidate CAND at position AT in iteration NITER, and
4247 stores it to VAL. */
4249 static void
4250 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4251 aff_tree *val)
4253 aff_tree step, delta, nit;
4254 struct iv *iv = cand->iv;
4255 tree type = TREE_TYPE (iv->base);
4256 tree steptype = type;
4257 if (POINTER_TYPE_P (type))
4258 steptype = sizetype;
4260 tree_to_aff_combination (iv->step, steptype, &step);
4261 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4262 aff_combination_convert (&nit, steptype);
4263 aff_combination_mult (&nit, &step, &delta);
4264 if (stmt_after_increment (loop, cand, at))
4265 aff_combination_add (&delta, &step);
4267 tree_to_aff_combination (iv->base, type, val);
4268 aff_combination_add (val, &delta);
4271 /* Returns period of induction variable iv. */
4273 static tree
4274 iv_period (struct iv *iv)
4276 tree step = iv->step, period, type;
4277 tree pow2div;
4279 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4281 type = unsigned_type_for (TREE_TYPE (step));
4282 /* The period of the iv is lcm (step, type_range)/step - 1,
4283 i.e., N*type_range/step - 1. Since type_range is a power
4284 of two, N == (step >> num_of_ending_zeros_binary (step)),
4285 so the final result is
4287 (type_range >> num_of_ending_zeros_binary (step)) - 1.
4290 pow2div = num_ending_zeros (step);
4292 period = build_low_bits_mask (type,
4293 (TYPE_PRECISION (type)
4294 - tree_low_cst (pow2div, 1)));
4296 return period;
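/* Worked example (illustrative only): for a 32-bit unsigned iv with
   step 4, num_ending_zeros (4) == 2, so the period is

     (1 << (32 - 2)) - 1 == 0x3fffffff

   i.e. the iv takes 0x40000000 distinct values before wrapping back to
   its initial one.  */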
4299 /* Returns the comparison operator used when eliminating the iv USE. */
4301 static enum tree_code
4302 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4304 struct loop *loop = data->current_loop;
4305 basic_block ex_bb;
4306 edge exit;
4308 ex_bb = gimple_bb (use->stmt);
4309 exit = EDGE_SUCC (ex_bb, 0);
4310 if (flow_bb_inside_loop_p (loop, exit->dest))
4311 exit = EDGE_SUCC (ex_bb, 1);
4313 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
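/* For example, given the exit test

     if (i < n) goto body; else goto exit;

   the exit is on the false edge, so NE_EXPR is returned and the
   eliminated test becomes "stay in the loop while var != bound"; with
   the exit on the true edge, EQ_EXPR ("exit once var == bound") is
   used instead.  */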
4316 /* Check whether it is possible to express the condition in USE by comparison
4317 of candidate CAND. If so, store the value compared with to BOUND. */
4319 static bool
4320 may_eliminate_iv (struct ivopts_data *data,
4321 struct iv_use *use, struct iv_cand *cand, tree *bound)
4323 basic_block ex_bb;
4324 edge exit;
4325 tree nit, period;
4326 struct loop *loop = data->current_loop;
4327 aff_tree bnd;
4328 struct tree_niter_desc *desc = NULL;
4330 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4331 return false;
4333 /* For now this works only for exits that dominate the loop latch.
4334 TODO: extend to other conditions inside loop body. */
4335 ex_bb = gimple_bb (use->stmt);
4336 if (use->stmt != last_stmt (ex_bb)
4337 || gimple_code (use->stmt) != GIMPLE_COND
4338 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4339 return false;
4341 exit = EDGE_SUCC (ex_bb, 0);
4342 if (flow_bb_inside_loop_p (loop, exit->dest))
4343 exit = EDGE_SUCC (ex_bb, 1);
4344 if (flow_bb_inside_loop_p (loop, exit->dest))
4345 return false;
4347 nit = niter_for_exit (data, exit, &desc);
4348 if (!nit)
4349 return false;
4351 /* Determine whether we can use the variable to test the exit condition.
4352 This is the case iff the period of the induction variable is greater
4353 than the number of iterations for which the exit condition is true. */
4354 period = iv_period (cand->iv);
4356 /* If the number of iterations is constant, compare against it directly. */
4357 if (TREE_CODE (nit) == INTEGER_CST)
4359 /* See cand_value_at. */
4360 if (stmt_after_increment (loop, cand, use->stmt))
4362 if (!tree_int_cst_lt (nit, period))
4363 return false;
4365 else
4367 if (tree_int_cst_lt (period, nit))
4368 return false;
4372 /* If not, and if this is the only possible exit of the loop, see whether
4373 we can get a conservative estimate on the number of iterations of the
4374 entire loop and compare against that instead. */
4375 else
4377 double_int period_value, max_niter;
4379 max_niter = desc->max;
4380 if (stmt_after_increment (loop, cand, use->stmt))
4381 max_niter = double_int_add (max_niter, double_int_one);
4382 period_value = tree_to_double_int (period);
4383 if (double_int_ucmp (max_niter, period_value) > 0)
4385 /* See if we can take advantage of inferred loop bound information. */
4386 if (loop_only_exit_p (loop, exit))
4388 if (!estimated_loop_iterations (loop, true, &max_niter))
4389 return false;
4390 /* The loop bound is already adjusted by adding 1. */
4391 if (double_int_ucmp (max_niter, period_value) > 0)
4392 return false;
4394 else
4395 return false;
4399 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4401 *bound = aff_combination_to_tree (&bnd);
4402 /* It is unlikely that computing the number of iterations using division
4403 would be more profitable than keeping the original induction variable. */
4404 if (expression_expensive_p (*bound))
4405 return false;
4406 return true;
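/* Illustrative sketch, assuming a simple counted loop:

     for (i = 0; i < n; i++)
       a[i] = 0;

   If a pointer candidate p == &a[i] is selected, cand_value_at yields
   the value of p after the final iteration, and the exit test on i can
   be replaced by comparing p against that bound, e.g.

     p != &a[0] + n * sizeof (a[0])

   The names are hypothetical; the actual bound is whatever
   aff_combination_to_tree produces above.  */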
4410 /* Determines cost of basing replacement of USE on CAND in a condition. */
4412 static bool
4413 determine_use_iv_cost_condition (struct ivopts_data *data,
4414 struct iv_use *use, struct iv_cand *cand)
4416 tree bound = NULL_TREE;
4417 struct iv *cmp_iv;
4418 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4419 comp_cost elim_cost, express_cost, cost;
4420 bool ok;
4421 int inv_expr_id = -1;
4422 tree *control_var, *bound_cst;
4424 /* Only consider real candidates. */
4425 if (!cand->iv)
4427 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4428 return false;
4431 /* Try iv elimination. */
4432 if (may_eliminate_iv (data, use, cand, &bound))
4434 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4435 /* The bound is a loop invariant, so it will only be computed
4436 once. */
4437 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4439 else
4440 elim_cost = infinite_cost;
4442 /* Try expressing the original giv. If it is compared with an invariant,
4443 note that we cannot get rid of it. */
4444 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4445 NULL, &cmp_iv);
4446 gcc_assert (ok);
4448 /* When the condition is a comparison of the candidate IV against
4449 zero, prefer this IV.
4451 TODO: The constant that we're subtracting from the cost should
4452 be target-dependent. This information should be added to the
4453 target costs for each backend. */
4454 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4455 && integer_zerop (*bound_cst)
4456 && (operand_equal_p (*control_var, cand->var_after, 0)
4457 || operand_equal_p (*control_var, cand->var_before, 0)))
4458 elim_cost.cost -= 1;
4460 express_cost = get_computation_cost (data, use, cand, false,
4461 &depends_on_express, NULL,
4462 &inv_expr_id);
4463 fd_ivopts_data = data;
4464 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4466 /* Choose the better approach, preferring the eliminated IV. */
4467 if (compare_costs (elim_cost, express_cost) <= 0)
4469 cost = elim_cost;
4470 depends_on = depends_on_elim;
4471 depends_on_elim = NULL;
4473 else
4475 cost = express_cost;
4476 depends_on = depends_on_express;
4477 depends_on_express = NULL;
4478 bound = NULL_TREE;
4481 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4483 if (depends_on_elim)
4484 BITMAP_FREE (depends_on_elim);
4485 if (depends_on_express)
4486 BITMAP_FREE (depends_on_express);
4488 return !infinite_cost_p (cost);
4491 /* Determines cost of basing replacement of USE on CAND. Returns false
4492 if USE cannot be based on CAND. */
4494 static bool
4495 determine_use_iv_cost (struct ivopts_data *data,
4496 struct iv_use *use, struct iv_cand *cand)
4498 switch (use->type)
4500 case USE_NONLINEAR_EXPR:
4501 return determine_use_iv_cost_generic (data, use, cand);
4503 case USE_ADDRESS:
4504 return determine_use_iv_cost_address (data, use, cand);
4506 case USE_COMPARE:
4507 return determine_use_iv_cost_condition (data, use, cand);
4509 default:
4510 gcc_unreachable ();
4514 /* Return true if get_computation_cost indicates that autoincrement is
4515 a possibility for the pair of USE and CAND, false otherwise. */
4517 static bool
4518 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4519 struct iv_cand *cand)
4521 bitmap depends_on;
4522 bool can_autoinc;
4523 comp_cost cost;
4525 if (use->type != USE_ADDRESS)
4526 return false;
4528 cost = get_computation_cost (data, use, cand, true, &depends_on,
4529 &can_autoinc, NULL);
4531 BITMAP_FREE (depends_on);
4533 return !infinite_cost_p (cost) && can_autoinc;
4536 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4537 use that allows autoincrement, and set their AINC_USE if possible. */
4539 static void
4540 set_autoinc_for_original_candidates (struct ivopts_data *data)
4542 unsigned i, j;
4544 for (i = 0; i < n_iv_cands (data); i++)
4546 struct iv_cand *cand = iv_cand (data, i);
4547 struct iv_use *closest = NULL;
4548 if (cand->pos != IP_ORIGINAL)
4549 continue;
4550 for (j = 0; j < n_iv_uses (data); j++)
4552 struct iv_use *use = iv_use (data, j);
4553 unsigned uid = gimple_uid (use->stmt);
4554 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4555 || uid > gimple_uid (cand->incremented_at))
4556 continue;
4557 if (closest == NULL || uid > gimple_uid (closest->stmt))
4558 closest = use;
4560 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4561 continue;
4562 cand->ainc_use = closest;
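/* Illustrative sketch (hypothetical target): with post-increment
   addressing, a load immediately followed by the candidate's increment

     t = *p;
     p = p + 4;

   can be combined into a single auto-incrementing access.  Recording
   the closest preceding use as AINC_USE makes that pairing visible to
   the cost computation, which reports it through CAN_AUTOINC.  */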
4566 /* Finds the candidates for the induction variables. */
4568 static void
4569 find_iv_candidates (struct ivopts_data *data)
4571 /* Add commonly used ivs. */
4572 add_standard_iv_candidates (data);
4574 /* Add old induction variables. */
4575 add_old_ivs_candidates (data);
4577 /* Add induction variables derived from uses. */
4578 add_derived_ivs_candidates (data);
4580 set_autoinc_for_original_candidates (data);
4582 /* Record the important candidates. */
4583 record_important_candidates (data);
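/* For a loop like "for (i = 0; i < n; i++) a[i] = 0;" this would
   typically collect standard counter candidates such as (base 0,
   step +1), the original biv i, and candidates derived from the
   address use &a[i].  The concrete list is only an illustration;
   which candidates survive is decided by the cost model below.  */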
4586 /* Determines costs of basing the use of the iv on an iv candidate. */
4588 static void
4589 determine_use_iv_costs (struct ivopts_data *data)
4591 unsigned i, j;
4592 struct iv_use *use;
4593 struct iv_cand *cand;
4594 bitmap to_clear = BITMAP_ALLOC (NULL);
4596 alloc_use_cost_map (data);
4598 for (i = 0; i < n_iv_uses (data); i++)
4600 use = iv_use (data, i);
4602 if (data->consider_all_candidates)
4604 for (j = 0; j < n_iv_cands (data); j++)
4606 cand = iv_cand (data, j);
4607 determine_use_iv_cost (data, use, cand);
4610 else
4612 bitmap_iterator bi;
4614 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4616 cand = iv_cand (data, j);
4617 if (!determine_use_iv_cost (data, use, cand))
4618 bitmap_set_bit (to_clear, j);
4621 /* Remove the candidates for which the cost is infinite from
4622 the list of related candidates. */
4623 bitmap_and_compl_into (use->related_cands, to_clear);
4624 bitmap_clear (to_clear);
4628 BITMAP_FREE (to_clear);
4630 if (dump_file && (dump_flags & TDF_DETAILS))
4632 fprintf (dump_file, "Use-candidate costs:\n");
4634 for (i = 0; i < n_iv_uses (data); i++)
4636 use = iv_use (data, i);
4638 fprintf (dump_file, "Use %d:\n", i);
4639 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4640 for (j = 0; j < use->n_map_members; j++)
4642 if (!use->cost_map[j].cand
4643 || infinite_cost_p (use->cost_map[j].cost))
4644 continue;
4646 fprintf (dump_file, " %d\t%d\t%d\t",
4647 use->cost_map[j].cand->id,
4648 use->cost_map[j].cost.cost,
4649 use->cost_map[j].cost.complexity);
4650 if (use->cost_map[j].depends_on)
4651 bitmap_print (dump_file,
4652 use->cost_map[j].depends_on, "","");
4653 if (use->cost_map[j].inv_expr_id != -1)
4654 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4655 fprintf (dump_file, "\n");
4658 fprintf (dump_file, "\n");
4660 fprintf (dump_file, "\n");
4664 /* Determines cost of the candidate CAND. */
4666 static void
4667 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4669 comp_cost cost_base;
4670 unsigned cost, cost_step;
4671 tree base;
4673 if (!cand->iv)
4675 cand->cost = 0;
4676 return;
4679 /* There are two costs associated with the candidate -- its increment
4680 and its initialization. The second is almost negligible for any loop
4681 that rolls enough, so we give it only a little weight. */
4683 base = cand->iv->base;
4684 cost_base = force_var_cost (data, base, NULL);
4685 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4687 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4689 /* Prefer the original ivs unless we may gain something by replacing them.
4690 The reason is to make debugging simpler, so this is not relevant for
4691 artificial ivs created by other optimization passes. */
4692 if (cand->pos != IP_ORIGINAL
4693 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4694 cost++;
4696 /* Prefer not to insert statements into the latch unless there are some
4697 already (so that we do not create unnecessary jumps). */
4698 if (cand->pos == IP_END
4699 && empty_block_p (ip_end_pos (data->current_loop)))
4700 cost++;
4702 cand->cost = cost;
4703 cand->cost_step = cost_step;
4706 /* Determines costs of computation of the candidates. */
4708 static void
4709 determine_iv_costs (struct ivopts_data *data)
4711 unsigned i;
4713 if (dump_file && (dump_flags & TDF_DETAILS))
4715 fprintf (dump_file, "Candidate costs:\n");
4716 fprintf (dump_file, " cand\tcost\n");
4719 for (i = 0; i < n_iv_cands (data); i++)
4721 struct iv_cand *cand = iv_cand (data, i);
4723 determine_iv_cost (data, cand);
4725 if (dump_file && (dump_flags & TDF_DETAILS))
4726 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4729 if (dump_file && (dump_flags & TDF_DETAILS))
4730 fprintf (dump_file, "\n");
4733 /* Calculates cost for having SIZE induction variables. */
4735 static unsigned
4736 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4738 /* We add size to the cost, so that we prefer eliminating ivs
4739 if possible. */
4740 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4741 data->body_includes_call);
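/* For instance, with SIZE == 3 the result is 3 plus whatever
   estimate_reg_pressure_cost returns for three registers; the "size +"
   term acts as a tie-breaker, so a solution with fewer ivs wins over
   an otherwise equally expensive larger one.  */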
4744 /* For each size of the induction variable set determine the penalty. */
4746 static void
4747 determine_set_costs (struct ivopts_data *data)
4749 unsigned j, n;
4750 gimple phi;
4751 gimple_stmt_iterator psi;
4752 tree op;
4753 struct loop *loop = data->current_loop;
4754 bitmap_iterator bi;
4756 if (dump_file && (dump_flags & TDF_DETAILS))
4758 fprintf (dump_file, "Global costs:\n");
4759 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4760 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4761 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4762 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4765 n = 0;
4766 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4768 phi = gsi_stmt (psi);
4769 op = PHI_RESULT (phi);
4771 if (!is_gimple_reg (op))
4772 continue;
4774 if (get_iv (data, op))
4775 continue;
4777 n++;
4780 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4782 struct version_info *info = ver_info (data, j);
4784 if (info->inv_id && info->has_nonlin_use)
4785 n++;
4788 data->regs_used = n;
4789 if (dump_file && (dump_flags & TDF_DETAILS))
4790 fprintf (dump_file, " regs_used %d\n", n);
4792 if (dump_file && (dump_flags & TDF_DETAILS))
4794 fprintf (dump_file, " cost for size:\n");
4795 fprintf (dump_file, " ivs\tcost\n");
4796 for (j = 0; j <= 2 * target_avail_regs; j++)
4797 fprintf (dump_file, " %d\t%d\n", j,
4798 ivopts_global_cost_for_size (data, j));
4799 fprintf (dump_file, "\n");
4803 /* Returns true if A is a cheaper cost pair than B. */
4805 static bool
4806 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4808 int cmp;
4810 if (!a)
4811 return false;
4813 if (!b)
4814 return true;
4816 cmp = compare_costs (a->cost, b->cost);
4817 if (cmp < 0)
4818 return true;
4820 if (cmp > 0)
4821 return false;
4823 /* In case the costs are the same, prefer the cheaper candidate. */
4824 if (a->cand->cost < b->cand->cost)
4825 return true;
4827 return false;
4831 /* Returns the candidate by which USE is expressed in IVS. */
4833 static struct cost_pair *
4834 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4836 return ivs->cand_for_use[use->id];
4839 /* Computes the cost field of IVS structure. */
4841 static void
4842 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4844 comp_cost cost = ivs->cand_use_cost;
4846 cost.cost += ivs->cand_cost;
4848 cost.cost += ivopts_global_cost_for_size (data,
4849 ivs->n_regs + ivs->num_used_inv_expr);
4851 ivs->cost = cost;
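/* In other words, the total cost of an assignment is

     cand_use_cost                                (chosen pairs, summed over uses)
     + cand_cost                                  (costs of the selected candidates)
     + ivopts_global_cost_for_size (n_regs + num_used_inv_expr)

   so registers held by invariant expressions are charged exactly like
   registers held by ivs.  */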
4854 /* Remove invariants in set INVS from set IVS. */
4856 static void
4857 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4859 bitmap_iterator bi;
4860 unsigned iid;
4862 if (!invs)
4863 return;
4865 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4867 ivs->n_invariant_uses[iid]--;
4868 if (ivs->n_invariant_uses[iid] == 0)
4869 ivs->n_regs--;
4873 /* Set USE not to be expressed by any candidate in IVS. */
4875 static void
4876 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4877 struct iv_use *use)
4879 unsigned uid = use->id, cid;
4880 struct cost_pair *cp;
4882 cp = ivs->cand_for_use[uid];
4883 if (!cp)
4884 return;
4885 cid = cp->cand->id;
4887 ivs->bad_uses++;
4888 ivs->cand_for_use[uid] = NULL;
4889 ivs->n_cand_uses[cid]--;
4891 if (ivs->n_cand_uses[cid] == 0)
4893 bitmap_clear_bit (ivs->cands, cid);
4894 /* Do not count the pseudocandidates. */
4895 if (cp->cand->iv)
4896 ivs->n_regs--;
4897 ivs->n_cands--;
4898 ivs->cand_cost -= cp->cand->cost;
4900 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4903 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4905 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4907 if (cp->inv_expr_id != -1)
4909 ivs->used_inv_expr[cp->inv_expr_id]--;
4910 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4911 ivs->num_used_inv_expr--;
4913 iv_ca_recount_cost (data, ivs);
4916 /* Add invariants in set INVS to set IVS. */
4918 static void
4919 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4921 bitmap_iterator bi;
4922 unsigned iid;
4924 if (!invs)
4925 return;
4927 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4929 ivs->n_invariant_uses[iid]++;
4930 if (ivs->n_invariant_uses[iid] == 1)
4931 ivs->n_regs++;
4935 /* Set cost pair for USE in set IVS to CP. */
4937 static void
4938 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4939 struct iv_use *use, struct cost_pair *cp)
4941 unsigned uid = use->id, cid;
4943 if (ivs->cand_for_use[uid] == cp)
4944 return;
4946 if (ivs->cand_for_use[uid])
4947 iv_ca_set_no_cp (data, ivs, use);
4949 if (cp)
4951 cid = cp->cand->id;
4953 ivs->bad_uses--;
4954 ivs->cand_for_use[uid] = cp;
4955 ivs->n_cand_uses[cid]++;
4956 if (ivs->n_cand_uses[cid] == 1)
4958 bitmap_set_bit (ivs->cands, cid);
4959 /* Do not count the pseudocandidates. */
4960 if (cp->cand->iv)
4961 ivs->n_regs++;
4962 ivs->n_cands++;
4963 ivs->cand_cost += cp->cand->cost;
4965 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4968 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4969 iv_ca_set_add_invariants (ivs, cp->depends_on);
4971 if (cp->inv_expr_id != -1)
4973 ivs->used_inv_expr[cp->inv_expr_id]++;
4974 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4975 ivs->num_used_inv_expr++;
4977 iv_ca_recount_cost (data, ivs);
4981 /* Extend set IVS by expressing USE by some of the candidates in it
4982 if possible. All important candidates will be considered
4983 if IMPORTANT_CANDIDATES is true. */
4985 static void
4986 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4987 struct iv_use *use, bool important_candidates)
4989 struct cost_pair *best_cp = NULL, *cp;
4990 bitmap_iterator bi;
4991 bitmap cands;
4992 unsigned i;
4994 gcc_assert (ivs->upto >= use->id);
4996 if (ivs->upto == use->id)
4998 ivs->upto++;
4999 ivs->bad_uses++;
5002 cands = (important_candidates ? data->important_candidates : ivs->cands);
5003 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5005 struct iv_cand *cand = iv_cand (data, i);
5007 cp = get_use_iv_cost (data, use, cand);
5009 if (cheaper_cost_pair (cp, best_cp))
5010 best_cp = cp;
5013 iv_ca_set_cp (data, ivs, use, best_cp);
5016 /* Get cost for assignment IVS. */
5018 static comp_cost
5019 iv_ca_cost (struct iv_ca *ivs)
5021 /* This was a conditional expression but it triggered a bug in
5022 Sun C 5.5. */
5023 if (ivs->bad_uses)
5024 return infinite_cost;
5025 else
5026 return ivs->cost;
5029 /* Returns true if all dependences of CP are among invariants in IVS. */
5031 static bool
5032 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5034 unsigned i;
5035 bitmap_iterator bi;
5037 if (!cp->depends_on)
5038 return true;
5040 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5042 if (ivs->n_invariant_uses[i] == 0)
5043 return false;
5046 return true;
5049 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5050 it before NEXT_CHANGE. */
5052 static struct iv_ca_delta *
5053 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5054 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5056 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5058 change->use = use;
5059 change->old_cp = old_cp;
5060 change->new_cp = new_cp;
5061 change->next_change = next_change;
5063 return change;
5066 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5067 are rewritten. */
5069 static struct iv_ca_delta *
5070 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5072 struct iv_ca_delta *last;
5074 if (!l2)
5075 return l1;
5077 if (!l1)
5078 return l2;
5080 for (last = l1; last->next_change; last = last->next_change)
5081 continue;
5082 last->next_change = l2;
5084 return l1;
5087 /* Reverse the list of changes DELTA, forming the inverse to it. */
5089 static struct iv_ca_delta *
5090 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5092 struct iv_ca_delta *act, *next, *prev = NULL;
5093 struct cost_pair *tmp;
5095 for (act = delta; act; act = next)
5097 next = act->next_change;
5098 act->next_change = prev;
5099 prev = act;
5101 tmp = act->old_cp;
5102 act->old_cp = act->new_cp;
5103 act->new_cp = tmp;
5106 return prev;
5109 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5110 reverted instead. */
5112 static void
5113 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5114 struct iv_ca_delta *delta, bool forward)
5116 struct cost_pair *from, *to;
5117 struct iv_ca_delta *act;
5119 if (!forward)
5120 delta = iv_ca_delta_reverse (delta);
5122 for (act = delta; act; act = act->next_change)
5124 from = act->old_cp;
5125 to = act->new_cp;
5126 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5127 iv_ca_set_cp (data, ivs, act->use, to);
5130 if (!forward)
5131 iv_ca_delta_reverse (delta);
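/* The deltas make trial moves cheap to evaluate; the usual pattern
   (see e.g. iv_ca_extend below) is

     iv_ca_delta_commit (data, ivs, delta, true);     apply tentatively
     cost = iv_ca_cost (ivs);                         measure
     iv_ca_delta_commit (data, ivs, delta, false);    undo

   where the undo direction relies on iv_ca_delta_reverse above.  */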
5134 /* Returns true if CAND is used in IVS. */
5136 static bool
5137 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5139 return ivs->n_cand_uses[cand->id] > 0;
5142 /* Returns number of induction variable candidates in the set IVS. */
5144 static unsigned
5145 iv_ca_n_cands (struct iv_ca *ivs)
5147 return ivs->n_cands;
5150 /* Free the list of changes DELTA. */
5152 static void
5153 iv_ca_delta_free (struct iv_ca_delta **delta)
5155 struct iv_ca_delta *act, *next;
5157 for (act = *delta; act; act = next)
5159 next = act->next_change;
5160 free (act);
5163 *delta = NULL;
5166 /* Allocates a new iv candidate assignment. */
5168 static struct iv_ca *
5169 iv_ca_new (struct ivopts_data *data)
5171 struct iv_ca *nw = XNEW (struct iv_ca);
5173 nw->upto = 0;
5174 nw->bad_uses = 0;
5175 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5176 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5177 nw->cands = BITMAP_ALLOC (NULL);
5178 nw->n_cands = 0;
5179 nw->n_regs = 0;
5180 nw->cand_use_cost = zero_cost;
5181 nw->cand_cost = 0;
5182 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5183 nw->cost = zero_cost;
5184 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5185 nw->num_used_inv_expr = 0;
5187 return nw;
5190 /* Free memory occupied by the set IVS. */
5192 static void
5193 iv_ca_free (struct iv_ca **ivs)
5195 free ((*ivs)->cand_for_use);
5196 free ((*ivs)->n_cand_uses);
5197 BITMAP_FREE ((*ivs)->cands);
5198 free ((*ivs)->n_invariant_uses);
5199 free ((*ivs)->used_inv_expr);
5200 free (*ivs);
5201 *ivs = NULL;
5204 /* Dumps IVS to FILE. */
5206 static void
5207 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5209 const char *pref = " invariants ";
5210 unsigned i;
5211 comp_cost cost = iv_ca_cost (ivs);
5213 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5214 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5215 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5216 bitmap_print (file, ivs->cands, " candidates: ","\n");
5218 for (i = 0; i < ivs->upto; i++)
5220 struct iv_use *use = iv_use (data, i);
5221 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5222 if (cp)
5223 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5224 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5225 else
5226 fprintf (file, " use:%d --> ??\n", use->id);
5229 for (i = 1; i <= data->max_inv_id; i++)
5230 if (ivs->n_invariant_uses[i])
5232 fprintf (file, "%s%d", pref, i);
5233 pref = ", ";
5235 fprintf (file, "\n\n");
5238 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5239 of the new set, and store the differences in DELTA. The number of induction
5240 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5241 the function will try to find a solution with a minimal number of iv candidates. */
5243 static comp_cost
5244 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5245 struct iv_cand *cand, struct iv_ca_delta **delta,
5246 unsigned *n_ivs, bool min_ncand)
5248 unsigned i;
5249 comp_cost cost;
5250 struct iv_use *use;
5251 struct cost_pair *old_cp, *new_cp;
5253 *delta = NULL;
5254 for (i = 0; i < ivs->upto; i++)
5256 use = iv_use (data, i);
5257 old_cp = iv_ca_cand_for_use (ivs, use);
5259 if (old_cp
5260 && old_cp->cand == cand)
5261 continue;
5263 new_cp = get_use_iv_cost (data, use, cand);
5264 if (!new_cp)
5265 continue;
5267 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5268 continue;
5270 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5271 continue;
5273 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5276 iv_ca_delta_commit (data, ivs, *delta, true);
5277 cost = iv_ca_cost (ivs);
5278 if (n_ivs)
5279 *n_ivs = iv_ca_n_cands (ivs);
5280 iv_ca_delta_commit (data, ivs, *delta, false);
5282 return cost;
5285 /* Try narrowing set IVS by removing CAND. Return the cost of
5286 the new set and store the differences in DELTA. */
5288 static comp_cost
5289 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5290 struct iv_cand *cand, struct iv_ca_delta **delta)
5292 unsigned i, ci;
5293 struct iv_use *use;
5294 struct cost_pair *old_cp, *new_cp, *cp;
5295 bitmap_iterator bi;
5296 struct iv_cand *cnd;
5297 comp_cost cost;
5299 *delta = NULL;
5300 for (i = 0; i < n_iv_uses (data); i++)
5302 use = iv_use (data, i);
5304 old_cp = iv_ca_cand_for_use (ivs, use);
5305 if (old_cp->cand != cand)
5306 continue;
5308 new_cp = NULL;
5310 if (data->consider_all_candidates)
5312 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5314 if (ci == cand->id)
5315 continue;
5317 cnd = iv_cand (data, ci);
5319 cp = get_use_iv_cost (data, use, cnd);
5320 if (!cp)
5321 continue;
5323 if (!iv_ca_has_deps (ivs, cp))
5324 continue;
5326 if (!cheaper_cost_pair (cp, new_cp))
5327 continue;
5329 new_cp = cp;
5332 else
5334 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5336 if (ci == cand->id)
5337 continue;
5339 cnd = iv_cand (data, ci);
5341 cp = get_use_iv_cost (data, use, cnd);
5342 if (!cp)
5343 continue;
5344 if (!iv_ca_has_deps (ivs, cp))
5345 continue;
5347 if (!cheaper_cost_pair (cp, new_cp))
5348 continue;
5350 new_cp = cp;
5354 if (!new_cp)
5356 iv_ca_delta_free (delta);
5357 return infinite_cost;
5360 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5363 iv_ca_delta_commit (data, ivs, *delta, true);
5364 cost = iv_ca_cost (ivs);
5365 iv_ca_delta_commit (data, ivs, *delta, false);
5367 return cost;
5370 /* Try optimizing the set of candidates IVS by removing candidates different
5371 from EXCEPT_CAND from it. Return the cost of the new set, and store the
5372 differences in DELTA. */
5374 static comp_cost
5375 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5376 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5378 bitmap_iterator bi;
5379 struct iv_ca_delta *act_delta, *best_delta;
5380 unsigned i;
5381 comp_cost best_cost, acost;
5382 struct iv_cand *cand;
5384 best_delta = NULL;
5385 best_cost = iv_ca_cost (ivs);
5387 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5389 cand = iv_cand (data, i);
5391 if (cand == except_cand)
5392 continue;
5394 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5396 if (compare_costs (acost, best_cost) < 0)
5398 best_cost = acost;
5399 iv_ca_delta_free (&best_delta);
5400 best_delta = act_delta;
5402 else
5403 iv_ca_delta_free (&act_delta);
5406 if (!best_delta)
5408 *delta = NULL;
5409 return best_cost;
5412 /* Recurse to possibly remove other unnecessary ivs. */
5413 iv_ca_delta_commit (data, ivs, best_delta, true);
5414 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5415 iv_ca_delta_commit (data, ivs, best_delta, false);
5416 *delta = iv_ca_delta_join (best_delta, *delta);
5417 return best_cost;
5420 /* Tries to extend the set IVS in the best possible way in order
5421 to express the USE. If ORIGINALP is true, prefer candidates from
5422 the original set of IVs, otherwise favor important candidates not
5423 based on any memory object. */
5425 static bool
5426 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5427 struct iv_use *use, bool originalp)
5429 comp_cost best_cost, act_cost;
5430 unsigned i;
5431 bitmap_iterator bi;
5432 struct iv_cand *cand;
5433 struct iv_ca_delta *best_delta = NULL, *act_delta;
5434 struct cost_pair *cp;
5436 iv_ca_add_use (data, ivs, use, false);
5437 best_cost = iv_ca_cost (ivs);
5439 cp = iv_ca_cand_for_use (ivs, use);
5440 if (!cp)
5442 ivs->upto--;
5443 ivs->bad_uses--;
5444 iv_ca_add_use (data, ivs, use, true);
5445 best_cost = iv_ca_cost (ivs);
5446 cp = iv_ca_cand_for_use (ivs, use);
5448 if (cp)
5450 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5451 iv_ca_set_no_cp (data, ivs, use);
5454 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5455 first try important candidates not based on any memory object. Only if
5456 this fails, try the specific ones. Rationale -- in loops with many
5457 variables the best choice often is to use just one generic biv. If we
5458 added here many ivs specific to the uses, the optimization algorithm later
5459 would be likely to get stuck in a local minimum, thus causing us to create
5460 too many ivs. The approach from few ivs to more seems more likely to be
5461 successful -- starting from few ivs, replacing an expensive use by a
5462 specific iv should always be a win. */
5463 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5465 cand = iv_cand (data, i);
5467 if (originalp && cand->pos != IP_ORIGINAL)
5468 continue;
5470 if (!originalp && cand->iv->base_object != NULL_TREE)
5471 continue;
5473 if (iv_ca_cand_used_p (ivs, cand))
5474 continue;
5476 cp = get_use_iv_cost (data, use, cand);
5477 if (!cp)
5478 continue;
5480 iv_ca_set_cp (data, ivs, use, cp);
5481 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5482 true);
5483 iv_ca_set_no_cp (data, ivs, use);
5484 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5486 if (compare_costs (act_cost, best_cost) < 0)
5488 best_cost = act_cost;
5490 iv_ca_delta_free (&best_delta);
5491 best_delta = act_delta;
5493 else
5494 iv_ca_delta_free (&act_delta);
5497 if (infinite_cost_p (best_cost))
5499 for (i = 0; i < use->n_map_members; i++)
5501 cp = use->cost_map + i;
5502 cand = cp->cand;
5503 if (!cand)
5504 continue;
5506 /* Already tried this. */
5507 if (cand->important)
5509 if (originalp && cand->pos == IP_ORIGINAL)
5510 continue;
5511 if (!originalp && cand->iv->base_object == NULL_TREE)
5512 continue;
5515 if (iv_ca_cand_used_p (ivs, cand))
5516 continue;
5518 act_delta = NULL;
5519 iv_ca_set_cp (data, ivs, use, cp);
5520 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5521 iv_ca_set_no_cp (data, ivs, use);
5522 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5523 cp, act_delta);
5525 if (compare_costs (act_cost, best_cost) < 0)
5527 best_cost = act_cost;
5529 if (best_delta)
5530 iv_ca_delta_free (&best_delta);
5531 best_delta = act_delta;
5533 else
5534 iv_ca_delta_free (&act_delta);
5538 iv_ca_delta_commit (data, ivs, best_delta, true);
5539 iv_ca_delta_free (&best_delta);
5541 return !infinite_cost_p (best_cost);
5544 /* Finds an initial assignment of candidates to uses. */
5546 static struct iv_ca *
5547 get_initial_solution (struct ivopts_data *data, bool originalp)
5549 struct iv_ca *ivs = iv_ca_new (data);
5550 unsigned i;
5552 for (i = 0; i < n_iv_uses (data); i++)
5553 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5555 iv_ca_free (&ivs);
5556 return NULL;
5559 return ivs;
5562 /* Tries to improve set of induction variables IVS. */
5564 static bool
5565 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5567 unsigned i, n_ivs;
5568 comp_cost acost, best_cost = iv_ca_cost (ivs);
5569 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5570 struct iv_cand *cand;
5572 /* Try extending the set of induction variables by one. */
5573 for (i = 0; i < n_iv_cands (data); i++)
5575 cand = iv_cand (data, i);
5577 if (iv_ca_cand_used_p (ivs, cand))
5578 continue;
5580 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5581 if (!act_delta)
5582 continue;
5584 /* If we successfully added the candidate and the set is small enough,
5585 try optimizing it by removing other candidates. */
5586 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5588 iv_ca_delta_commit (data, ivs, act_delta, true);
5589 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5590 iv_ca_delta_commit (data, ivs, act_delta, false);
5591 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5594 if (compare_costs (acost, best_cost) < 0)
5596 best_cost = acost;
5597 iv_ca_delta_free (&best_delta);
5598 best_delta = act_delta;
5600 else
5601 iv_ca_delta_free (&act_delta);
5604 if (!best_delta)
5606 /* Try removing the candidates from the set instead. */
5607 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5609 /* Nothing more we can do. */
5610 if (!best_delta)
5611 return false;
5614 iv_ca_delta_commit (data, ivs, best_delta, true);
5615 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5616 iv_ca_delta_free (&best_delta);
5617 return true;
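/* Schematically, the search is a local descent:

     while (some single-candidate extension or pruning lowers iv_ca_cost (ivs))
       commit the cheapest such delta;

   so the final set is locally optimal with respect to adding or
   removing one candidate, not necessarily globally optimal.  */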
5620 /* Attempts to find the optimal set of induction variables. We use a simple
5621 greedy heuristic -- we try to replace at most one candidate in the selected
5622 solution and remove the unused ivs while this improves the cost. */
5624 static struct iv_ca *
5625 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5627 struct iv_ca *set;
5629 /* Get the initial solution. */
5630 set = get_initial_solution (data, originalp);
5631 if (!set)
5633 if (dump_file && (dump_flags & TDF_DETAILS))
5634 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5635 return NULL;
5638 if (dump_file && (dump_flags & TDF_DETAILS))
5640 fprintf (dump_file, "Initial set of candidates:\n");
5641 iv_ca_dump (data, dump_file, set);
5644 while (try_improve_iv_set (data, set))
5646 if (dump_file && (dump_flags & TDF_DETAILS))
5648 fprintf (dump_file, "Improved to:\n");
5649 iv_ca_dump (data, dump_file, set);
5653 return set;
5656 static struct iv_ca *
5657 find_optimal_iv_set (struct ivopts_data *data)
5659 unsigned i;
5660 struct iv_ca *set, *origset;
5661 struct iv_use *use;
5662 comp_cost cost, origcost;
5664 /* Determine the cost based on a strategy that starts with original IVs,
5665 then try again using a strategy that prefers candidates not based
5666 on any IVs. */
5667 origset = find_optimal_iv_set_1 (data, true);
5668 set = find_optimal_iv_set_1 (data, false);
5670 if (!origset && !set)
5671 return NULL;
5673 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5674 cost = set ? iv_ca_cost (set) : infinite_cost;
5676 if (dump_file && (dump_flags & TDF_DETAILS))
5678 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5679 origcost.cost, origcost.complexity);
5680 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5681 cost.cost, cost.complexity);
5684 /* Choose the one with the best cost. */
5685 if (compare_costs (origcost, cost) <= 0)
5687 if (set)
5688 iv_ca_free (&set);
5689 set = origset;
5691 else if (origset)
5692 iv_ca_free (&origset);
5694 for (i = 0; i < n_iv_uses (data); i++)
5696 use = iv_use (data, i);
5697 use->selected = iv_ca_cand_for_use (set, use)->cand;
5700 return set;
5703 /* Creates a new induction variable corresponding to CAND. */
5705 static void
5706 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5708 gimple_stmt_iterator incr_pos;
5709 tree base;
5710 bool after = false;
5712 if (!cand->iv)
5713 return;
5715 switch (cand->pos)
5717 case IP_NORMAL:
5718 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5719 break;
5721 case IP_END:
5722 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5723 after = true;
5724 break;
5726 case IP_AFTER_USE:
5727 after = true;
5728 /* fall through */
5729 case IP_BEFORE_USE:
5730 incr_pos = gsi_for_stmt (cand->incremented_at);
5731 break;
5733 case IP_ORIGINAL:
5734 /* Mark that the iv is preserved. */
5735 name_info (data, cand->var_before)->preserve_biv = true;
5736 name_info (data, cand->var_after)->preserve_biv = true;
5738 /* Rewrite the increment so that it uses var_before directly. */
5739 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5740 return;
5743 gimple_add_tmp_var (cand->var_before);
5744 add_referenced_var (cand->var_before);
5746 base = unshare_expr (cand->iv->base);
5748 create_iv (base, unshare_expr (cand->iv->step),
5749 cand->var_before, data->current_loop,
5750 &incr_pos, after, &cand->var_before, &cand->var_after);
5753 /* Creates new induction variables described in SET. */
5755 static void
5756 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5758 unsigned i;
5759 struct iv_cand *cand;
5760 bitmap_iterator bi;
5762 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5764 cand = iv_cand (data, i);
5765 create_new_iv (data, cand);
5768 if (dump_file && (dump_flags & TDF_DETAILS))
5770 fprintf (dump_file, "\nSelected IV set: \n");
5771 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5773 cand = iv_cand (data, i);
5774 dump_cand (dump_file, cand);
5776 fprintf (dump_file, "\n");
5780 /* Rewrites USE (definition of iv used in a nonlinear expression)
5781 using candidate CAND. */
5783 static void
5784 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5785 struct iv_use *use, struct iv_cand *cand)
5787 tree comp;
5788 tree op, tgt;
5789 gimple ass;
5790 gimple_stmt_iterator bsi;
5792 /* An important special case -- if we are asked to express the value of
5793 the original iv by itself, just exit; there is no need to
5794 introduce a new computation (that might also need casting the
5795 variable to unsigned and back). */
5796 if (cand->pos == IP_ORIGINAL
5797 && cand->incremented_at == use->stmt)
5799 tree step, ctype, utype;
5800 enum tree_code incr_code = PLUS_EXPR, old_code;
5802 gcc_assert (is_gimple_assign (use->stmt));
5803 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5805 step = cand->iv->step;
5806 ctype = TREE_TYPE (step);
5807 utype = TREE_TYPE (cand->var_after);
5808 if (TREE_CODE (step) == NEGATE_EXPR)
5810 incr_code = MINUS_EXPR;
5811 step = TREE_OPERAND (step, 0);
5814 /* Check whether we may leave the computation unchanged.
5815 This is the case only if it does not rely on other
5816 computations in the loop -- otherwise, the computation
5817 we rely upon may be removed in remove_unused_ivs,
5818 thus leading to an ICE. */
5819 old_code = gimple_assign_rhs_code (use->stmt);
5820 if (old_code == PLUS_EXPR
5821 || old_code == MINUS_EXPR
5822 || old_code == POINTER_PLUS_EXPR)
5824 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5825 op = gimple_assign_rhs2 (use->stmt);
5826 else if (old_code != MINUS_EXPR
5827 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5828 op = gimple_assign_rhs1 (use->stmt);
5829 else
5830 op = NULL_TREE;
5832 else
5833 op = NULL_TREE;
5835 if (op
5836 && (TREE_CODE (op) == INTEGER_CST
5837 || operand_equal_p (op, step, 0)))
5838 return;
5840 /* Otherwise, add the necessary computations to express
5841 the iv. */
5842 op = fold_convert (ctype, cand->var_before);
5843 comp = fold_convert (utype,
5844 build2 (incr_code, ctype, op,
5845 unshare_expr (step)));
5847 else
5849 comp = get_computation (data->current_loop, use, cand);
5850 gcc_assert (comp != NULL_TREE);
5853 switch (gimple_code (use->stmt))
5855 case GIMPLE_PHI:
5856 tgt = PHI_RESULT (use->stmt);
5858 /* If we should keep the biv, do not replace it. */
5859 if (name_info (data, tgt)->preserve_biv)
5860 return;
5862 bsi = gsi_after_labels (gimple_bb (use->stmt));
5863 break;
5865 case GIMPLE_ASSIGN:
5866 tgt = gimple_assign_lhs (use->stmt);
5867 bsi = gsi_for_stmt (use->stmt);
5868 break;
5870 default:
5871 gcc_unreachable ();
5874 if (!valid_gimple_rhs_p (comp)
5875 || (gimple_code (use->stmt) != GIMPLE_PHI
5876 /* We can't allow re-allocating the stmt as it might still be
5877 pointed to. */
5878 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5879 >= gimple_num_ops (gsi_stmt (bsi)))))
5881 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5882 true, GSI_SAME_STMT);
5883 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5885 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5886 /* As this isn't a plain copy we have to reset alignment
5887 information. */
5888 if (SSA_NAME_PTR_INFO (comp))
5890 SSA_NAME_PTR_INFO (comp)->align = 1;
5891 SSA_NAME_PTR_INFO (comp)->misalign = 0;
5896 if (gimple_code (use->stmt) == GIMPLE_PHI)
5898 ass = gimple_build_assign (tgt, comp);
5899 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5901 bsi = gsi_for_stmt (use->stmt);
5902 remove_phi_node (&bsi, false);
5904 else
5906 gimple_assign_set_rhs_from_tree (&bsi, comp);
5907 use->stmt = gsi_stmt (bsi);
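/* As a hypothetical example (SSA names invented), expressing a use of
   an int iv from a candidate recomputes it via the unsigned step type:

     t_1 = (unsigned int) i_7;
     t_2 = t_1 + 1;
     i_8 = (int) t_2;

   whereas in the IP_ORIGINAL case handled first, the statement
   i_8 = i_7 + 1 is simply left in place.  */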
5911 /* Copies the reference information from OLD_REF to NEW_REF. */
5913 static void
5914 copy_ref_info (tree new_ref, tree old_ref)
5916 tree new_ptr_base = NULL_TREE;
5918 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5919 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5921 new_ptr_base = TREE_OPERAND (new_ref, 0);
5923 /* We can transfer points-to information from an old pointer
5924 or decl base to the new one. */
5925 if (new_ptr_base
5926 && TREE_CODE (new_ptr_base) == SSA_NAME
5927 && !SSA_NAME_PTR_INFO (new_ptr_base))
5929 tree base = get_base_address (old_ref);
5930 if (!base)
5932 else if ((TREE_CODE (base) == MEM_REF
5933 || TREE_CODE (base) == TARGET_MEM_REF)
5934 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5935 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5937 struct ptr_info_def *new_pi;
5938 duplicate_ssa_name_ptr_info
5939 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5940 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5941 /* We have to be careful about transferring alignment information. */
5942 if (TREE_CODE (old_ref) == MEM_REF
5943 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5944 && (TMR_INDEX2 (new_ref)
5945 || (TMR_STEP (new_ref)
5946 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5947 < new_pi->align)))))
5949 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5950 mem_ref_offset (new_ref)).low;
5951 new_pi->misalign &= (new_pi->align - 1);
5953 else
5955 new_pi->align = 1;
5956 new_pi->misalign = 0;
5959 else if (TREE_CODE (base) == VAR_DECL
5960 || TREE_CODE (base) == PARM_DECL
5961 || TREE_CODE (base) == RESULT_DECL)
5963 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5964 pt_solution_set_var (&pi->pt, base);
5969 /* Performs a peephole optimization to reorder the iv update statement with
5970 a mem ref to enable instruction combining in later phases. The mem ref uses
5971 the iv value before the update, so the reordering transformation requires
5972 adjustment of the offset. CAND is the selected IV_CAND.
5974 Example:
5976 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5977 iv2 = iv1 + 1;
5979 if (t < val) (1)
5980 goto L;
5981 goto Head;
5984 directly propagating t over to (1) will introduce an overlapping live range
5985 and thus increase register pressure. This peephole transforms it into:
5988 iv2 = iv1 + 1;
5989 t = MEM_REF (base, iv2, 8, 8);
5990 if (t < val)
5991 goto L;
5992 goto Head;
5995 static void
5996 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5998 tree var_after;
5999 gimple iv_update, stmt;
6000 basic_block bb;
6001 gimple_stmt_iterator gsi, gsi_iv;
6003 if (cand->pos != IP_NORMAL)
6004 return;
6006 var_after = cand->var_after;
6007 iv_update = SSA_NAME_DEF_STMT (var_after);
6009 bb = gimple_bb (iv_update);
6010 gsi = gsi_last_nondebug_bb (bb);
6011 stmt = gsi_stmt (gsi);
6013 /* Only handle conditional statements for now. */
6014 if (gimple_code (stmt) != GIMPLE_COND)
6015 return;
6017 gsi_prev_nondebug (&gsi);
6018 stmt = gsi_stmt (gsi);
6019 if (stmt != iv_update)
6020 return;
6022 gsi_prev_nondebug (&gsi);
6023 if (gsi_end_p (gsi))
6024 return;
6026 stmt = gsi_stmt (gsi);
6027 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6028 return;
6030 if (stmt != use->stmt)
6031 return;
6033 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6034 return;
6036 if (dump_file && (dump_flags & TDF_DETAILS))
6038 fprintf (dump_file, "Reordering \n");
6039 print_gimple_stmt (dump_file, iv_update, 0, 0);
6040 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6041 fprintf (dump_file, "\n");
6044 gsi = gsi_for_stmt (use->stmt);
6045 gsi_iv = gsi_for_stmt (iv_update);
6046 gsi_move_before (&gsi_iv, &gsi);
6048 cand->pos = IP_BEFORE_USE;
6049 cand->incremented_at = use->stmt;
6052 /* Rewrites USE (address that is an iv) using candidate CAND. */
6054 static void
6055 rewrite_use_address (struct ivopts_data *data,
6056 struct iv_use *use, struct iv_cand *cand)
6058 aff_tree aff;
6059 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6060 tree base_hint = NULL_TREE;
6061 tree ref, iv;
6062 bool ok;
6064 adjust_iv_update_pos (cand, use);
6065 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6066 gcc_assert (ok);
6067 unshare_aff_combination (&aff);
6069 /* To avoid undefined overflow problems, all IV candidates use unsigned
6070 integer types. The drawback is that this makes it impossible for
6071 create_mem_ref to distinguish an IV that is based on a memory object
6072 from one that represents simply an offset.
6074 To work around this problem, we pass a hint to create_mem_ref that
6075 indicates which variable (if any) in aff is an IV based on a memory
6076 object. Note that we only consider the candidate. If this is not
6077 based on an object, the base of the reference is in some subexpression
6078 of the use -- but these will use pointer types, so they are recognized
6079 by the create_mem_ref heuristics anyway. */
6080 if (cand->iv->base_object)
6081 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6083 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6084 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6085 reference_alias_ptr_type (*use->op_p),
6086 iv, base_hint, data->speed);
6087 copy_ref_info (ref, *use->op_p);
6088 *use->op_p = ref;
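/* Illustrative sketch (hypothetical SSA names): for a use *(&a[i])
   rewritten against a pointer candidate p_7, the affine combination
   might reduce to p_7 + 4, and create_mem_ref then builds a
   TARGET_MEM_REF along the lines of MEM[base: p_7, offset: 4], with
   BASE_HINT telling it that p_7 is the iv based on the memory object.  */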
6091 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6092 candidate CAND. */
6094 static void
6095 rewrite_use_compare (struct ivopts_data *data,
6096 struct iv_use *use, struct iv_cand *cand)
6098 tree comp, *var_p, op, bound;
6099 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6100 enum tree_code compare;
6101 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6102 bool ok;
6104 bound = cp->value;
6105 if (bound)
6107 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6108 tree var_type = TREE_TYPE (var);
6109 gimple_seq stmts;
6111 if (dump_file && (dump_flags & TDF_DETAILS))
6113 fprintf (dump_file, "Replacing exit test: ");
6114 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6116 compare = iv_elimination_compare (data, use);
6117 bound = unshare_expr (fold_convert (var_type, bound));
6118 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6119 if (stmts)
6120 gsi_insert_seq_on_edge_immediate (
6121 loop_preheader_edge (data->current_loop),
6122 stmts);
6124 gimple_cond_set_lhs (use->stmt, var);
6125 gimple_cond_set_code (use->stmt, compare);
6126 gimple_cond_set_rhs (use->stmt, op);
6127 return;
6130 /* The induction variable elimination failed; just express the original
6131 giv. */
6132 comp = get_computation (data->current_loop, use, cand);
6133 gcc_assert (comp != NULL_TREE);
6135 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6136 gcc_assert (ok);
6138 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6139 true, GSI_SAME_STMT);
6142 /* Rewrites USE using candidate CAND. */
6144 static void
6145 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6147 switch (use->type)
6149 case USE_NONLINEAR_EXPR:
6150 rewrite_use_nonlinear_expr (data, use, cand);
6151 break;
6153 case USE_ADDRESS:
6154 rewrite_use_address (data, use, cand);
6155 break;
6157 case USE_COMPARE:
6158 rewrite_use_compare (data, use, cand);
6159 break;
6161 default:
6162 gcc_unreachable ();
6165 update_stmt (use->stmt);
6168 /* Rewrite the uses using the selected induction variables. */
6170 static void
6171 rewrite_uses (struct ivopts_data *data)
6173 unsigned i;
6174 struct iv_cand *cand;
6175 struct iv_use *use;
6177 for (i = 0; i < n_iv_uses (data); i++)
6179 use = iv_use (data, i);
6180 cand = use->selected;
6181 gcc_assert (cand);
6183 rewrite_use (data, use, cand);
6187 /* Removes the ivs that are not used after rewriting. */
6189 static void
6190 remove_unused_ivs (struct ivopts_data *data)
6192 unsigned j;
6193 bitmap_iterator bi;
6194 bitmap toremove = BITMAP_ALLOC (NULL);
6196 /* Figure out an order in which to release SSA DEFs so that we don't
6197 release something that we'd have to propagate into a debug stmt
6198 afterwards. */
6199 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6201 struct version_info *info;
6203 info = ver_info (data, j);
6204 if (info->iv
6205 && !integer_zerop (info->iv->step)
6206 && !info->inv_id
6207 && !info->iv->have_use_for
6208 && !info->preserve_biv)
6209 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6212 release_defs_bitset (toremove);
6214 BITMAP_FREE (toremove);
6217 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6218 for pointer_map_traverse. */
6220 static bool
6221 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6222 void *data ATTRIBUTE_UNUSED)
6224 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6226 free (niter);
6227 return true;
6230 /* Frees data allocated by the optimization of a single loop. */
6232 static void
6233 free_loop_data (struct ivopts_data *data)
6235 unsigned i, j;
6236 bitmap_iterator bi;
6237 tree obj;
6239 if (data->niters)
6241 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6242 pointer_map_destroy (data->niters);
6243 data->niters = NULL;
6246 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6248 struct version_info *info;
6250 info = ver_info (data, i);
6251 if (info->iv)
6252 free (info->iv);
6253 info->iv = NULL;
6254 info->has_nonlin_use = false;
6255 info->preserve_biv = false;
6256 info->inv_id = 0;
6258 bitmap_clear (data->relevant);
6259 bitmap_clear (data->important_candidates);
6261 for (i = 0; i < n_iv_uses (data); i++)
6263 struct iv_use *use = iv_use (data, i);
6265 free (use->iv);
6266 BITMAP_FREE (use->related_cands);
6267 for (j = 0; j < use->n_map_members; j++)
6268 if (use->cost_map[j].depends_on)
6269 BITMAP_FREE (use->cost_map[j].depends_on);
6270 free (use->cost_map);
6271 free (use);
6273 VEC_truncate (iv_use_p, data->iv_uses, 0);
6275 for (i = 0; i < n_iv_cands (data); i++)
6277 struct iv_cand *cand = iv_cand (data, i);
6279 if (cand->iv)
6280 free (cand->iv);
6281 if (cand->depends_on)
6282 BITMAP_FREE (cand->depends_on);
6283 free (cand);
6285 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6287 if (data->version_info_size < num_ssa_names)
6289 data->version_info_size = 2 * num_ssa_names;
6290 free (data->version_info);
6291 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6294 data->max_inv_id = 0;
6296 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6297 SET_DECL_RTL (obj, NULL_RTX);
6299 VEC_truncate (tree, decl_rtl_to_reset, 0);
6301 htab_empty (data->inv_expr_tab);
6302 data->inv_expr_id = 0;
6305 /* Finalizes data structures used by the iv optimization pass. */
6308 static void
6309 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6311 free_loop_data (data);
6312 free (data->version_info);
6313 BITMAP_FREE (data->relevant);
6314 BITMAP_FREE (data->important_candidates);
6316 VEC_free (tree, heap, decl_rtl_to_reset);
6317 VEC_free (iv_use_p, heap, data->iv_uses);
6318 VEC_free (iv_cand_p, heap, data->iv_candidates);
6319 htab_delete (data->inv_expr_tab);
6322 /* Returns true if the loop body BODY includes any function calls. */
6324 static bool
6325 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6327 gimple_stmt_iterator gsi;
6328 unsigned i;
6330 for (i = 0; i < num_nodes; i++)
6331 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6333 gimple stmt = gsi_stmt (gsi);
6334 if (is_gimple_call (stmt)
6335 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6336 return true;
6338 return false;
6341 /* Optimizes the LOOP. Returns true if anything changed. */
6343 static bool
6344 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6346 bool changed = false;
6347 struct iv_ca *iv_ca;
6348 edge exit;
6349 basic_block *body;
6351 gcc_assert (!data->niters);
6352 data->current_loop = loop;
6353 data->speed = optimize_loop_for_speed_p (loop);
6355 if (dump_file && (dump_flags & TDF_DETAILS))
6357 fprintf (dump_file, "Processing loop %d\n", loop->num);
6359 exit = single_dom_exit (loop);
6360 if (exit)
6362 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6363 exit->src->index, exit->dest->index);
6364 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6365 fprintf (dump_file, "\n");
6368 fprintf (dump_file, "\n");
6371 body = get_loop_body (loop);
6372 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6373 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6374 free (body);
6376 /* For each ssa name determines whether it behaves as an induction variable
6377 in some loop. */
6378 if (!find_induction_variables (data))
6379 goto finish;
6381 /* Finds interesting uses (item 1). */
6382 find_interesting_uses (data);
6383 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6384 goto finish;
6386 /* Finds candidates for the induction variables (item 2). */
6387 find_iv_candidates (data);
6389 /* Calculates the costs (item 3, part 1). */
6390 determine_iv_costs (data);
6391 determine_use_iv_costs (data);
6392 determine_set_costs (data);
6394 /* Find the optimal set of induction variables (item 3, part 2). */
6395 iv_ca = find_optimal_iv_set (data);
6396 if (!iv_ca)
6397 goto finish;
6398 changed = true;
6400 /* Create the new induction variables (item 4, part 1). */
6401 create_new_ivs (data, iv_ca);
6402 iv_ca_free (&iv_ca);
6404 /* Rewrite the uses (item 4, part 2). */
6405 rewrite_uses (data);
6407 /* Remove the ivs that are unused after rewriting. */
6408 remove_unused_ivs (data);
6410 /* We have changed the structure of induction variables; it might happen
6411 that definitions in the scev database refer to some of them that were
6412 eliminated. */
6413 scev_reset ();
6415 finish:
6416 free_loop_data (data);
6418 return changed;
6421 /* Main entry point. Optimizes induction variables in loops. */
6423 void
6424 tree_ssa_iv_optimize (void)
6426 struct loop *loop;
6427 struct ivopts_data data;
6428 loop_iterator li;
6430 tree_ssa_iv_optimize_init (&data);
6432 /* Optimize the loops starting with the innermost ones. */
6433 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6435 if (dump_file && (dump_flags & TDF_DETAILS))
6436 flow_loop_dump (loop, dump_file, NULL, 1);
6438 tree_ssa_iv_optimize_loop (&data, loop);
6441 tree_ssa_iv_optimize_finalize (&data);