/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
	 induction variable in the set and adds its cost to the sum.  The cost
	 reflects the time spent on modifying the induction variable's value
	 to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
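
/* As an illustrative sketch (not part of the original sources): for a loop
   like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address use "a[i]" hides a multiplication by the element size.  This
   pass may rewrite the loop in terms of a pointer induction variable with
   step "sizeof (a[0])", roughly

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which is the classical strength reduction of the indexing arithmetic;
   the exit test has been rewritten in terms of the new variable, and the
   original counter i can be eliminated if it has no other uses.  */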
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
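
/* For example (illustrative only): a counter initialized to 0 before the
   loop and incremented by 4 in each iteration is described by an iv with
   base 0 and step 4 (and biv_p set once it is recognized as a basic iv),
   while a loop-invariant value is represented by an iv with zero step.  */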
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}
/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *) ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *) ent2;

  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *) ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
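
/* E.g. (an illustrative example): for EXPR of the form "&a[i]" the base
   object is the enclosing declaration, so "&a" is returned, while for an
   integer constant converted to a pointer NULL_TREE is returned, since
   such a pointer points to no known object.  */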
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
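
/* For instance (illustrative): a loop-header phi node

     i_1 = PHI <0 (preheader), i_2 (latch)>

   together with "i_2 = i_1 + 1" in the loop body makes i_1 a biv with
   base 0 and step 1.  */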
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
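
/* E.g. (an illustrative example): for a reference "a[i]" where a is an
   array of 4-byte elements and i is an iv with step 1, the contribution
   accumulated into DTA->step is 4 * 1 = 4 bytes, and *IDX is replaced by
   the initial value of i.  */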
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
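
/* Examples (illustrative): for TOP = n * 4 + n * 8 and BOT = n the
   function stores 12 to *MUL and returns true; for TOP = 6 and BOT = 2
   it stores 3; for TOP = 7 and BOT = 2 it returns false, since 7 is not
   an integral multiple of 2.  */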
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ???  Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
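
/* For instance (an illustrative example): for EXPR = &a[4], where a is
   an array of 4-byte integers, the constant index contributes 4 * 4 = 16,
   so 16 is stored to *OFFSET and the address of the array itself is
   returned; the constant displacement is thus separated from the symbolic
   part of the address.  */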
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2197 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2198 and the position to POS. If USE is not NULL, the candidate is set as
2199 related to it. If both BASE and STEP are NULL, we add a pseudocandidate
2200 for the replacement of the final value of the iv by a direct computation. */
2202 static struct iv_cand *
2203 add_candidate_1 (struct ivopts_data *data,
2204 tree base, tree step, bool important, enum iv_position pos,
2205 struct iv_use *use, gimple incremented_at)
2207 unsigned i;
2208 struct iv_cand *cand = NULL;
2209 tree type, orig_type;
2211 if (base)
2213 orig_type = TREE_TYPE (base);
2214 type = generic_type_for (orig_type);
2215 if (type != orig_type)
2217 base = fold_convert (type, base);
2218 step = fold_convert (type, step);
2222 for (i = 0; i < n_iv_cands (data); i++)
2224 cand = iv_cand (data, i);
2226 if (cand->pos != pos)
2227 continue;
2229 if (cand->incremented_at != incremented_at
2230 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2231 && cand->ainc_use != use))
2232 continue;
2234 if (!cand->iv)
2236 if (!base && !step)
2237 break;
2239 continue;
2242 if (!base && !step)
2243 continue;
2245 if (operand_equal_p (base, cand->iv->base, 0)
2246 && operand_equal_p (step, cand->iv->step, 0)
2247 && (TYPE_PRECISION (TREE_TYPE (base))
2248 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2249 break;
2252 if (i == n_iv_cands (data))
2254 cand = XCNEW (struct iv_cand);
2255 cand->id = i;
2257 if (!base && !step)
2258 cand->iv = NULL;
2259 else
2260 cand->iv = alloc_iv (base, step);
2262 cand->pos = pos;
2263 if (pos != IP_ORIGINAL && cand->iv)
2265 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2266 cand->var_after = cand->var_before;
2268 cand->important = important;
2269 cand->incremented_at = incremented_at;
2270 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2272 if (step
2273 && TREE_CODE (step) != INTEGER_CST)
2275 fd_ivopts_data = data;
2276 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2279 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2280 cand->ainc_use = use;
2281 else
2282 cand->ainc_use = NULL;
2284 if (dump_file && (dump_flags & TDF_DETAILS))
2285 dump_cand (dump_file, cand);
2288 if (important && !cand->important)
2290 cand->important = true;
2291 if (dump_file && (dump_flags & TDF_DETAILS))
2292 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2295 if (use)
2297 bitmap_set_bit (use->related_cands, i);
2298 if (dump_file && (dump_flags & TDF_DETAILS))
2299 fprintf (dump_file, "Candidate %d is related to use %d\n",
2300 cand->id, use->id);
2303 return cand;
2306 /* Returns true if incrementing the induction variable at the end of LOOP
2307 is allowed.
2309 The purpose is to avoid splitting the latch edge with a biv increment,
2310 thus creating a jump that could confuse other optimization passes and
2311 leave less freedom to the scheduler. So we allow IP_END_POS only if
2312 IP_NORMAL_POS is not available (so we do not have a better alternative),
2313 or if the latch edge is already nonempty. */
2315 static bool
2316 allow_ip_end_pos_p (struct loop *loop)
2318 if (!ip_normal_pos (loop))
2319 return true;
2321 if (!empty_block_p (ip_end_pos (loop)))
2322 return true;
2324 return false;
2327 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2328 The important field is set to IMPORTANT. */
2330 static void
2331 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2332 bool important, struct iv_use *use)
2334 basic_block use_bb = gimple_bb (use->stmt);
2335 enum machine_mode mem_mode;
2336 unsigned HOST_WIDE_INT cstepi;
2338 /* If we insert the increment in any position other than the standard
2339 ones, we must ensure that it is incremented exactly once per iteration.
2340 It must not be in an inner nested loop, nor on one side of an if
2341 statement. */
2342 if (use_bb->loop_father != data->current_loop
2343 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2344 || stmt_could_throw_p (use->stmt)
2345 || !cst_and_fits_in_hwi (step))
2346 return;
2348 cstepi = int_cst_value (step);
2350 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2351 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2352 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2354 enum tree_code code = MINUS_EXPR;
2355 tree new_base;
2356 tree new_step = step;
2358 if (POINTER_TYPE_P (TREE_TYPE (base)))
2360 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2361 code = POINTER_PLUS_EXPR;
2363 else
2364 new_step = fold_convert (TREE_TYPE (base), new_step);
2365 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2366 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2367 use->stmt);
2369 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2370 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2372 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2373 use->stmt);
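/* A sketch of the effect, assuming a target with post-increment
   addressing and a 4-byte access "*p" in a loop advancing p by 4:
   the code above adds the candidate {base = p, step = 4} at position
   IP_AFTER_USE, so that the rewriting phase can later fold the
   increment into the access, effectively forming "*p++".  */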
2377 /* Adds a candidate BASE + STEP * i. The important field is set to
2378 IMPORTANT. If USE is not NULL, the candidate is set as related to
2379 it. The candidate computation is scheduled at all available positions. */
2381 static void
2382 add_candidate (struct ivopts_data *data,
2383 tree base, tree step, bool important, struct iv_use *use)
2385 if (ip_normal_pos (data->current_loop))
2386 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2387 if (ip_end_pos (data->current_loop)
2388 && allow_ip_end_pos_p (data->current_loop))
2389 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2391 if (use != NULL && use->type == USE_ADDRESS)
2392 add_autoinc_candidates (data, base, step, important, use);
2395 /* Add a standard "0 + 1 * iteration" iv candidate for a
2396 type with SIZE bits. */
2398 static void
2399 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2400 unsigned int size)
2402 tree type = lang_hooks.types.type_for_size (size, true);
2403 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2404 true, NULL);
2407 /* Adds standard iv candidates. */
2409 static void
2410 add_standard_iv_candidates (struct ivopts_data *data)
2412 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2414 /* The same for a double-integer type if it is still fast enough. */
2415 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2416 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2420 /* Adds candidates based on the old induction variable IV. */
2422 static void
2423 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2425 gimple phi;
2426 tree def;
2427 struct iv_cand *cand;
2429 add_candidate (data, iv->base, iv->step, true, NULL);
2431 /* The same, but with initial value zero. */
2432 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2433 add_candidate (data, size_int (0), iv->step, true, NULL);
2434 else
2435 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2436 iv->step, true, NULL);
2438 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2439 if (gimple_code (phi) == GIMPLE_PHI)
2441 /* Additionally record the possibility of leaving the original iv
2442 untouched. */
2443 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2444 cand = add_candidate_1 (data,
2445 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2446 SSA_NAME_DEF_STMT (def));
2447 cand->var_before = iv->ssa_name;
2448 cand->var_after = def;
2452 /* Adds candidates based on the old induction variables. */
2454 static void
2455 add_old_ivs_candidates (struct ivopts_data *data)
2457 unsigned i;
2458 struct iv *iv;
2459 bitmap_iterator bi;
2461 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2463 iv = ver_info (data, i)->iv;
2464 if (iv && iv->biv_p && !integer_zerop (iv->step))
2465 add_old_iv_candidates (data, iv);
2469 /* Adds candidates based on the value of the induction variable IV and USE. */
2471 static void
2472 add_iv_value_candidates (struct ivopts_data *data,
2473 struct iv *iv, struct iv_use *use)
2475 unsigned HOST_WIDE_INT offset;
2476 tree base;
2477 tree basetype;
2479 add_candidate (data, iv->base, iv->step, false, use);
2481 /* The same, but with initial value zero. Make such a variable important,
2482 since it is generic enough that many uses may possibly be based
2483 on it. */
2484 basetype = TREE_TYPE (iv->base);
2485 if (POINTER_TYPE_P (basetype))
2486 basetype = sizetype;
2487 add_candidate (data, build_int_cst (basetype, 0),
2488 iv->step, true, use);
2490 /* Third, try removing the constant offset. Make sure to even
2491 add a candidate for &a[0] vs. (T *)&a. */
2492 base = strip_offset (iv->base, &offset);
2493 if (offset
2494 || base != iv->base)
2495 add_candidate (data, base, iv->step, false, use);
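/* For instance, a use with iv {base = &a[4], step = 4} yields three
   candidates: {base = &a[4], step = 4}, the important generic
   {base = 0, step = 4}, and the offset-stripped {base = &a[0], step = 4};
   the cost model then picks whichever variant expresses the most uses
   cheaply.  */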
2498 /* Adds candidates based on the uses. */
2500 static void
2501 add_derived_ivs_candidates (struct ivopts_data *data)
2503 unsigned i;
2505 for (i = 0; i < n_iv_uses (data); i++)
2507 struct iv_use *use = iv_use (data, i);
2509 if (!use)
2510 continue;
2512 switch (use->type)
2514 case USE_NONLINEAR_EXPR:
2515 case USE_COMPARE:
2516 case USE_ADDRESS:
2517 /* Just add the ivs based on the value of the iv used here. */
2518 add_iv_value_candidates (data, use->iv, use);
2519 break;
2521 default:
2522 gcc_unreachable ();
2527 /* Record important candidates and add them to related_cands bitmaps
2528 if needed. */
2530 static void
2531 record_important_candidates (struct ivopts_data *data)
2533 unsigned i;
2534 struct iv_use *use;
2536 for (i = 0; i < n_iv_cands (data); i++)
2538 struct iv_cand *cand = iv_cand (data, i);
2540 if (cand->important)
2541 bitmap_set_bit (data->important_candidates, i);
2544 data->consider_all_candidates = (n_iv_cands (data)
2545 <= CONSIDER_ALL_CANDIDATES_BOUND);
2547 if (data->consider_all_candidates)
2549 /* We will not need "related_cands" bitmaps in this case,
2550 so release them to decrease peak memory consumption. */
2551 for (i = 0; i < n_iv_uses (data); i++)
2553 use = iv_use (data, i);
2554 BITMAP_FREE (use->related_cands);
2557 else
2559 /* Add important candidates to the related_cands bitmaps. */
2560 for (i = 0; i < n_iv_uses (data); i++)
2561 bitmap_ior_into (iv_use (data, i)->related_cands,
2562 data->important_candidates);
2566 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2567 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2568 we allocate a simple list for every use. */
2570 static void
2571 alloc_use_cost_map (struct ivopts_data *data)
2573 unsigned i, size, s, j;
2575 for (i = 0; i < n_iv_uses (data); i++)
2577 struct iv_use *use = iv_use (data, i);
2578 bitmap_iterator bi;
2580 if (data->consider_all_candidates)
2581 size = n_iv_cands (data);
2582 else
2584 s = 0;
2585 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2587 s++;
2590 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2591 for (size = 1; size < s; size <<= 1)
2592 continue;
2595 use->n_map_members = size;
2596 use->cost_map = XCNEWVEC (struct cost_pair, size);
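/* A minimal sketch of the sizing and lookup scheme used here, with
   hypothetical values: for s = 5 related candidates the loop yields
   size = 8, and a candidate id is then mapped to a slot by

     slot = cand->id & (size - 1);

   which computes the modulo without a division; set_use_iv_cost and
   get_use_iv_cost below resolve collisions in this table by linear
   probing.  */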
2600 /* Returns a description of the computation cost of an expression whose
2601 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2603 static comp_cost
2604 new_cost (unsigned runtime, unsigned complexity)
2606 comp_cost cost;
2608 cost.cost = runtime;
2609 cost.complexity = complexity;
2611 return cost;
2614 /* Adds costs COST1 and COST2. */
2616 static comp_cost
2617 add_costs (comp_cost cost1, comp_cost cost2)
2619 cost1.cost += cost2.cost;
2620 cost1.complexity += cost2.complexity;
2622 return cost1;
2624 /* Subtracts cost COST2 from COST1. */
2626 static comp_cost
2627 sub_costs (comp_cost cost1, comp_cost cost2)
2629 cost1.cost -= cost2.cost;
2630 cost1.complexity -= cost2.complexity;
2632 return cost1;
2635 /* Returns a negative number if COST1 < COST2, a positive number if
2636 COST1 > COST2, and 0 if COST1 = COST2. */
2638 static int
2639 compare_costs (comp_cost cost1, comp_cost cost2)
2641 if (cost1.cost == cost2.cost)
2642 return cost1.complexity - cost2.complexity;
2644 return cost1.cost - cost2.cost;
2647 /* Returns true if COST is infinite. */
2649 static bool
2650 infinite_cost_p (comp_cost cost)
2652 return cost.cost == INFTY;
2655 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
2656 on invariants DEPENDS_ON and that the value used in expressing it
2657 is VALUE. */
2659 static void
2660 set_use_iv_cost (struct ivopts_data *data,
2661 struct iv_use *use, struct iv_cand *cand,
2662 comp_cost cost, bitmap depends_on, tree value,
2663 int inv_expr_id)
2665 unsigned i, s;
2667 if (infinite_cost_p (cost))
2669 BITMAP_FREE (depends_on);
2670 return;
2673 if (data->consider_all_candidates)
2675 use->cost_map[cand->id].cand = cand;
2676 use->cost_map[cand->id].cost = cost;
2677 use->cost_map[cand->id].depends_on = depends_on;
2678 use->cost_map[cand->id].value = value;
2679 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2680 return;
2683 /* n_map_members is a power of two, so this computes modulo. */
2684 s = cand->id & (use->n_map_members - 1);
2685 for (i = s; i < use->n_map_members; i++)
2686 if (!use->cost_map[i].cand)
2687 goto found;
2688 for (i = 0; i < s; i++)
2689 if (!use->cost_map[i].cand)
2690 goto found;
2692 gcc_unreachable ();
2694 found:
2695 use->cost_map[i].cand = cand;
2696 use->cost_map[i].cost = cost;
2697 use->cost_map[i].depends_on = depends_on;
2698 use->cost_map[i].value = value;
2699 use->cost_map[i].inv_expr_id = inv_expr_id;
2702 /* Gets cost of (USE, CANDIDATE) pair. */
2704 static struct cost_pair *
2705 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2706 struct iv_cand *cand)
2708 unsigned i, s;
2709 struct cost_pair *ret;
2711 if (!cand)
2712 return NULL;
2714 if (data->consider_all_candidates)
2716 ret = use->cost_map + cand->id;
2717 if (!ret->cand)
2718 return NULL;
2720 return ret;
2723 /* n_map_members is a power of two, so this computes modulo. */
2724 s = cand->id & (use->n_map_members - 1);
2725 for (i = s; i < use->n_map_members; i++)
2726 if (use->cost_map[i].cand == cand)
2727 return use->cost_map + i;
2729 for (i = 0; i < s; i++)
2730 if (use->cost_map[i].cand == cand)
2731 return use->cost_map + i;
2733 return NULL;
2736 /* Returns an estimate of the cost of computing SEQ. */
2738 static unsigned
2739 seq_cost (rtx seq, bool speed)
2741 unsigned cost = 0;
2742 rtx set;
2744 for (; seq; seq = NEXT_INSN (seq))
2746 set = single_set (seq);
2747 if (set)
2748 cost += rtx_cost (set, SET, speed);
2749 else
2750 cost++;
2753 return cost;
2756 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2757 static rtx
2758 produce_memory_decl_rtl (tree obj, int *regno)
2760 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2761 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2762 rtx x;
2764 gcc_assert (obj);
2765 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2767 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2768 x = gen_rtx_SYMBOL_REF (address_mode, name);
2769 SET_SYMBOL_REF_DECL (x, obj);
2770 x = gen_rtx_MEM (DECL_MODE (obj), x);
2771 set_mem_addr_space (x, as);
2772 targetm.encode_section_info (obj, x, true);
2774 else
2776 x = gen_raw_REG (address_mode, (*regno)++);
2777 x = gen_rtx_MEM (DECL_MODE (obj), x);
2778 set_mem_addr_space (x, as);
2781 return x;
2784 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2785 walk_tree. DATA contains the actual fake register number. */
2787 static tree
2788 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2790 tree obj = NULL_TREE;
2791 rtx x = NULL_RTX;
2792 int *regno = (int *) data;
2794 switch (TREE_CODE (*expr_p))
2796 case ADDR_EXPR:
2797 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2798 handled_component_p (*expr_p);
2799 expr_p = &TREE_OPERAND (*expr_p, 0))
2800 continue;
2801 obj = *expr_p;
2802 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2803 x = produce_memory_decl_rtl (obj, regno);
2804 break;
2806 case SSA_NAME:
2807 *ws = 0;
2808 obj = SSA_NAME_VAR (*expr_p);
2809 if (!DECL_RTL_SET_P (obj))
2810 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2811 break;
2813 case VAR_DECL:
2814 case PARM_DECL:
2815 case RESULT_DECL:
2816 *ws = 0;
2817 obj = *expr_p;
2819 if (DECL_RTL_SET_P (obj))
2820 break;
2822 if (DECL_MODE (obj) == BLKmode)
2823 x = produce_memory_decl_rtl (obj, regno);
2824 else
2825 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2827 break;
2829 default:
2830 break;
2833 if (x)
2835 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2836 SET_DECL_RTL (obj, x);
2839 return NULL_TREE;
2842 /* Determines cost of the computation of EXPR. */
2844 static unsigned
2845 computation_cost (tree expr, bool speed)
2847 rtx seq, rslt;
2848 tree type = TREE_TYPE (expr);
2849 unsigned cost;
2850 /* Avoid using hard regs in ways which may be unsupported. */
2851 int regno = LAST_VIRTUAL_REGISTER + 1;
2852 struct cgraph_node *node = cgraph_node (current_function_decl);
2853 enum node_frequency real_frequency = node->frequency;
2855 node->frequency = NODE_FREQUENCY_NORMAL;
2856 crtl->maybe_hot_insn_p = speed;
2857 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2858 start_sequence ();
2859 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2860 seq = get_insns ();
2861 end_sequence ();
2862 default_rtl_profile ();
2863 node->frequency = real_frequency;
2865 cost = seq_cost (seq, speed);
2866 if (MEM_P (rslt))
2867 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2868 TYPE_ADDR_SPACE (type), speed);
2870 return cost;
2873 /* Returns the variable containing the value of candidate CAND at statement AT. */
2875 static tree
2876 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2878 if (stmt_after_increment (loop, cand, stmt))
2879 return cand->var_after;
2880 else
2881 return cand->var_before;
2884 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2885 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2887 int
2888 tree_int_cst_sign_bit (const_tree t)
2890 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2891 unsigned HOST_WIDE_INT w;
2893 if (bitno < HOST_BITS_PER_WIDE_INT)
2894 w = TREE_INT_CST_LOW (t);
2895 else
2897 w = TREE_INT_CST_HIGH (t);
2898 bitno -= HOST_BITS_PER_WIDE_INT;
2901 return (w >> bitno) & 1;
2904 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2905 same precision, which is at least as wide as the precision of TYPE, stores
2906 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2907 type of A and B. */
2909 static tree
2910 determine_common_wider_type (tree *a, tree *b)
2912 tree wider_type = NULL;
2913 tree suba, subb;
2914 tree atype = TREE_TYPE (*a);
2916 if (CONVERT_EXPR_P (*a))
2918 suba = TREE_OPERAND (*a, 0);
2919 wider_type = TREE_TYPE (suba);
2920 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2921 return atype;
2923 else
2924 return atype;
2926 if (CONVERT_EXPR_P (*b))
2928 subb = TREE_OPERAND (*b, 0);
2929 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2930 return atype;
2932 else
2933 return atype;
2935 *a = suba;
2936 *b = subb;
2937 return wider_type;
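/* Illustrative example (hypothetical declarations): with "long la, lb"
   and A = (unsigned int) la, B = (unsigned int) lb on an LP64 target,
   *A becomes la, *B becomes lb and "long" is returned, so the
   difference A - B can be folded in 64 bits and truncated afterwards.  */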
2940 /* Determines the expression by which USE is expressed from induction variable
2941 CAND at statement AT in LOOP. The expression is stored in a decomposed
2942 form into AFF. Returns false if USE cannot be expressed using CAND. */
2944 static bool
2945 get_computation_aff (struct loop *loop,
2946 struct iv_use *use, struct iv_cand *cand, gimple at,
2947 struct affine_tree_combination *aff)
2949 tree ubase = use->iv->base;
2950 tree ustep = use->iv->step;
2951 tree cbase = cand->iv->base;
2952 tree cstep = cand->iv->step, cstep_common;
2953 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2954 tree common_type, var;
2955 tree uutype;
2956 aff_tree cbase_aff, var_aff;
2957 double_int rat;
2959 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2961 /* We do not have enough precision to express the values of the use. */
2962 return false;
2965 var = var_at_stmt (loop, cand, at);
2966 uutype = unsigned_type_for (utype);
2968 /* If the conversion is not a noop, perform it. */
2969 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2971 cstep = fold_convert (uutype, cstep);
2972 cbase = fold_convert (uutype, cbase);
2973 var = fold_convert (uutype, var);
2976 if (!constant_multiple_of (ustep, cstep, &rat))
2977 return false;
2979 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2980 type, we achieve better folding by computing their difference in this
2981 wider type, and casting the result to UUTYPE. We do not need to worry
2982 about overflow, as all the arithmetic will in the end be performed in
2983 UUTYPE anyway. */
2984 common_type = determine_common_wider_type (&ubase, &cbase);
2986 /* use = ubase - ratio * cbase + ratio * var. */
2987 tree_to_aff_combination (ubase, common_type, aff);
2988 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2989 tree_to_aff_combination (var, uutype, &var_aff);
2991 /* We need to shift the value if we are after the increment. */
2992 if (stmt_after_increment (loop, cand, at))
2994 aff_tree cstep_aff;
2996 if (common_type != uutype)
2997 cstep_common = fold_convert (common_type, cstep);
2998 else
2999 cstep_common = cstep;
3001 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3002 aff_combination_add (&cbase_aff, &cstep_aff);
3005 aff_combination_scale (&cbase_aff, double_int_neg (rat));
3006 aff_combination_add (aff, &cbase_aff);
3007 if (common_type != uutype)
3008 aff_combination_convert (aff, uutype);
3010 aff_combination_scale (&var_aff, rat);
3011 aff_combination_add (aff, &var_aff);
3013 return true;
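/* A worked instance of the decomposition above (values assumed): for a
   use with {ubase = 0, ustep = 4} (a byte offset) and a candidate with
   {cbase = 0, cstep = 1} (a plain counter var), constant_multiple_of
   gives rat = 4 and AFF ends up representing

     0 - 4 * 0 + 4 * var,  i.e. simply  4 * var.  */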
3016 /* Determines the expression by which USE is expressed from induction variable
3017 CAND at statement AT in LOOP. The computation is unshared. */
3019 static tree
3020 get_computation_at (struct loop *loop,
3021 struct iv_use *use, struct iv_cand *cand, gimple at)
3023 aff_tree aff;
3024 tree type = TREE_TYPE (use->iv->base);
3026 if (!get_computation_aff (loop, use, cand, at, &aff))
3027 return NULL_TREE;
3028 unshare_aff_combination (&aff);
3029 return fold_convert (type, aff_combination_to_tree (&aff));
3032 /* Determines the expression by which USE is expressed from induction variable
3033 CAND in LOOP. The computation is unshared. */
3035 static tree
3036 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3038 return get_computation_at (loop, use, cand, use->stmt);
3041 /* Adjust the cost COST for being in loop setup rather than loop body.
3042 If we're optimizing for space, the loop setup overhead is constant;
3043 if we're optimizing for speed, amortize it over the per-iteration cost. */
3044 static unsigned
3045 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3047 if (cost == INFTY)
3048 return cost;
3049 else if (optimize_loop_for_speed_p (data->current_loop))
3050 return cost / avg_loop_niter (data->current_loop);
3051 else
3052 return cost;
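/* E.g. with cost = 10 and an average trip count of 5, a speed-optimized
   loop is charged 10 / 5 = 2 per iteration, while a size-optimized loop
   is charged the full 10, since the setup code is emitted exactly once
   regardless of the trip count.  */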
3055 /* Returns cost of addition in MODE. */
3057 static unsigned
3058 add_cost (enum machine_mode mode, bool speed)
3060 static unsigned costs[NUM_MACHINE_MODES];
3061 rtx seq;
3062 unsigned cost;
3064 if (costs[mode])
3065 return costs[mode];
3067 start_sequence ();
3068 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3069 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3070 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3071 NULL_RTX);
3072 seq = get_insns ();
3073 end_sequence ();
3075 cost = seq_cost (seq, speed);
3076 if (!cost)
3077 cost = 1;
3079 costs[mode] = cost;
3081 if (dump_file && (dump_flags & TDF_DETAILS))
3082 fprintf (dump_file, "Addition in %s costs %d\n",
3083 GET_MODE_NAME (mode), cost);
3084 return cost;
3087 /* Entry in a hashtable of already known costs for multiplication. */
3088 struct mbc_entry
3090 HOST_WIDE_INT cst; /* The constant to multiply by. */
3091 enum machine_mode mode; /* In mode. */
3092 unsigned cost; /* The cost. */
3095 /* Computes the hash value for ENTRY. */
3097 static hashval_t
3098 mbc_entry_hash (const void *entry)
3100 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3102 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3105 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3107 static int
3108 mbc_entry_eq (const void *entry1, const void *entry2)
3110 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3111 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3113 return (e1->mode == e2->mode
3114 && e1->cst == e2->cst);
3117 /* Returns cost of multiplication by constant CST in MODE. */
3119 unsigned
3120 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3122 static htab_t costs;
3123 struct mbc_entry **cached, act;
3124 rtx seq;
3125 unsigned cost;
3127 if (!costs)
3128 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3130 act.mode = mode;
3131 act.cst = cst;
3132 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3133 if (*cached)
3134 return (*cached)->cost;
3136 *cached = XNEW (struct mbc_entry);
3137 (*cached)->mode = mode;
3138 (*cached)->cst = cst;
3140 start_sequence ();
3141 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3142 gen_int_mode (cst, mode), NULL_RTX, 0);
3143 seq = get_insns ();
3144 end_sequence ();
3146 cost = seq_cost (seq, speed);
3148 if (dump_file && (dump_flags & TDF_DETAILS))
3149 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3150 (int) cst, GET_MODE_NAME (mode), cost);
3152 (*cached)->cost = cost;
3154 return cost;
3157 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3158 validity for a memory reference accessing memory of mode MODE in
3159 address space AS. */
3161 DEF_VEC_P (sbitmap);
3162 DEF_VEC_ALLOC_P (sbitmap, heap);
3164 bool
3165 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3166 addr_space_t as)
3168 #define MAX_RATIO 128
3169 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3170 static VEC (sbitmap, heap) *valid_mult_list;
3171 sbitmap valid_mult;
3173 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3174 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3176 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3177 if (!valid_mult)
3179 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3180 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3181 rtx addr;
3182 HOST_WIDE_INT i;
3184 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3185 sbitmap_zero (valid_mult);
3186 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3187 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3189 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3190 if (memory_address_addr_space_p (mode, addr, as))
3191 SET_BIT (valid_mult, i + MAX_RATIO);
3194 if (dump_file && (dump_flags & TDF_DETAILS))
3196 fprintf (dump_file, " allowed multipliers:");
3197 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3198 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3199 fprintf (dump_file, " %d", (int) i);
3200 fprintf (dump_file, "\n");
3201 fprintf (dump_file, "\n");
3204 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3207 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3208 return false;
3210 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
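/* For example, on a target with scaled addressing such as i386, the
   probing above would typically accept the hardware scale factors, so
   multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC) is
   true while a ratio of 3 is rejected; the result is computed once per
   (mode, address space) pair and then served from the cached bitmap.  */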
3213 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3214 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3215 the variable is omitted. Compute the cost for a memory reference that accesses
3216 a memory location of mode MEM_MODE in address space AS.
3218 MAY_AUTOINC is set to true if the autoincrement (increasing the index by the
3219 size of MEM_MODE / RATIO) is available. To make this determination, we
3220 look at the size of the increment to be made, which is given in CSTEP.
3221 CSTEP may be zero if the step is unknown.
3222 STMT_AFTER_INC is true iff the statement we're looking at is after the
3223 increment of the original biv.
3225 TODO -- there must be some better way. This is all quite crude. */
3227 typedef struct
3229 HOST_WIDE_INT min_offset, max_offset;
3230 unsigned costs[2][2][2][2];
3231 } *address_cost_data;
3233 DEF_VEC_P (address_cost_data);
3234 DEF_VEC_ALLOC_P (address_cost_data, heap);
3236 static comp_cost
3237 get_address_cost (bool symbol_present, bool var_present,
3238 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3239 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3240 addr_space_t as, bool speed,
3241 bool stmt_after_inc, bool *may_autoinc)
3243 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3244 static VEC(address_cost_data, heap) *address_cost_data_list;
3245 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3246 address_cost_data data;
3247 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3248 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3249 unsigned cost, acost, complexity;
3250 bool offset_p, ratio_p, autoinc;
3251 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3252 unsigned HOST_WIDE_INT mask;
3253 unsigned bits;
3255 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3256 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3257 data_index + 1);
3259 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3260 if (!data)
3262 HOST_WIDE_INT i;
3263 HOST_WIDE_INT rat, off = 0;
3264 int old_cse_not_expected, width;
3265 unsigned sym_p, var_p, off_p, rat_p, add_c;
3266 rtx seq, addr, base;
3267 rtx reg0, reg1;
3269 data = (address_cost_data) xcalloc (1, sizeof (*data));
3271 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3273 width = GET_MODE_BITSIZE (address_mode) - 1;
3274 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3275 width = HOST_BITS_PER_WIDE_INT - 1;
3276 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3278 for (i = width; i >= 0; i--)
3280 off = -((HOST_WIDE_INT) 1 << i);
3281 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3282 if (memory_address_addr_space_p (mem_mode, addr, as))
3283 break;
3285 data->min_offset = (i == -1 ? 0 : off);
3287 for (i = width; i >= 0; i--)
3289 off = ((HOST_WIDE_INT) 1 << i) - 1;
3290 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3291 if (memory_address_addr_space_p (mem_mode, addr, as))
3292 break;
3294 if (i == -1)
3295 off = 0;
3296 data->max_offset = off;
3298 if (dump_file && (dump_flags & TDF_DETAILS))
3300 fprintf (dump_file, "get_address_cost:\n");
3301 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3302 GET_MODE_NAME (mem_mode),
3303 data->min_offset);
3304 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3305 GET_MODE_NAME (mem_mode),
3306 data->max_offset);
3309 rat = 1;
3310 for (i = 2; i <= MAX_RATIO; i++)
3311 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3313 rat = i;
3314 break;
3317 /* Compute the cost of various addressing modes. */
3318 acost = 0;
3319 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3320 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3322 if (HAVE_PRE_DECREMENT)
3324 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3325 has_predec[mem_mode]
3326 = memory_address_addr_space_p (mem_mode, addr, as);
3328 if (HAVE_POST_DECREMENT)
3330 addr = gen_rtx_POST_DEC (address_mode, reg0);
3331 has_postdec[mem_mode]
3332 = memory_address_addr_space_p (mem_mode, addr, as);
3334 if (HAVE_PRE_INCREMENT)
3336 addr = gen_rtx_PRE_INC (address_mode, reg0);
3337 has_preinc[mem_mode]
3338 = memory_address_addr_space_p (mem_mode, addr, as);
3340 if (HAVE_POST_INCREMENT)
3342 addr = gen_rtx_POST_INC (address_mode, reg0);
3343 has_postinc[mem_mode]
3344 = memory_address_addr_space_p (mem_mode, addr, as);
3346 for (i = 0; i < 16; i++)
3348 sym_p = i & 1;
3349 var_p = (i >> 1) & 1;
3350 off_p = (i >> 2) & 1;
3351 rat_p = (i >> 3) & 1;
3353 addr = reg0;
3354 if (rat_p)
3355 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3356 gen_int_mode (rat, address_mode));
3358 if (var_p)
3359 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3361 if (sym_p)
3363 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3364 /* ??? We can run into trouble with some backends by presenting
3365 it with symbols which haven't been properly passed through
3366 targetm.encode_section_info. By setting the local bit, we
3367 enhance the probability of things working. */
3368 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3370 if (off_p)
3371 base = gen_rtx_fmt_e (CONST, address_mode,
3372 gen_rtx_fmt_ee
3373 (PLUS, address_mode, base,
3374 gen_int_mode (off, address_mode)));
3376 else if (off_p)
3377 base = gen_int_mode (off, address_mode);
3378 else
3379 base = NULL_RTX;
3381 if (base)
3382 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3384 start_sequence ();
3385 /* To avoid splitting addressing modes, pretend that no cse will
3386 follow. */
3387 old_cse_not_expected = cse_not_expected;
3388 cse_not_expected = true;
3389 addr = memory_address_addr_space (mem_mode, addr, as);
3390 cse_not_expected = old_cse_not_expected;
3391 seq = get_insns ();
3392 end_sequence ();
3394 acost = seq_cost (seq, speed);
3395 acost += address_cost (addr, mem_mode, as, speed);
3397 if (!acost)
3398 acost = 1;
3399 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3402 /* On some targets, it is quite expensive to load a symbol into a register,
3403 which makes addresses that contain symbols look much more expensive.
3404 However, the symbol will have to be loaded in any case before the
3405 loop (and quite likely we have it in a register already), so it does not
3406 make much sense to penalize them too heavily. So make some final
3407 tweaks for the SYMBOL_PRESENT modes:
3409 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3410 a var is cheaper, use this mode with a small penalty.
3411 If VAR_PRESENT is true, check whether the mode with
3412 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3413 if this is the case, use it. */
3414 add_c = add_cost (address_mode, speed);
3415 for (i = 0; i < 8; i++)
3417 var_p = i & 1;
3418 off_p = (i >> 1) & 1;
3419 rat_p = (i >> 2) & 1;
3421 acost = data->costs[0][1][off_p][rat_p] + 1;
3422 if (var_p)
3423 acost += add_c;
3425 if (acost < data->costs[1][var_p][off_p][rat_p])
3426 data->costs[1][var_p][off_p][rat_p] = acost;
3429 if (dump_file && (dump_flags & TDF_DETAILS))
3431 fprintf (dump_file, "Address costs:\n");
3433 for (i = 0; i < 16; i++)
3435 sym_p = i & 1;
3436 var_p = (i >> 1) & 1;
3437 off_p = (i >> 2) & 1;
3438 rat_p = (i >> 3) & 1;
3440 fprintf (dump_file, " ");
3441 if (sym_p)
3442 fprintf (dump_file, "sym + ");
3443 if (var_p)
3444 fprintf (dump_file, "var + ");
3445 if (off_p)
3446 fprintf (dump_file, "cst + ");
3447 if (rat_p)
3448 fprintf (dump_file, "rat * ");
3450 acost = data->costs[sym_p][var_p][off_p][rat_p];
3451 fprintf (dump_file, "index costs %d\n", acost);
3453 if (has_predec[mem_mode] || has_postdec[mem_mode]
3454 || has_preinc[mem_mode] || has_postinc[mem_mode])
3455 fprintf (dump_file, " May include autoinc/dec\n");
3456 fprintf (dump_file, "\n");
3459 VEC_replace (address_cost_data, address_cost_data_list,
3460 data_index, data);
3463 bits = GET_MODE_BITSIZE (address_mode);
3464 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3465 offset &= mask;
3466 if ((offset >> (bits - 1) & 1))
3467 offset |= ~mask;
3468 s_offset = offset;
3470 autoinc = false;
3471 msize = GET_MODE_SIZE (mem_mode);
3472 autoinc_offset = offset;
3473 if (stmt_after_inc)
3474 autoinc_offset += ratio * cstep;
3475 if (symbol_present || var_present || ratio != 1)
3476 autoinc = false;
3477 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3478 && msize == cstep)
3479 || (has_postdec[mem_mode] && autoinc_offset == 0
3480 && msize == -cstep)
3481 || (has_preinc[mem_mode] && autoinc_offset == msize
3482 && msize == cstep)
3483 || (has_predec[mem_mode] && autoinc_offset == -msize
3484 && msize == -cstep))
3485 autoinc = true;
3487 cost = 0;
3488 offset_p = (s_offset != 0
3489 && data->min_offset <= s_offset
3490 && s_offset <= data->max_offset);
3491 ratio_p = (ratio != 1
3492 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3494 if (ratio != 1 && !ratio_p)
3495 cost += multiply_by_cost (ratio, address_mode, speed);
3497 if (s_offset && !offset_p && !symbol_present)
3498 cost += add_cost (address_mode, speed);
3500 if (may_autoinc)
3501 *may_autoinc = autoinc;
3502 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3503 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3504 return new_cost (cost + acost, complexity);
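/* To illustrate the final lookup: an address of the shape
   sym + 8 + 2 * index (no separate variable part) selects the
   precomputed data->costs[1][0][1][1] entry, assuming offset 8 lies in
   [min_offset, max_offset] and ratio 2 is allowed by the target; the
   complexity is 1 + 0 + 1 + 1 = 3, one for each part present.  */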
3507 /* Estimates cost of forcing expression EXPR into a variable. */
3509 static comp_cost
3510 force_expr_to_var_cost (tree expr, bool speed)
3512 static bool costs_initialized = false;
3513 static unsigned integer_cost [2];
3514 static unsigned symbol_cost [2];
3515 static unsigned address_cost [2];
3516 tree op0, op1;
3517 comp_cost cost0, cost1, cost;
3518 enum machine_mode mode;
3520 if (!costs_initialized)
3522 tree type = build_pointer_type (integer_type_node);
3523 tree var, addr;
3524 rtx x;
3525 int i;
3527 var = create_tmp_var_raw (integer_type_node, "test_var");
3528 TREE_STATIC (var) = 1;
3529 x = produce_memory_decl_rtl (var, NULL);
3530 SET_DECL_RTL (var, x);
3532 addr = build1 (ADDR_EXPR, type, var);
3535 for (i = 0; i < 2; i++)
3537 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3538 2000), i);
3540 symbol_cost[i] = computation_cost (addr, i) + 1;
3542 address_cost[i]
3543 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3544 addr,
3545 build_int_cst (sizetype, 2000)), i) + 1;
3546 if (dump_file && (dump_flags & TDF_DETAILS))
3548 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3549 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3550 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3551 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3552 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3553 fprintf (dump_file, "\n");
3557 costs_initialized = true;
3560 STRIP_NOPS (expr);
3562 if (SSA_VAR_P (expr))
3563 return zero_cost;
3565 if (is_gimple_min_invariant (expr))
3567 if (TREE_CODE (expr) == INTEGER_CST)
3568 return new_cost (integer_cost [speed], 0);
3570 if (TREE_CODE (expr) == ADDR_EXPR)
3572 tree obj = TREE_OPERAND (expr, 0);
3574 if (TREE_CODE (obj) == VAR_DECL
3575 || TREE_CODE (obj) == PARM_DECL
3576 || TREE_CODE (obj) == RESULT_DECL)
3577 return new_cost (symbol_cost [speed], 0);
3580 return new_cost (address_cost [speed], 0);
3583 switch (TREE_CODE (expr))
3585 case POINTER_PLUS_EXPR:
3586 case PLUS_EXPR:
3587 case MINUS_EXPR:
3588 case MULT_EXPR:
3589 op0 = TREE_OPERAND (expr, 0);
3590 op1 = TREE_OPERAND (expr, 1);
3591 STRIP_NOPS (op0);
3592 STRIP_NOPS (op1);
3594 if (is_gimple_val (op0))
3595 cost0 = zero_cost;
3596 else
3597 cost0 = force_expr_to_var_cost (op0, speed);
3599 if (is_gimple_val (op1))
3600 cost1 = zero_cost;
3601 else
3602 cost1 = force_expr_to_var_cost (op1, speed);
3604 break;
3606 case NEGATE_EXPR:
3607 op0 = TREE_OPERAND (expr, 0);
3608 STRIP_NOPS (op0);
3609 op1 = NULL_TREE;
3611 if (is_gimple_val (op0))
3612 cost0 = zero_cost;
3613 else
3614 cost0 = force_expr_to_var_cost (op0, speed);
3616 cost1 = zero_cost;
3617 break;
3619 default:
3620 /* Just an arbitrary value, FIXME. */
3621 return new_cost (target_spill_cost[speed], 0);
3624 mode = TYPE_MODE (TREE_TYPE (expr));
3625 switch (TREE_CODE (expr))
3627 case POINTER_PLUS_EXPR:
3628 case PLUS_EXPR:
3629 case MINUS_EXPR:
3630 case NEGATE_EXPR:
3631 cost = new_cost (add_cost (mode, speed), 0);
3632 break;
3634 case MULT_EXPR:
3635 if (cst_and_fits_in_hwi (op0))
3636 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3637 else if (cst_and_fits_in_hwi (op1))
3638 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3639 else
3640 return new_cost (target_spill_cost [speed], 0);
3641 break;
3643 default:
3644 gcc_unreachable ();
3647 cost = add_costs (cost, cost0);
3648 cost = add_costs (cost, cost1);
3650 /* Bound the cost by target_spill_cost. The parts of complicated
3651 computations are often either loop invariant or can at least
3652 be shared between several iv uses, so letting this grow without
3653 limit would not give reasonable results. */
3654 if (cost.cost > (int) target_spill_cost [speed])
3655 cost.cost = target_spill_cost [speed];
3657 return cost;
3660 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3661 invariants the computation depends on. */
3663 static comp_cost
3664 force_var_cost (struct ivopts_data *data,
3665 tree expr, bitmap *depends_on)
3667 if (depends_on)
3669 fd_ivopts_data = data;
3670 walk_tree (&expr, find_depends, depends_on, NULL);
3673 return force_expr_to_var_cost (expr, data->speed);
3676 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3677 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3678 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3679 invariants the computation depends on. */
3681 static comp_cost
3682 split_address_cost (struct ivopts_data *data,
3683 tree addr, bool *symbol_present, bool *var_present,
3684 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3686 tree core;
3687 HOST_WIDE_INT bitsize;
3688 HOST_WIDE_INT bitpos;
3689 tree toffset;
3690 enum machine_mode mode;
3691 int unsignedp, volatilep;
3693 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3694 &unsignedp, &volatilep, false);
3696 if (toffset != 0
3697 || bitpos % BITS_PER_UNIT != 0
3698 || TREE_CODE (core) != VAR_DECL)
3700 *symbol_present = false;
3701 *var_present = true;
3702 fd_ivopts_data = data;
3703 walk_tree (&addr, find_depends, depends_on, NULL);
3704 return new_cost (target_spill_cost[data->speed], 0);
3707 *offset += bitpos / BITS_PER_UNIT;
3708 if (TREE_STATIC (core)
3709 || DECL_EXTERNAL (core))
3711 *symbol_present = true;
3712 *var_present = false;
3713 return zero_cost;
3716 *symbol_present = false;
3717 *var_present = true;
3718 return zero_cost;
3721 /* Estimates cost of expressing difference of addresses E1 - E2 as
3722 var + symbol + offset. The value of offset is added to OFFSET,
3723 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3724 part is missing. DEPENDS_ON is a set of the invariants the computation
3725 depends on. */
3727 static comp_cost
3728 ptr_difference_cost (struct ivopts_data *data,
3729 tree e1, tree e2, bool *symbol_present, bool *var_present,
3730 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3732 HOST_WIDE_INT diff = 0;
3733 aff_tree aff_e1, aff_e2;
3734 tree type;
3736 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3738 if (ptr_difference_const (e1, e2, &diff))
3740 *offset += diff;
3741 *symbol_present = false;
3742 *var_present = false;
3743 return zero_cost;
3746 if (integer_zerop (e2))
3747 return split_address_cost (data, TREE_OPERAND (e1, 0),
3748 symbol_present, var_present, offset, depends_on);
3750 *symbol_present = false;
3751 *var_present = true;
3753 type = signed_type_for (TREE_TYPE (e1));
3754 tree_to_aff_combination (e1, type, &aff_e1);
3755 tree_to_aff_combination (e2, type, &aff_e2);
3756 aff_combination_scale (&aff_e2, double_int_minus_one);
3757 aff_combination_add (&aff_e1, &aff_e2);
3759 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3762 /* Estimates cost of expressing difference E1 - E2 as
3763 var + symbol + offset. The value of offset is added to OFFSET,
3764 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3765 part is missing. DEPENDS_ON is a set of the invariants the computation
3766 depends on. */
3768 static comp_cost
3769 difference_cost (struct ivopts_data *data,
3770 tree e1, tree e2, bool *symbol_present, bool *var_present,
3771 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3773 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3774 unsigned HOST_WIDE_INT off1, off2;
3775 aff_tree aff_e1, aff_e2;
3776 tree type;
3778 e1 = strip_offset (e1, &off1);
3779 e2 = strip_offset (e2, &off2);
3780 *offset += off1 - off2;
3782 STRIP_NOPS (e1);
3783 STRIP_NOPS (e2);
3785 if (TREE_CODE (e1) == ADDR_EXPR)
3786 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3787 offset, depends_on);
3788 *symbol_present = false;
3790 if (operand_equal_p (e1, e2, 0))
3792 *var_present = false;
3793 return zero_cost;
3796 *var_present = true;
3798 if (integer_zerop (e2))
3799 return force_var_cost (data, e1, depends_on);
3801 if (integer_zerop (e1))
3803 comp_cost cost = force_var_cost (data, e2, depends_on);
3804 cost.cost += multiply_by_cost (-1, mode, data->speed);
3805 return cost;
3808 type = signed_type_for (TREE_TYPE (e1));
3809 tree_to_aff_combination (e1, type, &aff_e1);
3810 tree_to_aff_combination (e2, type, &aff_e2);
3811 aff_combination_scale (&aff_e2, double_int_minus_one);
3812 aff_combination_add (&aff_e1, &aff_e2);
3814 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3817 /* Returns true if AFF1 and AFF2 are identical. */
3819 static bool
3820 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3822 unsigned i;
3824 if (aff1->n != aff2->n)
3825 return false;
3827 for (i = 0; i < aff1->n; i++)
3829 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3830 return false;
3832 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3833 return false;
3835 return true;
3838 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3839 requires a new compiler-generated temporary. Returns -1 otherwise.
3840 ADDRESS_P is a flag indicating whether the expression is for address
3841 computation. */
3843 static int
3844 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3845 tree cbase, HOST_WIDE_INT ratio,
3846 bool address_p)
3848 aff_tree ubase_aff, cbase_aff;
3849 tree expr, ub, cb;
3850 struct iv_inv_expr_ent ent;
3851 struct iv_inv_expr_ent **slot;
3853 STRIP_NOPS (ubase);
3854 STRIP_NOPS (cbase);
3855 ub = ubase;
3856 cb = cbase;
3858 if ((TREE_CODE (ubase) == INTEGER_CST)
3859 && (TREE_CODE (cbase) == INTEGER_CST))
3860 return -1;
3862 /* Strips the constant part. */
3863 if (TREE_CODE (ubase) == PLUS_EXPR
3864 || TREE_CODE (ubase) == MINUS_EXPR
3865 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3867 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3868 ubase = TREE_OPERAND (ubase, 0);
3871 /* Strips the constant part. */
3872 if (TREE_CODE (cbase) == PLUS_EXPR
3873 || TREE_CODE (cbase) == MINUS_EXPR
3874 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3876 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3877 cbase = TREE_OPERAND (cbase, 0);
3880 if (address_p)
3882 if (((TREE_CODE (ubase) == SSA_NAME)
3883 || (TREE_CODE (ubase) == ADDR_EXPR
3884 && is_gimple_min_invariant (ubase)))
3885 && (TREE_CODE (cbase) == INTEGER_CST))
3886 return -1;
3888 if (((TREE_CODE (cbase) == SSA_NAME)
3889 || (TREE_CODE (cbase) == ADDR_EXPR
3890 && is_gimple_min_invariant (cbase)))
3891 && (TREE_CODE (ubase) == INTEGER_CST))
3892 return -1;
3895 if (ratio == 1)
3897 if (operand_equal_p (ubase, cbase, 0))
3898 return -1;
3900 if (TREE_CODE (ubase) == ADDR_EXPR
3901 && TREE_CODE (cbase) == ADDR_EXPR)
3903 tree usym, csym;
3905 usym = TREE_OPERAND (ubase, 0);
3906 csym = TREE_OPERAND (cbase, 0);
3907 if (TREE_CODE (usym) == ARRAY_REF)
3909 tree ind = TREE_OPERAND (usym, 1);
3910 if (TREE_CODE (ind) == INTEGER_CST
3911 && host_integerp (ind, 0)
3912 && TREE_INT_CST_LOW (ind) == 0)
3913 usym = TREE_OPERAND (usym, 0);
3915 if (TREE_CODE (csym) == ARRAY_REF)
3917 tree ind = TREE_OPERAND (csym, 1);
3918 if (TREE_CODE (ind) == INTEGER_CST
3919 && host_integerp (ind, 0)
3920 && TREE_INT_CST_LOW (ind) == 0)
3921 csym = TREE_OPERAND (csym, 0);
3923 if (operand_equal_p (usym, csym, 0))
3924 return -1;
3926 /* Now do a more complex comparison. */
3927 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3928 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3929 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3930 return -1;
3933 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3934 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3936 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3937 aff_combination_add (&ubase_aff, &cbase_aff);
3938 expr = aff_combination_to_tree (&ubase_aff);
3939 ent.expr = expr;
3940 ent.hash = iterative_hash_expr (expr, 0);
3941 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3942 &ent, INSERT);
3943 if (*slot)
3944 return (*slot)->id;
3946 *slot = XNEW (struct iv_inv_expr_ent);
3947 (*slot)->expr = expr;
3948 (*slot)->hash = ent.hash;
3949 (*slot)->id = data->inv_expr_id++;
3950 return (*slot)->id;
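/* For instance (hypothetical trees): ubase = &a[0] + j and cbase = &a[0]
   with ratio 1 reduce to the invariant expression "j"; hashing it into
   data->inv_expr_tab means that two uses sharing this difference are
   charged for one compiler-generated temporary instead of two.  */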
3955 /* Determines the cost of the computation by which USE is expressed
3956 from induction variable CAND. If ADDRESS_P is true, we just need
3957 to create an address from it, otherwise we want to get it into a
3958 register. A set of invariants we depend on is stored in
3959 DEPENDS_ON. AT is the statement at which the value is computed.
3960 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3961 addressing is likely. */
3963 static comp_cost
3964 get_computation_cost_at (struct ivopts_data *data,
3965 struct iv_use *use, struct iv_cand *cand,
3966 bool address_p, bitmap *depends_on, gimple at,
3967 bool *can_autoinc,
3968 int *inv_expr_id)
3970 tree ubase = use->iv->base, ustep = use->iv->step;
3971 tree cbase, cstep;
3972 tree utype = TREE_TYPE (ubase), ctype;
3973 unsigned HOST_WIDE_INT cstepi, offset = 0;
3974 HOST_WIDE_INT ratio, aratio;
3975 bool var_present, symbol_present, stmt_is_after_inc;
3976 comp_cost cost;
3977 double_int rat;
3978 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3980 *depends_on = NULL;
3982 /* Only consider real candidates. */
3983 if (!cand->iv)
3984 return infinite_cost;
3986 cbase = cand->iv->base;
3987 cstep = cand->iv->step;
3988 ctype = TREE_TYPE (cbase);
3990 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3993 /* We do not have enough precision to express the values of the use. */
3993 return infinite_cost;
3996 if (address_p)
3998 /* Do not try to express the address of an object with a computation
3999 based on the address of a different object. This may cause problems
4000 in RTL-level alias analysis (which does not expect this to happen,
4001 as it is illegal in C), and would be unlikely to be useful
4002 anyway. */
4003 if (use->iv->base_object
4004 && cand->iv->base_object
4005 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4006 return infinite_cost;
4009 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4011 /* TODO -- add direct handling of this case. */
4012 goto fallback;
4015 /* CSTEPI is removed from the offset in case the statement is after the
4016 increment. If the step is not constant, we use zero instead.
4017 This is a bit imprecise (there is the extra addition), but
4018 redundancy elimination is likely to transform the code so that
4019 it uses the value of the variable before the increment anyway,
4020 so this is not too unrealistic. */
4021 if (cst_and_fits_in_hwi (cstep))
4022 cstepi = int_cst_value (cstep);
4023 else
4024 cstepi = 0;
4026 if (!constant_multiple_of (ustep, cstep, &rat))
4027 return infinite_cost;
4029 if (double_int_fits_in_shwi_p (rat))
4030 ratio = double_int_to_shwi (rat);
4031 else
4032 return infinite_cost;
4034 STRIP_NOPS (cbase);
4035 ctype = TREE_TYPE (cbase);
4037 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4039 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4040 or ratio == 1, it is better to handle this like
4042 ubase - ratio * cbase + ratio * var
4044 (this also holds in the case ratio == -1, TODO). */
4046 if (cst_and_fits_in_hwi (cbase))
4048 offset = - ratio * int_cst_value (cbase);
4049 cost = difference_cost (data,
4050 ubase, build_int_cst (utype, 0),
4051 &symbol_present, &var_present, &offset,
4052 depends_on);
4053 cost.cost /= avg_loop_niter (data->current_loop);
4055 else if (ratio == 1)
4057 tree real_cbase = cbase;
4059 /* Check to see if any adjustment is needed. */
4060 if (cstepi == 0 && stmt_is_after_inc)
4062 aff_tree real_cbase_aff;
4063 aff_tree cstep_aff;
4065 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4066 &real_cbase_aff);
4067 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4069 aff_combination_add (&real_cbase_aff, &cstep_aff);
4070 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4073 cost = difference_cost (data,
4074 ubase, real_cbase,
4075 &symbol_present, &var_present, &offset,
4076 depends_on);
4077 cost.cost /= avg_loop_niter (data->current_loop);
4079 else if (address_p
4080 && !POINTER_TYPE_P (ctype)
4081 && multiplier_allowed_in_address_p
4082 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4083 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4085 cbase
4086 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4087 cost = difference_cost (data,
4088 ubase, cbase,
4089 &symbol_present, &var_present, &offset,
4090 depends_on);
4091 cost.cost /= avg_loop_niter (data->current_loop);
4093 else
4095 cost = force_var_cost (data, cbase, depends_on);
4096 cost = add_costs (cost,
4097 difference_cost (data,
4098 ubase, build_int_cst (utype, 0),
4099 &symbol_present, &var_present,
4100 &offset, depends_on));
4101 cost.cost /= avg_loop_niter (data->current_loop);
4102 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4105 if (inv_expr_id)
4107 *inv_expr_id =
4108 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4109 /* Clear depends_on. */
4110 if (*inv_expr_id != -1 && depends_on && *depends_on)
4111 bitmap_clear (*depends_on);
4114 /* If we are after the increment, the value of the candidate is higher by
4115 one iteration. */
4116 if (stmt_is_after_inc)
4117 offset -= ratio * cstepi;
4119 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4120 (the symbol/var1/const parts may be omitted). If we are looking for an
4121 address, find the cost of addressing this. */
4122 if (address_p)
4123 return add_costs (cost,
4124 get_address_cost (symbol_present, var_present,
4125 offset, ratio, cstepi,
4126 TYPE_MODE (TREE_TYPE (utype)),
4127 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4128 speed, stmt_is_after_inc,
4129 can_autoinc));
4131 /* Otherwise estimate the costs for computing the expression. */
4132 if (!symbol_present && !var_present && !offset)
4134 if (ratio != 1)
4135 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4136 return cost;
4139 /* Symbol + offset should be compile-time computable, so consider them
4140 to be added once to the variable, if present. */
4141 if (var_present && (symbol_present || offset))
4142 cost.cost += adjust_setup_cost (data,
4143 add_cost (TYPE_MODE (ctype), speed));
4145 /* Having an offset does not affect the runtime cost if it is added to
4146 the symbol, but it does increase complexity. */
4147 if (offset)
4148 cost.complexity++;
4150 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4152 aratio = ratio > 0 ? ratio : -ratio;
4153 if (aratio != 1)
4154 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4155 return cost;
4157 fallback:
4158 if (can_autoinc)
4159 *can_autoinc = false;
4162 /* Just get the expression, expand it and measure the cost. */
4163 tree comp = get_computation_at (data->current_loop, use, cand, at);
4165 if (!comp)
4166 return infinite_cost;
4168 if (address_p)
4169 comp = build_simple_mem_ref (comp);
4171 return new_cost (computation_cost (comp, speed), 0);
4175 /* Determines the cost of the computation by which USE is expressed
4176 from induction variable CAND. If ADDRESS_P is true, we just need
4177 to create an address from it, otherwise we want to get it into a
4178 register. A set of invariants we depend on is stored in
4179 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4180 autoinc addressing is likely. */
4182 static comp_cost
4183 get_computation_cost (struct ivopts_data *data,
4184 struct iv_use *use, struct iv_cand *cand,
4185 bool address_p, bitmap *depends_on,
4186 bool *can_autoinc, int *inv_expr_id)
4188 return get_computation_cost_at (data,
4189 use, cand, address_p, depends_on, use->stmt,
4190 can_autoinc, inv_expr_id);
4193 /* Determines cost of basing replacement of USE on CAND in a generic
4194 expression. */
4196 static bool
4197 determine_use_iv_cost_generic (struct ivopts_data *data,
4198 struct iv_use *use, struct iv_cand *cand)
4200 bitmap depends_on;
4201 comp_cost cost;
4202 int inv_expr_id = -1;
4204 /* The simple case first -- if we need to express value of the preserved
4205 original biv, the cost is 0. This also prevents us from counting the
4206 cost of increment twice -- once at this use and once in the cost of
4207 the candidate. */
4208 if (cand->pos == IP_ORIGINAL
4209 && cand->incremented_at == use->stmt)
4211 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4212 return true;
4215 cost = get_computation_cost (data, use, cand, false, &depends_on,
4216 NULL, &inv_expr_id);
4218 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4219 inv_expr_id);
4221 return !infinite_cost_p (cost);
4224 /* Determines cost of basing replacement of USE on CAND in an address. */
4226 static bool
4227 determine_use_iv_cost_address (struct ivopts_data *data,
4228 struct iv_use *use, struct iv_cand *cand)
4230 bitmap depends_on;
4231 bool can_autoinc;
4232 int inv_expr_id = -1;
4233 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4234 &can_autoinc, &inv_expr_id);
4236 if (cand->ainc_use == use)
4238 if (can_autoinc)
4239 cost.cost -= cand->cost_step;
4240 /* If we generated the candidate solely for exploiting autoincrement
4241 opportunities, and it turns out it can't be used, set the cost to
4242 infinity to make sure we ignore it. */
4243 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4244 cost = infinite_cost;
4246 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4247 inv_expr_id);
4249 return !infinite_cost_p (cost);
4252 /* Computes the value of candidate CAND at position AT in iteration NITER, and
4253 stores it to VAL. */
4255 static void
4256 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4257 aff_tree *val)
4259 aff_tree step, delta, nit;
4260 struct iv *iv = cand->iv;
4261 tree type = TREE_TYPE (iv->base);
4262 tree steptype = type;
4263 if (POINTER_TYPE_P (type))
4264 steptype = sizetype;
4266 tree_to_aff_combination (iv->step, steptype, &step);
4267 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4268 aff_combination_convert (&nit, steptype);
4269 aff_combination_mult (&nit, &step, &delta);
4270 if (stmt_after_increment (loop, cand, at))
4271 aff_combination_add (&delta, &step);
4273 tree_to_aff_combination (iv->base, type, val);
4274 aff_combination_add (val, &delta);
4277 /* Returns the period of induction variable IV. */
4279 static tree
4280 iv_period (struct iv *iv)
4282 tree step = iv->step, period, type;
4283 tree pow2div;
4285 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4287 type = unsigned_type_for (TREE_TYPE (step));
4288 /* Period of the iv is lcm (step, type_range) / step - 1,
4289 i.e., N * type_range / step - 1. Since the type range is a power
4290 of two, N == step >> num_of_ending_zeros_binary (step),
4291 so the final result is
4293 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4296 pow2div = num_ending_zeros (step);
4298 period = build_low_bits_mask (type,
4299 (TYPE_PRECISION (type)
4300 - tree_low_cst (pow2div, 1)));
4302 return period;
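/* Illustration (a standalone sketch by the editor, not part of the pass):
   the formula above for an 8-bit unsigned iv, with a hand-rolled
   trailing-zero count standing in for num_ending_zeros.  E.g. step == 12
   has two trailing zero bits, so the period is (256 >> 2) - 1 == 63:

     #include <stdint.h>
     #include <assert.h>

     static unsigned
     ctz8 (uint8_t x)                 // trailing zeros; x must be nonzero
     {
       unsigned n = 0;
       for (; (x & 1) == 0; x >>= 1)
         n++;
       return n;
     }

     static uint8_t
     iv_period8 (uint8_t step)
     {
       // The type range of uint8_t is 256: period = (256 >> ctz (step)) - 1.
       return (uint8_t) ((256u >> ctz8 (step)) - 1);
     }

     int
     main (void)
     {
       assert (iv_period8 (12) == 63);
       assert (iv_period8 (1) == 255);   // odd step: the full range
       return 0;
     }
*/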
4305 /* Returns the comparison operator used when eliminating the iv USE. */
4307 static enum tree_code
4308 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4310 struct loop *loop = data->current_loop;
4311 basic_block ex_bb;
4312 edge exit;
4314 ex_bb = gimple_bb (use->stmt);
4315 exit = EDGE_SUCC (ex_bb, 0);
4316 if (flow_bb_inside_loop_p (loop, exit->dest))
4317 exit = EDGE_SUCC (ex_bb, 1);
4319 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4322 /* Check whether it is possible to express the condition in USE by comparison
4323 of candidate CAND. If so, store the value compared with to BOUND. */
4325 static bool
4326 may_eliminate_iv (struct ivopts_data *data,
4327 struct iv_use *use, struct iv_cand *cand, tree *bound)
4329 basic_block ex_bb;
4330 edge exit;
4331 tree nit, period;
4332 struct loop *loop = data->current_loop;
4333 aff_tree bnd;
4334 struct tree_niter_desc *desc = NULL;
4336 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4337 return false;
4339 /* For now this works only for exits that dominate the loop latch.
4340 TODO: extend to other conditions inside loop body. */
4341 ex_bb = gimple_bb (use->stmt);
4342 if (use->stmt != last_stmt (ex_bb)
4343 || gimple_code (use->stmt) != GIMPLE_COND
4344 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4345 return false;
4347 exit = EDGE_SUCC (ex_bb, 0);
4348 if (flow_bb_inside_loop_p (loop, exit->dest))
4349 exit = EDGE_SUCC (ex_bb, 1);
4350 if (flow_bb_inside_loop_p (loop, exit->dest))
4351 return false;
4353 nit = niter_for_exit (data, exit, &desc);
4354 if (!nit)
4355 return false;
4357 /* Determine whether we can use the variable to test the exit condition.
4358 This is the case iff the period of the induction variable is greater
4359 than the number of iterations for which the exit condition is true. */
4360 period = iv_period (cand->iv);
4362 /* If the number of iterations is constant, compare against it directly. */
4363 if (TREE_CODE (nit) == INTEGER_CST)
4365 /* See cand_value_at. */
4366 if (stmt_after_increment (loop, cand, use->stmt))
4368 if (!tree_int_cst_lt (nit, period))
4369 return false;
4371 else
4373 if (tree_int_cst_lt (period, nit))
4374 return false;
4378 /* If not, and if this is the only possible exit of the loop, see whether
4379 we can get a conservative estimate on the number of iterations of the
4380 entire loop and compare against that instead. */
4381 else
4383 double_int period_value, max_niter;
4385 max_niter = desc->max;
4386 if (stmt_after_increment (loop, cand, use->stmt))
4387 max_niter = double_int_add (max_niter, double_int_one);
4388 period_value = tree_to_double_int (period);
4389 if (double_int_ucmp (max_niter, period_value) > 0)
4391 /* See if we can take advantage of inferred loop bound information. */
4392 if (loop_only_exit_p (loop, exit))
4394 if (!estimated_loop_iterations (loop, true, &max_niter))
4395 return false;
4396 /* The loop bound is already adjusted by adding 1. */
4397 if (double_int_ucmp (max_niter, period_value) > 0)
4398 return false;
4400 else
4401 return false;
4405 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4407 *bound = aff_combination_to_tree (&bnd);
4408 /* It is unlikely that computing the number of iterations using division
4409 would be more profitable than keeping the original induction variable. */
4410 if (expression_expensive_p (*bound))
4411 return false;
4412 return true;
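/* Illustration (a standalone sketch by the editor, not part of the pass):
   the feasibility test above reduced to plain integers.  The candidate
   may replace the exit test only if it does not wrap before the exit is
   taken, i.e. its period must cover the iteration count (strictly so
   when the use follows the increment -- see cand_value_at):

     #include <stdbool.h>
     #include <stdint.h>

     static bool
     toy_may_eliminate (uint64_t period, uint64_t niter, bool after_inc)
     {
       if (after_inc)
         return niter < period;     // candidate is one iteration ahead
       return !(period < niter);    // i.e. period >= niter
     }
*/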
4416 /* Determines cost of basing replacement of USE on CAND in a condition. */
4418 static bool
4419 determine_use_iv_cost_condition (struct ivopts_data *data,
4420 struct iv_use *use, struct iv_cand *cand)
4422 tree bound = NULL_TREE;
4423 struct iv *cmp_iv;
4424 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4425 comp_cost elim_cost, express_cost, cost;
4426 bool ok;
4427 int inv_expr_id = -1;
4428 tree *control_var, *bound_cst;
4430 /* Only consider real candidates. */
4431 if (!cand->iv)
4433 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4434 return false;
4437 /* Try iv elimination. */
4438 if (may_eliminate_iv (data, use, cand, &bound))
4440 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4441 /* The bound is a loop invariant, so it will be only computed
4442 once. */
4443 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4445 else
4446 elim_cost = infinite_cost;
4448 /* Try expressing the original giv. If it is compared with an invariant,
4449 note that we cannot get rid of it. */
4450 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4451 NULL, &cmp_iv);
4452 gcc_assert (ok);
4454 /* When the condition is a comparison of the candidate IV against
4455 zero, prefer this IV.
4457 TODO: The constant that we're subtracting from the cost should
4458 be target-dependent. This information should be added to the
4459 target costs for each backend. */
4460 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4461 && integer_zerop (*bound_cst)
4462 && (operand_equal_p (*control_var, cand->var_after, 0)
4463 || operand_equal_p (*control_var, cand->var_before, 0)))
4464 elim_cost.cost -= 1;
4466 express_cost = get_computation_cost (data, use, cand, false,
4467 &depends_on_express, NULL,
4468 &inv_expr_id);
4469 fd_ivopts_data = data;
4470 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4472 /* Choose the better approach, preferring the eliminated IV. */
4473 if (compare_costs (elim_cost, express_cost) <= 0)
4475 cost = elim_cost;
4476 depends_on = depends_on_elim;
4477 depends_on_elim = NULL;
4479 else
4481 cost = express_cost;
4482 depends_on = depends_on_express;
4483 depends_on_express = NULL;
4484 bound = NULL_TREE;
4487 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4489 if (depends_on_elim)
4490 BITMAP_FREE (depends_on_elim);
4491 if (depends_on_express)
4492 BITMAP_FREE (depends_on_express);
4494 return !infinite_cost_p (cost);
4497 /* Determines cost of basing replacement of USE on CAND. Returns false
4498 if USE cannot be based on CAND. */
4500 static bool
4501 determine_use_iv_cost (struct ivopts_data *data,
4502 struct iv_use *use, struct iv_cand *cand)
4504 switch (use->type)
4506 case USE_NONLINEAR_EXPR:
4507 return determine_use_iv_cost_generic (data, use, cand);
4509 case USE_ADDRESS:
4510 return determine_use_iv_cost_address (data, use, cand);
4512 case USE_COMPARE:
4513 return determine_use_iv_cost_condition (data, use, cand);
4515 default:
4516 gcc_unreachable ();
4520 /* Return true if get_computation_cost indicates that autoincrement is
4521 a possibility for the pair of USE and CAND, false otherwise. */
4523 static bool
4524 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4525 struct iv_cand *cand)
4527 bitmap depends_on;
4528 bool can_autoinc;
4529 comp_cost cost;
4531 if (use->type != USE_ADDRESS)
4532 return false;
4534 cost = get_computation_cost (data, use, cand, true, &depends_on,
4535 &can_autoinc, NULL);
4537 BITMAP_FREE (depends_on);
4539 return !infinite_cost_p (cost) && can_autoinc;
4542 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4543 use that allows autoincrement, and set their AINC_USE if possible. */
4545 static void
4546 set_autoinc_for_original_candidates (struct ivopts_data *data)
4548 unsigned i, j;
4550 for (i = 0; i < n_iv_cands (data); i++)
4552 struct iv_cand *cand = iv_cand (data, i);
4553 struct iv_use *closest = NULL;
4554 if (cand->pos != IP_ORIGINAL)
4555 continue;
4556 for (j = 0; j < n_iv_uses (data); j++)
4558 struct iv_use *use = iv_use (data, j);
4559 unsigned uid = gimple_uid (use->stmt);
4560 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4561 || uid > gimple_uid (cand->incremented_at))
4562 continue;
4563 if (closest == NULL || uid > gimple_uid (closest->stmt))
4564 closest = use;
4566 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4567 continue;
4568 cand->ainc_use = closest;
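/* Illustration (a standalone sketch by the editor, not part of the pass):
   the scan above picks, for each original candidate, the latest use in
   the same block that does not follow the increment -- the maximal
   statement uid not exceeding the increment's uid:

     static int
     toy_closest_use (const unsigned *use_uids, int n, unsigned inc_uid)
     {
       int best = -1;
       for (int i = 0; i < n; i++)
         if (use_uids[i] <= inc_uid
             && (best < 0 || use_uids[i] > use_uids[best]))
           best = i;
       return best;                // closest preceding use, or -1
     }
*/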
4572 /* Finds the candidates for the induction variables. */
4574 static void
4575 find_iv_candidates (struct ivopts_data *data)
4577 /* Add commonly used ivs. */
4578 add_standard_iv_candidates (data);
4580 /* Add old induction variables. */
4581 add_old_ivs_candidates (data);
4583 /* Add induction variables derived from uses. */
4584 add_derived_ivs_candidates (data);
4586 set_autoinc_for_original_candidates (data);
4588 /* Record the important candidates. */
4589 record_important_candidates (data);
4592 /* Determines costs of basing the use of the iv on an iv candidate. */
4594 static void
4595 determine_use_iv_costs (struct ivopts_data *data)
4597 unsigned i, j;
4598 struct iv_use *use;
4599 struct iv_cand *cand;
4600 bitmap to_clear = BITMAP_ALLOC (NULL);
4602 alloc_use_cost_map (data);
4604 for (i = 0; i < n_iv_uses (data); i++)
4606 use = iv_use (data, i);
4608 if (data->consider_all_candidates)
4610 for (j = 0; j < n_iv_cands (data); j++)
4612 cand = iv_cand (data, j);
4613 determine_use_iv_cost (data, use, cand);
4616 else
4618 bitmap_iterator bi;
4620 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4622 cand = iv_cand (data, j);
4623 if (!determine_use_iv_cost (data, use, cand))
4624 bitmap_set_bit (to_clear, j);
4627 /* Remove the candidates for which the cost is infinite from
4628 the list of related candidates. */
4629 bitmap_and_compl_into (use->related_cands, to_clear);
4630 bitmap_clear (to_clear);
4634 BITMAP_FREE (to_clear);
4636 if (dump_file && (dump_flags & TDF_DETAILS))
4638 fprintf (dump_file, "Use-candidate costs:\n");
4640 for (i = 0; i < n_iv_uses (data); i++)
4642 use = iv_use (data, i);
4644 fprintf (dump_file, "Use %d:\n", i);
4645 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4646 for (j = 0; j < use->n_map_members; j++)
4648 if (!use->cost_map[j].cand
4649 || infinite_cost_p (use->cost_map[j].cost))
4650 continue;
4652 fprintf (dump_file, " %d\t%d\t%d\t",
4653 use->cost_map[j].cand->id,
4654 use->cost_map[j].cost.cost,
4655 use->cost_map[j].cost.complexity);
4656 if (use->cost_map[j].depends_on)
4657 bitmap_print (dump_file,
4658 use->cost_map[j].depends_on, "","");
4659 if (use->cost_map[j].inv_expr_id != -1)
4660 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4661 fprintf (dump_file, "\n");
4664 fprintf (dump_file, "\n");
4666 fprintf (dump_file, "\n");
4670 /* Determines cost of the candidate CAND. */
4672 static void
4673 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4675 comp_cost cost_base;
4676 unsigned cost, cost_step;
4677 tree base;
4679 if (!cand->iv)
4681 cand->cost = 0;
4682 return;
4685 /* There are two costs associated with the candidate -- its increment
4686 and its initialization. The second is almost negligible for any loop
4687 that rolls enough, so we give it only very little weight. */
4689 base = cand->iv->base;
4690 cost_base = force_var_cost (data, base, NULL);
4691 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4693 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4695 /* Prefer the original ivs unless we may gain something by replacing them.
4696 The reason is to make debugging simpler, so this is not relevant for
4697 artificial ivs created by other optimization passes. */
4698 if (cand->pos != IP_ORIGINAL
4699 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4700 cost++;
4702 /* Prefer not to insert statements into latch unless there are some
4703 already (so that we do not create unnecessary jumps). */
4704 if (cand->pos == IP_END
4705 && empty_block_p (ip_end_pos (data->current_loop)))
4706 cost++;
4708 cand->cost = cost;
4709 cand->cost_step = cost_step;
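/* Illustration (a standalone sketch by the editor, not part of the pass):
   the candidate cost computed above is the per-iteration increment cost
   plus the initialization cost, with one-unit nudges against replacing
   original ivs and against filling an empty latch.  Assuming (as
   adjust_setup_cost does elsewhere in this file) that the setup cost is
   amortized over the expected iteration count:

     static unsigned
     toy_cand_cost (unsigned step_cost, unsigned setup_cost,
                    unsigned avg_niter, int keeps_original_iv,
                    int fills_empty_latch)
     {
       unsigned cost = step_cost + setup_cost / (avg_niter ? avg_niter : 1);
       if (!keeps_original_iv)
         cost++;                   // prefer original ivs for debugging
       if (fills_empty_latch)
         cost++;                   // avoid creating jumps into the latch
       return cost;
     }
*/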
4712 /* Determines costs of computation of the candidates. */
4714 static void
4715 determine_iv_costs (struct ivopts_data *data)
4717 unsigned i;
4719 if (dump_file && (dump_flags & TDF_DETAILS))
4721 fprintf (dump_file, "Candidate costs:\n");
4722 fprintf (dump_file, " cand\tcost\n");
4725 for (i = 0; i < n_iv_cands (data); i++)
4727 struct iv_cand *cand = iv_cand (data, i);
4729 determine_iv_cost (data, cand);
4731 if (dump_file && (dump_flags & TDF_DETAILS))
4732 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4735 if (dump_file && (dump_flags & TDF_DETAILS))
4736 fprintf (dump_file, "\n");
4739 /* Calculates the cost of having SIZE induction variables. */
4741 static unsigned
4742 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4744 /* We add size to the cost, so that we prefer eliminating ivs
4745 if possible. */
4746 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4747 data->body_includes_call);
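/* Illustration (a standalone sketch by the editor, not part of the pass):
   a toy stand-in for the register-pressure penalty, assuming a made-up
   model in which ivs are free while everything fits into the available
   registers and each register beyond that costs a spill; the real
   estimate_reg_pressure_cost is more involved:

     static unsigned
     toy_global_cost (unsigned size, unsigned regs_used,
                      unsigned avail_regs, unsigned spill_cost)
     {
       unsigned pressure = size + regs_used;
       unsigned excess = pressure > avail_regs ? pressure - avail_regs : 0;
       // SIZE is added unconditionally so that smaller sets win ties,
       // mirroring the comment above.
       return size + excess * spill_cost;
     }
*/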
4750 /* For each size of the induction variable set determine the penalty. */
4752 static void
4753 determine_set_costs (struct ivopts_data *data)
4755 unsigned j, n;
4756 gimple phi;
4757 gimple_stmt_iterator psi;
4758 tree op;
4759 struct loop *loop = data->current_loop;
4760 bitmap_iterator bi;
4762 if (dump_file && (dump_flags & TDF_DETAILS))
4764 fprintf (dump_file, "Global costs:\n");
4765 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4766 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4767 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4768 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4771 n = 0;
4772 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4774 phi = gsi_stmt (psi);
4775 op = PHI_RESULT (phi);
4777 if (!is_gimple_reg (op))
4778 continue;
4780 if (get_iv (data, op))
4781 continue;
4783 n++;
4786 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4788 struct version_info *info = ver_info (data, j);
4790 if (info->inv_id && info->has_nonlin_use)
4791 n++;
4794 data->regs_used = n;
4795 if (dump_file && (dump_flags & TDF_DETAILS))
4796 fprintf (dump_file, " regs_used %d\n", n);
4798 if (dump_file && (dump_flags & TDF_DETAILS))
4800 fprintf (dump_file, " cost for size:\n");
4801 fprintf (dump_file, " ivs\tcost\n");
4802 for (j = 0; j <= 2 * target_avail_regs; j++)
4803 fprintf (dump_file, " %d\t%d\n", j,
4804 ivopts_global_cost_for_size (data, j));
4805 fprintf (dump_file, "\n");
4809 /* Returns true if A is a cheaper cost pair than B. */
4811 static bool
4812 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4814 int cmp;
4816 if (!a)
4817 return false;
4819 if (!b)
4820 return true;
4822 cmp = compare_costs (a->cost, b->cost);
4823 if (cmp < 0)
4824 return true;
4826 if (cmp > 0)
4827 return false;
4829 /* In case the costs are the same, prefer the cheaper candidate. */
4830 if (a->cand->cost < b->cand->cost)
4831 return true;
4833 return false;
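/* Illustration (a standalone sketch by the editor, not part of the pass):
   ignoring the NULL handling, the ordering above is lexicographic --
   first by the use cost (via compare_costs), then by the candidate's
   own cost as a tie-break:

     struct toy_pair { int use_cost; int cand_cost; };

     static int
     toy_cheaper (const struct toy_pair *a, const struct toy_pair *b)
     {
       if (a->use_cost != b->use_cost)
         return a->use_cost < b->use_cost;
       return a->cand_cost < b->cand_cost;   // tie-break
     }
*/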
4837 /* Returns the candidate by which USE is expressed in IVS. */
4839 static struct cost_pair *
4840 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4842 return ivs->cand_for_use[use->id];
4845 /* Computes the cost field of IVS structure. */
4847 static void
4848 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4850 comp_cost cost = ivs->cand_use_cost;
4852 cost.cost += ivs->cand_cost;
4854 cost.cost += ivopts_global_cost_for_size (data,
4855 ivs->n_regs + ivs->num_used_inv_expr);
4857 ivs->cost = cost;
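/* Illustration (a standalone sketch by the editor, not part of the pass):
   the total cost recomputed above is just three terms added together --
   the per-use costs, the candidates' own costs, and the size penalty on
   the number of registers plus distinct loop-invariant expressions:

     static int
     toy_total_cost (int cand_use_cost, int cand_cost,
                     int n_regs, int num_used_inv_expr,
                     int (*global_cost_for_size) (int))
     {
       return cand_use_cost + cand_cost
              + global_cost_for_size (n_regs + num_used_inv_expr);
     }
*/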
4860 /* Removes the invariants in set INVS from set IVS. */
4862 static void
4863 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4865 bitmap_iterator bi;
4866 unsigned iid;
4868 if (!invs)
4869 return;
4871 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4873 ivs->n_invariant_uses[iid]--;
4874 if (ivs->n_invariant_uses[iid] == 0)
4875 ivs->n_regs--;
4879 /* Set USE not to be expressed by any candidate in IVS. */
4881 static void
4882 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4883 struct iv_use *use)
4885 unsigned uid = use->id, cid;
4886 struct cost_pair *cp;
4888 cp = ivs->cand_for_use[uid];
4889 if (!cp)
4890 return;
4891 cid = cp->cand->id;
4893 ivs->bad_uses++;
4894 ivs->cand_for_use[uid] = NULL;
4895 ivs->n_cand_uses[cid]--;
4897 if (ivs->n_cand_uses[cid] == 0)
4899 bitmap_clear_bit (ivs->cands, cid);
4900 /* Do not count the pseudocandidates. */
4901 if (cp->cand->iv)
4902 ivs->n_regs--;
4903 ivs->n_cands--;
4904 ivs->cand_cost -= cp->cand->cost;
4906 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4909 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4911 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4913 if (cp->inv_expr_id != -1)
4915 ivs->used_inv_expr[cp->inv_expr_id]--;
4916 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4917 ivs->num_used_inv_expr--;
4919 iv_ca_recount_cost (data, ivs);
4922 /* Adds the invariants in set INVS to set IVS. */
4924 static void
4925 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4927 bitmap_iterator bi;
4928 unsigned iid;
4930 if (!invs)
4931 return;
4933 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4935 ivs->n_invariant_uses[iid]++;
4936 if (ivs->n_invariant_uses[iid] == 1)
4937 ivs->n_regs++;
4941 /* Set cost pair for USE in set IVS to CP. */
4943 static void
4944 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4945 struct iv_use *use, struct cost_pair *cp)
4947 unsigned uid = use->id, cid;
4949 if (ivs->cand_for_use[uid] == cp)
4950 return;
4952 if (ivs->cand_for_use[uid])
4953 iv_ca_set_no_cp (data, ivs, use);
4955 if (cp)
4957 cid = cp->cand->id;
4959 ivs->bad_uses--;
4960 ivs->cand_for_use[uid] = cp;
4961 ivs->n_cand_uses[cid]++;
4962 if (ivs->n_cand_uses[cid] == 1)
4964 bitmap_set_bit (ivs->cands, cid);
4965 /* Do not count the pseudocandidates. */
4966 if (cp->cand->iv)
4967 ivs->n_regs++;
4968 ivs->n_cands++;
4969 ivs->cand_cost += cp->cand->cost;
4971 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4974 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4975 iv_ca_set_add_invariants (ivs, cp->depends_on);
4977 if (cp->inv_expr_id != -1)
4979 ivs->used_inv_expr[cp->inv_expr_id]++;
4980 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4981 ivs->num_used_inv_expr++;
4983 iv_ca_recount_cost (data, ivs);
4987 /* Extend set IVS by expressing USE by some of the candidates in it
4988 if possible. All important candidates will be considered
4989 if IMPORTANT_CANDIDATES is true. */
4991 static void
4992 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4993 struct iv_use *use, bool important_candidates)
4995 struct cost_pair *best_cp = NULL, *cp;
4996 bitmap_iterator bi;
4997 bitmap cands;
4998 unsigned i;
5000 gcc_assert (ivs->upto >= use->id);
5002 if (ivs->upto == use->id)
5004 ivs->upto++;
5005 ivs->bad_uses++;
5008 cands = (important_candidates ? data->important_candidates : ivs->cands);
5009 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5011 struct iv_cand *cand = iv_cand (data, i);
5013 cp = get_use_iv_cost (data, use, cand);
5015 if (cheaper_cost_pair (cp, best_cp))
5016 best_cp = cp;
5019 iv_ca_set_cp (data, ivs, use, best_cp);
5022 /* Get cost for assignment IVS. */
5024 static comp_cost
5025 iv_ca_cost (struct iv_ca *ivs)
5027 /* This was a conditional expression but it triggered a bug in
5028 Sun C 5.5. */
5029 if (ivs->bad_uses)
5030 return infinite_cost;
5031 else
5032 return ivs->cost;
5035 /* Returns true if all dependences of CP are among invariants in IVS. */
5037 static bool
5038 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5040 unsigned i;
5041 bitmap_iterator bi;
5043 if (!cp->depends_on)
5044 return true;
5046 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5048 if (ivs->n_invariant_uses[i] == 0)
5049 return false;
5052 return true;
5055 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
5056 chains it before NEXT_CHANGE. */
5058 static struct iv_ca_delta *
5059 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5060 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5062 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5064 change->use = use;
5065 change->old_cp = old_cp;
5066 change->new_cp = new_cp;
5067 change->next_change = next_change;
5069 return change;
5072 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5073 are rewritten. */
5075 static struct iv_ca_delta *
5076 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5078 struct iv_ca_delta *last;
5080 if (!l2)
5081 return l1;
5083 if (!l1)
5084 return l2;
5086 for (last = l1; last->next_change; last = last->next_change)
5087 continue;
5088 last->next_change = l2;
5090 return l1;
5093 /* Reverses the list of changes DELTA, forming its inverse. */
5095 static struct iv_ca_delta *
5096 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5098 struct iv_ca_delta *act, *next, *prev = NULL;
5099 struct cost_pair *tmp;
5101 for (act = delta; act; act = next)
5103 next = act->next_change;
5104 act->next_change = prev;
5105 prev = act;
5107 tmp = act->old_cp;
5108 act->old_cp = act->new_cp;
5109 act->new_cp = tmp;
5112 return prev;
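/* Illustration (a standalone sketch by the editor, not part of the pass):
   reversing a delta list both reverses the chain and swaps the old/new
   values in every node, so committing the result undoes the original
   changes.  The same shape on a toy node type:

     struct toy_delta { int old_v, new_v; struct toy_delta *next; };

     static struct toy_delta *
     toy_reverse (struct toy_delta *d)
     {
       struct toy_delta *prev = NULL, *next;
       for (; d; d = next)
         {
           next = d->next;
           d->next = prev;
           prev = d;
           int tmp = d->old_v;    // invert the change itself
           d->old_v = d->new_v;
           d->new_v = tmp;
         }
       return prev;
     }
*/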
5115 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5116 reverted instead. */
5118 static void
5119 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5120 struct iv_ca_delta *delta, bool forward)
5122 struct cost_pair *from, *to;
5123 struct iv_ca_delta *act;
5125 if (!forward)
5126 delta = iv_ca_delta_reverse (delta);
5128 for (act = delta; act; act = act->next_change)
5130 from = act->old_cp;
5131 to = act->new_cp;
5132 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5133 iv_ca_set_cp (data, ivs, act->use, to);
5136 if (!forward)
5137 iv_ca_delta_reverse (delta);
5140 /* Returns true if CAND is used in IVS. */
5142 static bool
5143 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5145 return ivs->n_cand_uses[cand->id] > 0;
5148 /* Returns number of induction variable candidates in the set IVS. */
5150 static unsigned
5151 iv_ca_n_cands (struct iv_ca *ivs)
5153 return ivs->n_cands;
5156 /* Free the list of changes DELTA. */
5158 static void
5159 iv_ca_delta_free (struct iv_ca_delta **delta)
5161 struct iv_ca_delta *act, *next;
5163 for (act = *delta; act; act = next)
5165 next = act->next_change;
5166 free (act);
5169 *delta = NULL;
5172 /* Allocates a new iv candidate assignment. */
5174 static struct iv_ca *
5175 iv_ca_new (struct ivopts_data *data)
5177 struct iv_ca *nw = XNEW (struct iv_ca);
5179 nw->upto = 0;
5180 nw->bad_uses = 0;
5181 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5182 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5183 nw->cands = BITMAP_ALLOC (NULL);
5184 nw->n_cands = 0;
5185 nw->n_regs = 0;
5186 nw->cand_use_cost = zero_cost;
5187 nw->cand_cost = 0;
5188 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5189 nw->cost = zero_cost;
5190 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5191 nw->num_used_inv_expr = 0;
5193 return nw;
5196 /* Free memory occupied by the set IVS. */
5198 static void
5199 iv_ca_free (struct iv_ca **ivs)
5201 free ((*ivs)->cand_for_use);
5202 free ((*ivs)->n_cand_uses);
5203 BITMAP_FREE ((*ivs)->cands);
5204 free ((*ivs)->n_invariant_uses);
5205 free ((*ivs)->used_inv_expr);
5206 free (*ivs);
5207 *ivs = NULL;
5210 /* Dumps IVS to FILE. */
5212 static void
5213 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5215 const char *pref = " invariants ";
5216 unsigned i;
5217 comp_cost cost = iv_ca_cost (ivs);
5219 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5220 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5221 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5222 bitmap_print (file, ivs->cands, " candidates: ","\n");
5224 for (i = 0; i < ivs->upto; i++)
5226 struct iv_use *use = iv_use (data, i);
5227 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5228 if (cp)
5229 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5230 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5231 else
5232 fprintf (file, " use:%d --> ??\n", use->id);
5235 for (i = 1; i <= data->max_inv_id; i++)
5236 if (ivs->n_invariant_uses[i])
5238 fprintf (file, "%s%d", pref, i);
5239 pref = ", ";
5241 fprintf (file, "\n\n");
5244 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
5245 the new set, and store the differences in DELTA. The number of induction
5246 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is
5247 true, the function tries to find a solution with as few iv candidates as possible. */
5249 static comp_cost
5250 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5251 struct iv_cand *cand, struct iv_ca_delta **delta,
5252 unsigned *n_ivs, bool min_ncand)
5254 unsigned i;
5255 comp_cost cost;
5256 struct iv_use *use;
5257 struct cost_pair *old_cp, *new_cp;
5259 *delta = NULL;
5260 for (i = 0; i < ivs->upto; i++)
5262 use = iv_use (data, i);
5263 old_cp = iv_ca_cand_for_use (ivs, use);
5265 if (old_cp
5266 && old_cp->cand == cand)
5267 continue;
5269 new_cp = get_use_iv_cost (data, use, cand);
5270 if (!new_cp)
5271 continue;
5273 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5274 continue;
5276 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5277 continue;
5279 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5282 iv_ca_delta_commit (data, ivs, *delta, true);
5283 cost = iv_ca_cost (ivs);
5284 if (n_ivs)
5285 *n_ivs = iv_ca_n_cands (ivs);
5286 iv_ca_delta_commit (data, ivs, *delta, false);
5288 return cost;
5291 /* Try narrowing set IVS by removing CAND. Return the cost of
5292 the new set and store the differences in DELTA. */
5294 static comp_cost
5295 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5296 struct iv_cand *cand, struct iv_ca_delta **delta)
5298 unsigned i, ci;
5299 struct iv_use *use;
5300 struct cost_pair *old_cp, *new_cp, *cp;
5301 bitmap_iterator bi;
5302 struct iv_cand *cnd;
5303 comp_cost cost;
5305 *delta = NULL;
5306 for (i = 0; i < n_iv_uses (data); i++)
5308 use = iv_use (data, i);
5310 old_cp = iv_ca_cand_for_use (ivs, use);
5311 if (old_cp->cand != cand)
5312 continue;
5314 new_cp = NULL;
5316 if (data->consider_all_candidates)
5318 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5320 if (ci == cand->id)
5321 continue;
5323 cnd = iv_cand (data, ci);
5325 cp = get_use_iv_cost (data, use, cnd);
5326 if (!cp)
5327 continue;
5329 if (!iv_ca_has_deps (ivs, cp))
5330 continue;
5332 if (!cheaper_cost_pair (cp, new_cp))
5333 continue;
5335 new_cp = cp;
5338 else
5340 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5342 if (ci == cand->id)
5343 continue;
5345 cnd = iv_cand (data, ci);
5347 cp = get_use_iv_cost (data, use, cnd);
5348 if (!cp)
5349 continue;
5350 if (!iv_ca_has_deps (ivs, cp))
5351 continue;
5353 if (!cheaper_cost_pair (cp, new_cp))
5354 continue;
5356 new_cp = cp;
5360 if (!new_cp)
5362 iv_ca_delta_free (delta);
5363 return infinite_cost;
5366 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5369 iv_ca_delta_commit (data, ivs, *delta, true);
5370 cost = iv_ca_cost (ivs);
5371 iv_ca_delta_commit (data, ivs, *delta, false);
5373 return cost;
5376 /* Try optimizing the set of candidates IVS by removing candidates different
5377 from EXCEPT_CAND from it. Return the cost of the new set, and store
5378 differences in DELTA. */
5380 static comp_cost
5381 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5382 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5384 bitmap_iterator bi;
5385 struct iv_ca_delta *act_delta, *best_delta;
5386 unsigned i;
5387 comp_cost best_cost, acost;
5388 struct iv_cand *cand;
5390 best_delta = NULL;
5391 best_cost = iv_ca_cost (ivs);
5393 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5395 cand = iv_cand (data, i);
5397 if (cand == except_cand)
5398 continue;
5400 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5402 if (compare_costs (acost, best_cost) < 0)
5404 best_cost = acost;
5405 iv_ca_delta_free (&best_delta);
5406 best_delta = act_delta;
5408 else
5409 iv_ca_delta_free (&act_delta);
5412 if (!best_delta)
5414 *delta = NULL;
5415 return best_cost;
5418 /* Recurse to possibly remove other unnecessary ivs. */
5419 iv_ca_delta_commit (data, ivs, best_delta, true);
5420 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5421 iv_ca_delta_commit (data, ivs, best_delta, false);
5422 *delta = iv_ca_delta_join (best_delta, *delta);
5423 return best_cost;
5426 /* Tries to extend the set IVS in the best possible way in order
5427 to express the USE. If ORIGINALP is true, prefer candidates from
5428 the original set of IVs, otherwise favor important candidates not
5429 based on any memory object. */
5431 static bool
5432 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5433 struct iv_use *use, bool originalp)
5435 comp_cost best_cost, act_cost;
5436 unsigned i;
5437 bitmap_iterator bi;
5438 struct iv_cand *cand;
5439 struct iv_ca_delta *best_delta = NULL, *act_delta;
5440 struct cost_pair *cp;
5442 iv_ca_add_use (data, ivs, use, false);
5443 best_cost = iv_ca_cost (ivs);
5445 cp = iv_ca_cand_for_use (ivs, use);
5446 if (!cp)
5448 ivs->upto--;
5449 ivs->bad_uses--;
5450 iv_ca_add_use (data, ivs, use, true);
5451 best_cost = iv_ca_cost (ivs);
5452 cp = iv_ca_cand_for_use (ivs, use);
5454 if (cp)
5456 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5457 iv_ca_set_no_cp (data, ivs, use);
5460 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5461 first try important candidates not based on any memory object. Only if
5462 this fails, try the specific ones. Rationale -- in loops with many
5463 variables the best choice is often to use just one generic biv. If we
5464 added many ivs specific to the uses here, the optimization algorithm later
5465 would be likely to get stuck in a local minimum, thus causing us to create
5466 too many ivs. The approach from few ivs to more seems more likely to be
5467 successful -- starting from few ivs, replacing an expensive use by a
5468 specific iv should always be a win. */
5469 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5471 cand = iv_cand (data, i);
5473 if (originalp && cand->pos != IP_ORIGINAL
5474 continue;
5476 if (!originalp && cand->iv->base_object != NULL_TREE)
5477 continue;
5479 if (iv_ca_cand_used_p (ivs, cand))
5480 continue;
5482 cp = get_use_iv_cost (data, use, cand);
5483 if (!cp)
5484 continue;
5486 iv_ca_set_cp (data, ivs, use, cp);
5487 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5488 true);
5489 iv_ca_set_no_cp (data, ivs, use);
5490 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5492 if (compare_costs (act_cost, best_cost) < 0)
5494 best_cost = act_cost;
5496 iv_ca_delta_free (&best_delta);
5497 best_delta = act_delta;
5499 else
5500 iv_ca_delta_free (&act_delta);
5503 if (infinite_cost_p (best_cost))
5505 for (i = 0; i < use->n_map_members; i++)
5507 cp = use->cost_map + i;
5508 cand = cp->cand;
5509 if (!cand)
5510 continue;
5512 /* Already tried this. */
5513 if (cand->important)
5515 if (originalp && cand->pos == IP_ORIGINAL)
5516 continue;
5517 if (!originalp && cand->iv->base_object == NULL_TREE)
5518 continue;
5521 if (iv_ca_cand_used_p (ivs, cand))
5522 continue;
5524 act_delta = NULL;
5525 iv_ca_set_cp (data, ivs, use, cp);
5526 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5527 iv_ca_set_no_cp (data, ivs, use);
5528 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5529 cp, act_delta);
5531 if (compare_costs (act_cost, best_cost) < 0)
5533 best_cost = act_cost;
5535 if (best_delta)
5536 iv_ca_delta_free (&best_delta);
5537 best_delta = act_delta;
5539 else
5540 iv_ca_delta_free (&act_delta);
5544 iv_ca_delta_commit (data, ivs, best_delta, true);
5545 iv_ca_delta_free (&best_delta);
5547 return !infinite_cost_p (best_cost);
5550 /* Finds an initial assignment of candidates to uses. */
5552 static struct iv_ca *
5553 get_initial_solution (struct ivopts_data *data, bool originalp)
5555 struct iv_ca *ivs = iv_ca_new (data);
5556 unsigned i;
5558 for (i = 0; i < n_iv_uses (data); i++)
5559 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5561 iv_ca_free (&ivs);
5562 return NULL;
5565 return ivs;
5568 /* Tries to improve set of induction variables IVS. */
5570 static bool
5571 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5573 unsigned i, n_ivs;
5574 comp_cost acost, best_cost = iv_ca_cost (ivs);
5575 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5576 struct iv_cand *cand;
5578 /* Try extending the set of induction variables by one. */
5579 for (i = 0; i < n_iv_cands (data); i++)
5581 cand = iv_cand (data, i);
5583 if (iv_ca_cand_used_p (ivs, cand))
5584 continue;
5586 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5587 if (!act_delta)
5588 continue;
5590 /* If we successfully added the candidate and the set is small enough,
5591 try optimizing it by removing other candidates. */
5592 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5594 iv_ca_delta_commit (data, ivs, act_delta, true);
5595 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5596 iv_ca_delta_commit (data, ivs, act_delta, false);
5597 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5600 if (compare_costs (acost, best_cost) < 0)
5602 best_cost = acost;
5603 iv_ca_delta_free (&best_delta);
5604 best_delta = act_delta;
5606 else
5607 iv_ca_delta_free (&act_delta);
5610 if (!best_delta)
5612 /* Try removing the candidates from the set instead. */
5613 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5615 /* Nothing more we can do. */
5616 if (!best_delta)
5617 return false;
5620 iv_ca_delta_commit (data, ivs, best_delta, true);
5621 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5622 iv_ca_delta_free (&best_delta);
5623 return true;
5626 /* Attempts to find the optimal set of induction variables. We use a simple
5627 greedy heuristic -- we try to replace at most one candidate in the selected
5628 solution and remove the unused ivs while this improves the cost. */
5630 static struct iv_ca *
5631 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5633 struct iv_ca *set;
5635 /* Get the initial solution. */
5636 set = get_initial_solution (data, originalp);
5637 if (!set)
5639 if (dump_file && (dump_flags & TDF_DETAILS))
5640 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5641 return NULL;
5644 if (dump_file && (dump_flags & TDF_DETAILS))
5646 fprintf (dump_file, "Initial set of candidates:\n");
5647 iv_ca_dump (data, dump_file, set);
5650 while (try_improve_iv_set (data, set))
5652 if (dump_file && (dump_flags & TDF_DETAILS))
5654 fprintf (dump_file, "Improved to:\n");
5655 iv_ca_dump (data, dump_file, set);
5659 return set;
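/* Illustration (a standalone sketch by the editor, not part of the pass):
   the driver above is plain hill climbing -- keep committing the best
   single change while it strictly improves the cost, then stop at a
   local optimum.  The same control structure on a one-dimensional toy
   cost function:

     static int
     toy_cost (int x)
     {
       return (x - 7) * (x - 7);    // minimum at x == 7
     }

     static int
     toy_try_improve (int *x)       // returns 1 if the state improved
     {
       int best = *x;
       if (toy_cost (*x - 1) < toy_cost (best))
         best = *x - 1;
       if (toy_cost (*x + 1) < toy_cost (best))
         best = *x + 1;
       if (best == *x)
         return 0;
       *x = best;
       return 1;
     }

     // Usage: int x = 0; while (toy_try_improve (&x)) ;  ends at x == 7.
*/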
5662 static struct iv_ca *
5663 find_optimal_iv_set (struct ivopts_data *data)
5665 unsigned i;
5666 struct iv_ca *set, *origset;
5667 struct iv_use *use;
5668 comp_cost cost, origcost;
5670 /* Determine the cost based on a strategy that starts with the original IVs,
5671 then try again using a strategy that prefers candidates not based
5672 on any IVs. */
5673 origset = find_optimal_iv_set_1 (data, true);
5674 set = find_optimal_iv_set_1 (data, false);
5676 if (!origset && !set)
5677 return NULL;
5679 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5680 cost = set ? iv_ca_cost (set) : infinite_cost;
5682 if (dump_file && (dump_flags & TDF_DETAILS))
5684 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5685 origcost.cost, origcost.complexity);
5686 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5687 cost.cost, cost.complexity);
5690 /* Choose the one with the best cost. */
5691 if (compare_costs (origcost, cost) <= 0)
5693 if (set)
5694 iv_ca_free (&set);
5695 set = origset;
5697 else if (origset)
5698 iv_ca_free (&origset);
5700 for (i = 0; i < n_iv_uses (data); i++)
5702 use = iv_use (data, i);
5703 use->selected = iv_ca_cand_for_use (set, use)->cand;
5706 return set;
5709 /* Creates a new induction variable corresponding to CAND. */
5711 static void
5712 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5714 gimple_stmt_iterator incr_pos;
5715 tree base;
5716 bool after = false;
5718 if (!cand->iv)
5719 return;
5721 switch (cand->pos)
5723 case IP_NORMAL:
5724 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5725 break;
5727 case IP_END:
5728 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5729 after = true;
5730 break;
5732 case IP_AFTER_USE:
5733 after = true;
5734 /* fall through */
5735 case IP_BEFORE_USE:
5736 incr_pos = gsi_for_stmt (cand->incremented_at);
5737 break;
5739 case IP_ORIGINAL:
5740 /* Mark that the iv is preserved. */
5741 name_info (data, cand->var_before)->preserve_biv = true;
5742 name_info (data, cand->var_after)->preserve_biv = true;
5744 /* Rewrite the increment so that it uses var_before directly. */
5745 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5746 return;
5749 gimple_add_tmp_var (cand->var_before);
5750 add_referenced_var (cand->var_before);
5752 base = unshare_expr (cand->iv->base);
5754 create_iv (base, unshare_expr (cand->iv->step),
5755 cand->var_before, data->current_loop,
5756 &incr_pos, after, &cand->var_before, &cand->var_after);
5759 /* Creates new induction variables described in SET. */
5761 static void
5762 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5764 unsigned i;
5765 struct iv_cand *cand;
5766 bitmap_iterator bi;
5768 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5770 cand = iv_cand (data, i);
5771 create_new_iv (data, cand);
5774 if (dump_file && (dump_flags & TDF_DETAILS))
5776 fprintf (dump_file, "\nSelected IV set: \n");
5777 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5779 cand = iv_cand (data, i);
5780 dump_cand (dump_file, cand);
5782 fprintf (dump_file, "\n");
5786 /* Rewrites USE (definition of iv used in a nonlinear expression)
5787 using candidate CAND. */
5789 static void
5790 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5791 struct iv_use *use, struct iv_cand *cand)
5793 tree comp;
5794 tree op, tgt;
5795 gimple ass;
5796 gimple_stmt_iterator bsi;
5798 /* An important special case -- if we are asked to express value of
5799 the original iv by itself, just exit; there is no need to
5800 introduce a new computation (that might also need casting the
5801 variable to unsigned and back). */
5802 if (cand->pos == IP_ORIGINAL
5803 && cand->incremented_at == use->stmt)
5805 tree step, ctype, utype;
5806 enum tree_code incr_code = PLUS_EXPR, old_code;
5808 gcc_assert (is_gimple_assign (use->stmt));
5809 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5811 step = cand->iv->step;
5812 ctype = TREE_TYPE (step);
5813 utype = TREE_TYPE (cand->var_after);
5814 if (TREE_CODE (step) == NEGATE_EXPR)
5816 incr_code = MINUS_EXPR;
5817 step = TREE_OPERAND (step, 0);
5820 /* Check whether we may leave the computation unchanged.
5821 This is the case only if it does not rely on other
5822 computations in the loop -- otherwise, the computation
5823 we rely upon may be removed in remove_unused_ivs,
5824 thus leading to an ICE. */
5825 old_code = gimple_assign_rhs_code (use->stmt);
5826 if (old_code == PLUS_EXPR
5827 || old_code == MINUS_EXPR
5828 || old_code == POINTER_PLUS_EXPR)
5830 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5831 op = gimple_assign_rhs2 (use->stmt);
5832 else if (old_code != MINUS_EXPR
5833 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5834 op = gimple_assign_rhs1 (use->stmt);
5835 else
5836 op = NULL_TREE;
5838 else
5839 op = NULL_TREE;
5841 if (op
5842 && (TREE_CODE (op) == INTEGER_CST
5843 || operand_equal_p (op, step, 0)))
5844 return;
5846 /* Otherwise, add the necessary computations to express
5847 the iv. */
5848 op = fold_convert (ctype, cand->var_before);
5849 comp = fold_convert (utype,
5850 build2 (incr_code, ctype, op,
5851 unshare_expr (step)));
5853 else
5855 comp = get_computation (data->current_loop, use, cand);
5856 gcc_assert (comp != NULL_TREE);
5859 switch (gimple_code (use->stmt))
5861 case GIMPLE_PHI:
5862 tgt = PHI_RESULT (use->stmt);
5864 /* If we should keep the biv, do not replace it. */
5865 if (name_info (data, tgt)->preserve_biv)
5866 return;
5868 bsi = gsi_after_labels (gimple_bb (use->stmt));
5869 break;
5871 case GIMPLE_ASSIGN:
5872 tgt = gimple_assign_lhs (use->stmt);
5873 bsi = gsi_for_stmt (use->stmt);
5874 break;
5876 default:
5877 gcc_unreachable ();
5880 if (!valid_gimple_rhs_p (comp)
5881 || (gimple_code (use->stmt) != GIMPLE_PHI
5882 /* We can't allow re-allocating the stmt as it might still be
5883 pointed to. */
5884 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5885 >= gimple_num_ops (gsi_stmt (bsi)))))
5887 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5888 true, GSI_SAME_STMT);
5889 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5891 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5892 /* As this isn't a plain copy, we have to reset alignment
5893 information. */
5894 if (SSA_NAME_PTR_INFO (comp))
5896 SSA_NAME_PTR_INFO (comp)->align = 1;
5897 SSA_NAME_PTR_INFO (comp)->misalign = 0;
5902 if (gimple_code (use->stmt) == GIMPLE_PHI)
5904 ass = gimple_build_assign (tgt, comp);
5905 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5907 bsi = gsi_for_stmt (use->stmt);
5908 remove_phi_node (&bsi, false);
5910 else
5912 gimple_assign_set_rhs_from_tree (&bsi, comp);
5913 use->stmt = gsi_stmt (bsi);
5917 /* Copies the reference information from OLD_REF to NEW_REF. */
5919 static void
5920 copy_ref_info (tree new_ref, tree old_ref)
5922 tree new_ptr_base = NULL_TREE;
5924 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5925 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5927 new_ptr_base = TREE_OPERAND (new_ref, 0);
5929 /* We can transfer points-to information from an old pointer
5930 or decl base to the new one. */
5931 if (new_ptr_base
5932 && TREE_CODE (new_ptr_base) == SSA_NAME
5933 && !SSA_NAME_PTR_INFO (new_ptr_base))
5935 tree base = get_base_address (old_ref);
5936 if (!base)
5938 else if ((TREE_CODE (base) == MEM_REF
5939 || TREE_CODE (base) == TARGET_MEM_REF)
5940 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5941 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5943 struct ptr_info_def *new_pi;
5944 duplicate_ssa_name_ptr_info
5945 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5946 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5947 /* We have to be careful about transferring alignment information. */
5948 if (TREE_CODE (old_ref) == MEM_REF
5949 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5950 && (TMR_INDEX2 (new_ref)
5951 || (TMR_STEP (new_ref)
5952 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5953 < new_pi->align)))))
5955 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5956 mem_ref_offset (new_ref)).low;
5957 new_pi->misalign &= (new_pi->align - 1);
5959 else
5961 new_pi->align = 1;
5962 new_pi->misalign = 0;
5965 else if (TREE_CODE (base) == VAR_DECL
5966 || TREE_CODE (base) == PARM_DECL
5967 || TREE_CODE (base) == RESULT_DECL)
5969 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5970 pt_solution_set_var (&pi->pt, base);
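/* Illustration (a standalone sketch by the editor, not part of the pass):
   the misalignment update above is modular arithmetic -- moving a
   pointer by DELTA bytes shifts its misalignment by DELTA modulo the
   known alignment:

     static unsigned
     toy_adjust_misalign (unsigned misalign, unsigned align, long delta)
     {
       // ALIGN is a power of two, so "& (align - 1)" is "mod align";
       // the unsigned cast makes a negative DELTA wrap correctly.
       return (misalign + (unsigned) delta) & (align - 1);
     }
*/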
5975 /* Performs a peephole optimization to reorder the iv update statement with
5976 a mem ref to enable instruction combining in later phases. The mem ref uses
5977 the iv value before the update, so the reordering transformation requires
5978 adjustment of the offset. CAND is the selected IV_CAND.
5980 Example:
5982 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5983 iv2 = iv1 + 1;
5985 if (t < val) (1)
5986 goto L;
5987 goto Head;
5990 directly propagating t over to (1) will introduce an overlapping live range
5991 and thus increase register pressure. This peephole transforms it into:
5994 iv2 = iv1 + 1;
5995 t = MEM_REF (base, iv2, 8, 8);
5996 if (t < val)
5997 goto L;
5998 goto Head; */
6001 static void
6002 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6004 tree var_after;
6005 gimple iv_update, stmt;
6006 basic_block bb;
6007 gimple_stmt_iterator gsi, gsi_iv;
6009 if (cand->pos != IP_NORMAL)
6010 return;
6012 var_after = cand->var_after;
6013 iv_update = SSA_NAME_DEF_STMT (var_after);
6015 bb = gimple_bb (iv_update);
6016 gsi = gsi_last_nondebug_bb (bb);
6017 stmt = gsi_stmt (gsi);
6019 /* Only handle conditional statements for now. */
6020 if (gimple_code (stmt) != GIMPLE_COND)
6021 return;
6023 gsi_prev_nondebug (&gsi);
6024 stmt = gsi_stmt (gsi);
6025 if (stmt != iv_update)
6026 return;
6028 gsi_prev_nondebug (&gsi);
6029 if (gsi_end_p (gsi))
6030 return;
6032 stmt = gsi_stmt (gsi);
6033 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6034 return;
6036 if (stmt != use->stmt)
6037 return;
6039 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6040 return;
6042 if (dump_file && (dump_flags & TDF_DETAILS))
6044 fprintf (dump_file, "Reordering \n");
6045 print_gimple_stmt (dump_file, iv_update, 0, 0);
6046 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6047 fprintf (dump_file, "\n");
6050 gsi = gsi_for_stmt (use->stmt);
6051 gsi_iv = gsi_for_stmt (iv_update);
6052 gsi_move_before (&gsi_iv, &gsi);
6054 cand->pos = IP_BEFORE_USE;
6055 cand->incremented_at = use->stmt;
6058 /* Rewrites USE (address that is an iv) using candidate CAND. */
6060 static void
6061 rewrite_use_address (struct ivopts_data *data,
6062 struct iv_use *use, struct iv_cand *cand)
6064 aff_tree aff;
6065 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6066 tree base_hint = NULL_TREE;
6067 tree ref, iv;
6068 bool ok;
6070 adjust_iv_update_pos (cand, use);
6071 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6072 gcc_assert (ok);
6073 unshare_aff_combination (&aff);
6075 /* To avoid undefined overflow problems, all IV candidates use unsigned
6076 integer types. The drawback is that this makes it impossible for
6077 create_mem_ref to distinguish an IV that is based on a memory object
6078 from one that represents simply an offset.
6080 To work around this problem, we pass a hint to create_mem_ref that
6081 indicates which variable (if any) in aff is an IV based on a memory
6082 object. Note that we only consider the candidate. If this is not
6083 based on an object, the base of the reference is in some subexpression
6084 of the use -- but these will use pointer types, so they are recognized
6085 by the create_mem_ref heuristics anyway. */
6086 if (cand->iv->base_object)
6087 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6089 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6090 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6091 reference_alias_ptr_type (*use->op_p),
6092 iv, base_hint, data->speed);
6093 copy_ref_info (ref, *use->op_p);
6094 *use->op_p = ref;
6097 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6098 candidate CAND. */
6100 static void
6101 rewrite_use_compare (struct ivopts_data *data,
6102 struct iv_use *use, struct iv_cand *cand)
6104 tree comp, *var_p, op, bound;
6105 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6106 enum tree_code compare;
6107 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6108 bool ok;
6110 bound = cp->value;
6111 if (bound)
6113 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6114 tree var_type = TREE_TYPE (var);
6115 gimple_seq stmts;
6117 if (dump_file && (dump_flags & TDF_DETAILS))
6119 fprintf (dump_file, "Replacing exit test: ");
6120 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6122 compare = iv_elimination_compare (data, use);
6123 bound = unshare_expr (fold_convert (var_type, bound));
6124 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6125 if (stmts)
6126 gsi_insert_seq_on_edge_immediate (
6127 loop_preheader_edge (data->current_loop),
6128 stmts);
6130 gimple_cond_set_lhs (use->stmt, var);
6131 gimple_cond_set_code (use->stmt, compare);
6132 gimple_cond_set_rhs (use->stmt, op);
6133 return;
6136 /* The induction variable elimination failed; just express the original
6137 giv. */
6138 comp = get_computation (data->current_loop, use, cand);
6139 gcc_assert (comp != NULL_TREE);
6141 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6142 gcc_assert (ok);
6144 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6145 true, GSI_SAME_STMT);
6148 /* Rewrites USE using candidate CAND. */
6150 static void
6151 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6153 switch (use->type)
6155 case USE_NONLINEAR_EXPR:
6156 rewrite_use_nonlinear_expr (data, use, cand);
6157 break;
6159 case USE_ADDRESS:
6160 rewrite_use_address (data, use, cand);
6161 break;
6163 case USE_COMPARE:
6164 rewrite_use_compare (data, use, cand);
6165 break;
6167 default:
6168 gcc_unreachable ();
6171 update_stmt (use->stmt);
6174 /* Rewrite the uses using the selected induction variables. */
6176 static void
6177 rewrite_uses (struct ivopts_data *data)
6179 unsigned i;
6180 struct iv_cand *cand;
6181 struct iv_use *use;
6183 for (i = 0; i < n_iv_uses (data); i++)
6185 use = iv_use (data, i);
6186 cand = use->selected;
6187 gcc_assert (cand);
6189 rewrite_use (data, use, cand);
6193 /* Removes the ivs that are not used after rewriting. */
6195 static void
6196 remove_unused_ivs (struct ivopts_data *data)
6198 unsigned j;
6199 bitmap_iterator bi;
6200 bitmap toremove = BITMAP_ALLOC (NULL);
6202 /* Figure out an order in which to release SSA DEFs so that we don't
6203 release something that we'd have to propagate into a debug stmt
6204 afterwards. */
6205 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6207 struct version_info *info;
6209 info = ver_info (data, j);
6210 if (info->iv
6211 && !integer_zerop (info->iv->step)
6212 && !info->inv_id
6213 && !info->iv->have_use_for
6214 && !info->preserve_biv)
6215 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6218 release_defs_bitset (toremove);
6220 BITMAP_FREE (toremove);
6223 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6224 for pointer_map_traverse. */
6226 static bool
6227 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6228 void *data ATTRIBUTE_UNUSED)
6230 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6232 free (niter);
6233 return true;
6236 /* Frees data allocated by the optimization of a single loop. */
6238 static void
6239 free_loop_data (struct ivopts_data *data)
6241 unsigned i, j;
6242 bitmap_iterator bi;
6243 tree obj;
6245 if (data->niters)
6247 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6248 pointer_map_destroy (data->niters);
6249 data->niters = NULL;
6252 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6254 struct version_info *info;
6256 info = ver_info (data, i);
6257 if (info->iv)
6258 free (info->iv);
6259 info->iv = NULL;
6260 info->has_nonlin_use = false;
6261 info->preserve_biv = false;
6262 info->inv_id = 0;
6264 bitmap_clear (data->relevant);
6265 bitmap_clear (data->important_candidates);
6267 for (i = 0; i < n_iv_uses (data); i++)
6269 struct iv_use *use = iv_use (data, i);
6271 free (use->iv);
6272 BITMAP_FREE (use->related_cands);
6273 for (j = 0; j < use->n_map_members; j++)
6274 if (use->cost_map[j].depends_on)
6275 BITMAP_FREE (use->cost_map[j].depends_on);
6276 free (use->cost_map);
6277 free (use);
6279 VEC_truncate (iv_use_p, data->iv_uses, 0);
6281 for (i = 0; i < n_iv_cands (data); i++)
6283 struct iv_cand *cand = iv_cand (data, i);
6285 if (cand->iv)
6286 free (cand->iv);
6287 if (cand->depends_on)
6288 BITMAP_FREE (cand->depends_on);
6289 free (cand);
6291 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6293 if (data->version_info_size < num_ssa_names)
6295 data->version_info_size = 2 * num_ssa_names;
6296 free (data->version_info);
6297 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6300 data->max_inv_id = 0;
6302 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6303 SET_DECL_RTL (obj, NULL_RTX);
6305 VEC_truncate (tree, decl_rtl_to_reset, 0);
6307 htab_empty (data->inv_expr_tab);
6308 data->inv_expr_id = 0;
6311 /* Finalizes the data structures used by the iv optimization pass,
6312 releasing everything allocated in DATA. */
6314 static void
6315 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6317 free_loop_data (data);
6318 free (data->version_info);
6319 BITMAP_FREE (data->relevant);
6320 BITMAP_FREE (data->important_candidates);
6322 VEC_free (tree, heap, decl_rtl_to_reset);
6323 VEC_free (iv_use_p, heap, data->iv_uses);
6324 VEC_free (iv_cand_p, heap, data->iv_candidates);
6325 htab_delete (data->inv_expr_tab);
6328 /* Returns true if the loop body BODY includes any function calls. */
6330 static bool
6331 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6333 gimple_stmt_iterator gsi;
6334 unsigned i;
6336 for (i = 0; i < num_nodes; i++)
6337 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6339 gimple stmt = gsi_stmt (gsi);
6340 if (is_gimple_call (stmt)
6341 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6342 return true;
6344 return false;
6347 /* Optimizes the LOOP. Returns true if anything changed. */
6349 static bool
6350 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6352 bool changed = false;
6353 struct iv_ca *iv_ca;
6354 edge exit;
6355 basic_block *body;
6357 gcc_assert (!data->niters);
6358 data->current_loop = loop;
6359 data->speed = optimize_loop_for_speed_p (loop);
6361 if (dump_file && (dump_flags & TDF_DETAILS))
6363 fprintf (dump_file, "Processing loop %d\n", loop->num);
6365 exit = single_dom_exit (loop);
6366 if (exit)
6368 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6369 exit->src->index, exit->dest->index);
6370 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6371 fprintf (dump_file, "\n");
6374 fprintf (dump_file, "\n");
6377 body = get_loop_body (loop);
6378 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6379 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6380 free (body);
6382 /* For each ssa name, determine whether it behaves as an induction variable
6383 in some loop. */
6384 if (!find_induction_variables (data))
6385 goto finish;
6387 /* Finds interesting uses (item 1). */
6388 find_interesting_uses (data);
6389 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6390 goto finish;
6392 /* Finds candidates for the induction variables (item 2). */
6393 find_iv_candidates (data);
6395 /* Calculates the costs (item 3, part 1). */
6396 determine_iv_costs (data);
6397 determine_use_iv_costs (data);
6398 determine_set_costs (data);
6400 /* Find the optimal set of induction variables (item 3, part 2). */
6401 iv_ca = find_optimal_iv_set (data);
6402 if (!iv_ca)
6403 goto finish;
6404 changed = true;
6406 /* Create the new induction variables (item 4, part 1). */
6407 create_new_ivs (data, iv_ca);
6408 iv_ca_free (&iv_ca);
6410 /* Rewrite the uses (item 4, part 2). */
6411 rewrite_uses (data);
6413 /* Remove the ivs that are unused after rewriting. */
6414 remove_unused_ivs (data);
6416 /* We have changed the structure of induction variables; it might happen
6417 that definitions in the scev database refer to some of them that were
6418 eliminated. */
6419 scev_reset ();
6421 finish:
6422 free_loop_data (data);
6424 return changed;
6427 /* Main entry point. Optimizes induction variables in loops. */
6429 void
6430 tree_ssa_iv_optimize (void)
6432 struct loop *loop;
6433 struct ivopts_data data;
6434 loop_iterator li;
6436 tree_ssa_iv_optimize_init (&data);
6438 /* Optimize the loops starting with the innermost ones. */
6439 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6441 if (dump_file && (dump_flags & TDF_DETAILS))
6442 flow_loop_dump (loop, dump_file, NULL, 1);
6444 tree_ssa_iv_optimize_loop (&data, loop);
6447 tree_ssa_iv_optimize_finalize (&data);