/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
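
/* For illustration (an editor's sketch, not part of the original comment):
   given

     for (i = 0; i < n; i++)
       sum += a[i];

   the address of a[i] is recomputed as a + 4 * i on every iteration (for
   4-byte elements).  Strength reduction replaces this with a pointer P
   that starts at A and is incremented by 4 each iteration, and induction
   variable elimination may then rewrite the exit test i < n against P
   (e.g. p < a + 4 * n), making I dead and removable.  */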
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
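
/* For example (editor's illustration): in

     for (i = 0; i < n; i++)
       sum += a[i];

   where A is a pointer to 4-byte elements, the iv describing the address
   a + 4 * i has base A, step 4 and base_object A, while the iv for I
   itself has base 0, step 1 and no base object.  */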
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
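
/* For illustration (editor's note, not from the original sources): in

     for (i = 0; i < n; i++)
       a[i] = 0;

   the loop leaves only through the header test, whose source block runs
   exactly once per iteration, so single_dom_exit returns that edge.  If
   the body also contains a conditional break, the loop has two exits,
   single_exit returns NULL, and so does single_dom_exit.  */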
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}
/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc* desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}
/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
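
/* For example (editor's illustration): for EXPR of the form
   &a[0] + 4 * i (a POINTER_PLUS_EXPR), the base object is determined
   from the &a[0] operand and is (void *) &a.  For an EXPR that is not
   of pointer type, or for a pointer given by an INTEGER_CST (a literal
   address), NULL_TREE is returned.  */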
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
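
/* For example (editor's illustration): for the loop-header phi in

     i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 4;

   determine_biv_step returns 4, while for a phi whose latch argument is
   not an affine function of i_1, or whose step is zero, it returns
   NULL_TREE.  */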
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}
/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it
     was not reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
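
/* For example (editor's illustration): when walking the indices of a[i]
   where I is an iv with base 0 and step 1, idx_find_step replaces the
   index with 0 and adds 1 * sizeof (a[0]) to DTA->step, so that the
   address use can later be described as &a[0] + sizeof (a[0]) * i.  */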
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
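
/* For example (editor's illustration): constant_multiple_of (n * 12, n, &m)
   succeeds with m = 12, and constant_multiple_of (28, 7, &m) succeeds with
   m = 4, while constant_multiple_of (n + 4, n, &m) fails because the
   constant summand 4 is not a constant multiple of n.  */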
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
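
/* For example (editor's illustration): for EXPR == &a[4], strip_offset
   returns &a[0] (the address with the constant index stripped) and stores
   4 * sizeof (a[0]) to *OFFSET; for EXPR == p + 12, a POINTER_PLUS_EXPR
   with constant offset, it returns P and stores 12.  */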
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
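
/* For example (editor's illustration): generic_type_for of int is
   unsigned int, an unsigned type is returned unchanged, and a pointer
   type is mapped to an unsigned integer type of the same precision.
   Since unsigned arithmetic wraps instead of invoking undefined
   behavior, candidate increments can be computed in this type safely.  */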
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2299 /* Returns true if incrementing the induction variable at the end of the LOOP
2300 is allowed.
2302 The purpose is to avoid splitting the latch edge with a biv increment,
2303 thus creating a jump, possibly confusing other optimization passes and
2304 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2305 IP_NORMAL_POS is not available (so we do not have a better alternative),
2306 or if the latch edge is already nonempty. */
2308 static bool
2309 allow_ip_end_pos_p (struct loop *loop)
2311 if (!ip_normal_pos (loop))
2312 return true;
2314 if (!empty_block_p (ip_end_pos (loop)))
2315 return true;
2317 return false;
2320 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2321 Important field is set to IMPORTANT. */
2323 static void
2324 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2325 bool important, struct iv_use *use)
2327 basic_block use_bb = gimple_bb (use->stmt);
2328 enum machine_mode mem_mode;
2329 unsigned HOST_WIDE_INT cstepi;
2331 /* If we insert the increment in any position other than the standard
2332 ones, we must ensure that it is incremented once per iteration.
2333 It must not be in an inner nested loop, or one side of an if
2334 statement. */
2335 if (use_bb->loop_father != data->current_loop
2336 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2337 || stmt_could_throw_p (use->stmt)
2338 || !cst_and_fits_in_hwi (step))
2339 return;
2341 cstepi = int_cst_value (step);
2343 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2344 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2345 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2347 enum tree_code code = MINUS_EXPR;
2348 tree new_base;
2349 tree new_step = step;
2351 if (POINTER_TYPE_P (TREE_TYPE (base)))
2353 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2354 code = POINTER_PLUS_EXPR;
2356 else
2357 new_step = fold_convert (TREE_TYPE (base), new_step);
2358 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2359 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2360 use->stmt);
2362 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2363 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2365 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2366 use->stmt);
2370 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2371 position to POS. If USE is not NULL, the candidate is set as related to
2372 it. The candidate computation is scheduled on all available positions. */
2374 static void
2375 add_candidate (struct ivopts_data *data,
2376 tree base, tree step, bool important, struct iv_use *use)
2378 if (ip_normal_pos (data->current_loop))
2379 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2380 if (ip_end_pos (data->current_loop)
2381 && allow_ip_end_pos_p (data->current_loop))
2382 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2384 if (use != NULL && use->type == USE_ADDRESS)
2385 add_autoinc_candidates (data, base, step, important, use);
2388 /* Add a standard "0 + 1 * iteration" iv candidate for a
2389 type with SIZE bits. */
2391 static void
2392 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2393 unsigned int size)
2395 tree type = lang_hooks.types.type_for_size (size, true);
2396 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2397 true, NULL);
2400 /* Adds standard iv candidates. */
2402 static void
2403 add_standard_iv_candidates (struct ivopts_data *data)
2405 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2407 /* The same for a double-integer type if it is still fast enough. */
2408 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2409 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2413 /* Adds candidates based on the old induction variable IV. */
2415 static void
2416 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2418 gimple phi;
2419 tree def;
2420 struct iv_cand *cand;
2422 add_candidate (data, iv->base, iv->step, true, NULL);
2424 /* The same, but with initial value zero. */
2425 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2426 add_candidate (data, size_int (0), iv->step, true, NULL);
2427 else
2428 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2429 iv->step, true, NULL);
2431 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2432 if (gimple_code (phi) == GIMPLE_PHI)
2434 /* Additionally record the possibility of leaving the original iv
2435 untouched. */
2436 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2437 cand = add_candidate_1 (data,
2438 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2439 SSA_NAME_DEF_STMT (def));
2440 cand->var_before = iv->ssa_name;
2441 cand->var_after = def;
2445 /* Adds candidates based on the old induction variables. */
2447 static void
2448 add_old_ivs_candidates (struct ivopts_data *data)
2450 unsigned i;
2451 struct iv *iv;
2452 bitmap_iterator bi;
2454 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2456 iv = ver_info (data, i)->iv;
2457 if (iv && iv->biv_p && !integer_zerop (iv->step))
2458 add_old_iv_candidates (data, iv);
2462 /* Adds candidates based on the value of the induction variable IV and USE. */
2464 static void
2465 add_iv_value_candidates (struct ivopts_data *data,
2466 struct iv *iv, struct iv_use *use)
2468 unsigned HOST_WIDE_INT offset;
2469 tree base;
2470 tree basetype;
2472 add_candidate (data, iv->base, iv->step, false, use);
2474 /* The same, but with initial value zero. Make such a variable important,
2475 since it is generic enough that possibly many uses may be based
2476 on it. */
2477 basetype = TREE_TYPE (iv->base);
2478 if (POINTER_TYPE_P (basetype))
2479 basetype = sizetype;
2480 add_candidate (data, build_int_cst (basetype, 0),
2481 iv->step, true, use);
2483 /* Third, try removing the constant offset. Make sure to even
2484 add a candidate for &a[0] vs. (T *)&a. */
2485 base = strip_offset (iv->base, &offset);
2486 if (offset
2487 || base != iv->base)
2488 add_candidate (data, base, iv->step, false, use);
2491 /* Adds candidates based on the uses. */
2493 static void
2494 add_derived_ivs_candidates (struct ivopts_data *data)
2496 unsigned i;
2498 for (i = 0; i < n_iv_uses (data); i++)
2500 struct iv_use *use = iv_use (data, i);
2502 if (!use)
2503 continue;
2505 switch (use->type)
2507 case USE_NONLINEAR_EXPR:
2508 case USE_COMPARE:
2509 case USE_ADDRESS:
2510 /* Just add the ivs based on the value of the iv used here. */
2511 add_iv_value_candidates (data, use->iv, use);
2512 break;
2514 default:
2515 gcc_unreachable ();
2520 /* Record important candidates and add them to related_cands bitmaps
2521 if needed. */
2523 static void
2524 record_important_candidates (struct ivopts_data *data)
2526 unsigned i;
2527 struct iv_use *use;
2529 for (i = 0; i < n_iv_cands (data); i++)
2531 struct iv_cand *cand = iv_cand (data, i);
2533 if (cand->important)
2534 bitmap_set_bit (data->important_candidates, i);
2537 data->consider_all_candidates = (n_iv_cands (data)
2538 <= CONSIDER_ALL_CANDIDATES_BOUND);
2540 if (data->consider_all_candidates)
2542 /* We will not need "related_cands" bitmaps in this case,
2543 so release them to decrease peak memory consumption. */
2544 for (i = 0; i < n_iv_uses (data); i++)
2546 use = iv_use (data, i);
2547 BITMAP_FREE (use->related_cands);
2550 else
2552 /* Add important candidates to the related_cands bitmaps. */
2553 for (i = 0; i < n_iv_uses (data); i++)
2554 bitmap_ior_into (iv_use (data, i)->related_cands,
2555 data->important_candidates);
2559 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2560 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2561 we allocate a simple table for every use. */
2563 static void
2564 alloc_use_cost_map (struct ivopts_data *data)
2566 unsigned i, size, s, j;
2568 for (i = 0; i < n_iv_uses (data); i++)
2570 struct iv_use *use = iv_use (data, i);
2571 bitmap_iterator bi;
2573 if (data->consider_all_candidates)
2574 size = n_iv_cands (data);
2575 else
2577 s = 0;
2578 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2580 s++;
2583 /* Round up to the nearest power of two, so that computing the modulo
2584 by it is fast. */
2584 for (size = 1; size < s; size <<= 1)
2585 continue;
2588 use->n_map_members = size;
2589 use->cost_map = XCNEWVEC (struct cost_pair, size);
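/* Editorial sketch: with, say, s == 5 related candidates, the loop above
   picks size == 8, the next power of two.  A lookup can then reduce a
   candidate id into the table with a mask instead of a division,

     i = cand->id & (use->n_map_members - 1);

   which is equivalent to cand->id % size and is how set_use_iv_cost and
   get_use_iv_cost below choose the starting probe slot.  */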
2593 /* Returns a description of the computation cost of an expression whose
2594 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2596 static comp_cost
2597 new_cost (unsigned runtime, unsigned complexity)
2599 comp_cost cost;
2601 cost.cost = runtime;
2602 cost.complexity = complexity;
2604 return cost;
2607 /* Adds costs COST1 and COST2. */
2609 static comp_cost
2610 add_costs (comp_cost cost1, comp_cost cost2)
2612 cost1.cost += cost2.cost;
2613 cost1.complexity += cost2.complexity;
2615 return cost1;
2617 /* Subtracts cost COST2 from COST1. */
2619 static comp_cost
2620 sub_costs (comp_cost cost1, comp_cost cost2)
2622 cost1.cost -= cost2.cost;
2623 cost1.complexity -= cost2.complexity;
2625 return cost1;
2628 /* Returns a negative number if COST1 < COST2, a positive number if
2629 COST1 > COST2, and 0 if COST1 = COST2. */
2631 static int
2632 compare_costs (comp_cost cost1, comp_cost cost2)
2634 if (cost1.cost == cost2.cost)
2635 return cost1.complexity - cost2.complexity;
2637 return cost1.cost - cost2.cost;
2640 /* Returns true if COST is infinite. */
2642 static bool
2643 infinite_cost_p (comp_cost cost)
2645 return cost.cost == INFTY;
2648 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2649 depends on invariants DEPENDS_ON and that the value used in expressing it
2650 is VALUE. */
2652 static void
2653 set_use_iv_cost (struct ivopts_data *data,
2654 struct iv_use *use, struct iv_cand *cand,
2655 comp_cost cost, bitmap depends_on, tree value,
2656 int inv_expr_id)
2658 unsigned i, s;
2660 if (infinite_cost_p (cost))
2662 BITMAP_FREE (depends_on);
2663 return;
2666 if (data->consider_all_candidates)
2668 use->cost_map[cand->id].cand = cand;
2669 use->cost_map[cand->id].cost = cost;
2670 use->cost_map[cand->id].depends_on = depends_on;
2671 use->cost_map[cand->id].value = value;
2672 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2673 return;
2676 /* n_map_members is a power of two, so this computes modulo. */
2677 s = cand->id & (use->n_map_members - 1);
2678 for (i = s; i < use->n_map_members; i++)
2679 if (!use->cost_map[i].cand)
2680 goto found;
2681 for (i = 0; i < s; i++)
2682 if (!use->cost_map[i].cand)
2683 goto found;
2685 gcc_unreachable ();
2687 found:
2688 use->cost_map[i].cand = cand;
2689 use->cost_map[i].cost = cost;
2690 use->cost_map[i].depends_on = depends_on;
2691 use->cost_map[i].value = value;
2692 use->cost_map[i].inv_expr_id = inv_expr_id;
2695 /* Gets cost of (USE, CANDIDATE) pair. */
2697 static struct cost_pair *
2698 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2699 struct iv_cand *cand)
2701 unsigned i, s;
2702 struct cost_pair *ret;
2704 if (!cand)
2705 return NULL;
2707 if (data->consider_all_candidates)
2709 ret = use->cost_map + cand->id;
2710 if (!ret->cand)
2711 return NULL;
2713 return ret;
2716 /* n_map_members is a power of two, so this computes modulo. */
2717 s = cand->id & (use->n_map_members - 1);
2718 for (i = s; i < use->n_map_members; i++)
2719 if (use->cost_map[i].cand == cand)
2720 return use->cost_map + i;
2722 for (i = 0; i < s; i++)
2723 if (use->cost_map[i].cand == cand)
2724 return use->cost_map + i;
2726 return NULL;
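/* Editorial sketch: the cost map behaves as a small open-addressed hash
   table.  With n_map_members == 8 and cand->id == 11, both the insertion
   in set_use_iv_cost and the lookup above start probing at slot
   11 & 7 == 3 and scan forward, wrapping around to slot 0.  Infinite
   costs are never stored, so a full scan without a match simply means
   the pair was pruned.  */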
2729 /* Returns an estimate of the cost of computing SEQ. */
2731 static unsigned
2732 seq_cost (rtx seq, bool speed)
2734 unsigned cost = 0;
2735 rtx set;
2737 for (; seq; seq = NEXT_INSN (seq))
2739 set = single_set (seq);
2740 if (set)
2741 cost += rtx_cost (set, SET, speed);
2742 else
2743 cost++;
2746 return cost;
2749 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2750 static rtx
2751 produce_memory_decl_rtl (tree obj, int *regno)
2753 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2754 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2755 rtx x;
2757 gcc_assert (obj);
2758 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2760 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2761 x = gen_rtx_SYMBOL_REF (address_mode, name);
2762 SET_SYMBOL_REF_DECL (x, obj);
2763 x = gen_rtx_MEM (DECL_MODE (obj), x);
2764 set_mem_addr_space (x, as);
2765 targetm.encode_section_info (obj, x, true);
2767 else
2769 x = gen_raw_REG (address_mode, (*regno)++);
2770 x = gen_rtx_MEM (DECL_MODE (obj), x);
2771 set_mem_addr_space (x, as);
2774 return x;
2777 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2778 walk_tree. DATA contains the actual fake register number. */
2780 static tree
2781 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2783 tree obj = NULL_TREE;
2784 rtx x = NULL_RTX;
2785 int *regno = (int *) data;
2787 switch (TREE_CODE (*expr_p))
2789 case ADDR_EXPR:
2790 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2791 handled_component_p (*expr_p);
2792 expr_p = &TREE_OPERAND (*expr_p, 0))
2793 continue;
2794 obj = *expr_p;
2795 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2796 x = produce_memory_decl_rtl (obj, regno);
2797 break;
2799 case SSA_NAME:
2800 *ws = 0;
2801 obj = SSA_NAME_VAR (*expr_p);
2802 if (!DECL_RTL_SET_P (obj))
2803 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2804 break;
2806 case VAR_DECL:
2807 case PARM_DECL:
2808 case RESULT_DECL:
2809 *ws = 0;
2810 obj = *expr_p;
2812 if (DECL_RTL_SET_P (obj))
2813 break;
2815 if (DECL_MODE (obj) == BLKmode)
2816 x = produce_memory_decl_rtl (obj, regno);
2817 else
2818 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2820 break;
2822 default:
2823 break;
2826 if (x)
2828 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2829 SET_DECL_RTL (obj, x);
2832 return NULL_TREE;
2835 /* Determines cost of the computation of EXPR. */
2837 static unsigned
2838 computation_cost (tree expr, bool speed)
2840 rtx seq, rslt;
2841 tree type = TREE_TYPE (expr);
2842 unsigned cost;
2843 /* Avoid using hard regs in ways which may be unsupported. */
2844 int regno = LAST_VIRTUAL_REGISTER + 1;
2845 struct cgraph_node *node = cgraph_node (current_function_decl);
2846 enum node_frequency real_frequency = node->frequency;
2848 node->frequency = NODE_FREQUENCY_NORMAL;
2849 crtl->maybe_hot_insn_p = speed;
2850 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2851 start_sequence ();
2852 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2853 seq = get_insns ();
2854 end_sequence ();
2855 default_rtl_profile ();
2856 node->frequency = real_frequency;
2858 cost = seq_cost (seq, speed);
2859 if (MEM_P (rslt))
2860 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2861 TYPE_ADDR_SPACE (type), speed);
2863 return cost;
2866 /* Returns variable containing the value of candidate CAND at statement AT. */
2868 static tree
2869 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2871 if (stmt_after_increment (loop, cand, stmt))
2872 return cand->var_after;
2873 else
2874 return cand->var_before;
2877 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2878 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2880 bool
2881 tree_int_cst_sign_bit (const_tree t)
2883 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2884 unsigned HOST_WIDE_INT w;
2886 if (bitno < HOST_BITS_PER_WIDE_INT)
2887 w = TREE_INT_CST_LOW (t);
2888 else
2890 w = TREE_INT_CST_HIGH (t);
2891 bitno -= HOST_BITS_PER_WIDE_INT;
2894 return (w >> bitno) & 1;
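/* Editorial worked example: for a 16-bit type, bitno == 15 fits in the
   low word, so t == 0x8000 yields (0x8000 >> 15) & 1 == 1, i.e. the
   sign bit is set.  Only when the precision exceeds
   HOST_BITS_PER_WIDE_INT is the bit taken from TREE_INT_CST_HIGH.  */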
2897 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2898 same precision that is at least as wide as the precision of TYPE, stores
2899 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2900 type of A and B. */
2902 static tree
2903 determine_common_wider_type (tree *a, tree *b)
2905 tree wider_type = NULL;
2906 tree suba, subb;
2907 tree atype = TREE_TYPE (*a);
2909 if (CONVERT_EXPR_P (*a))
2911 suba = TREE_OPERAND (*a, 0);
2912 wider_type = TREE_TYPE (suba);
2913 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2914 return atype;
2916 else
2917 return atype;
2919 if (CONVERT_EXPR_P (*b))
2921 subb = TREE_OPERAND (*b, 0);
2922 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2923 return atype;
2925 else
2926 return atype;
2928 *a = suba;
2929 *b = subb;
2930 return wider_type;
2933 /* Determines the expression by which USE is expressed from induction variable
2934 CAND at statement AT in LOOP. The expression is stored in a decomposed
2935 form into AFF. Returns false if USE cannot be expressed using CAND. */
2937 static bool
2938 get_computation_aff (struct loop *loop,
2939 struct iv_use *use, struct iv_cand *cand, gimple at,
2940 struct affine_tree_combination *aff)
2942 tree ubase = use->iv->base;
2943 tree ustep = use->iv->step;
2944 tree cbase = cand->iv->base;
2945 tree cstep = cand->iv->step, cstep_common;
2946 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2947 tree common_type, var;
2948 tree uutype;
2949 aff_tree cbase_aff, var_aff;
2950 double_int rat;
2952 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2954 /* We do not have a precision to express the values of use. */
2955 return false;
2958 var = var_at_stmt (loop, cand, at);
2959 uutype = unsigned_type_for (utype);
2961 /* If the conversion is not noop, perform it. */
2962 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2964 cstep = fold_convert (uutype, cstep);
2965 cbase = fold_convert (uutype, cbase);
2966 var = fold_convert (uutype, var);
2969 if (!constant_multiple_of (ustep, cstep, &rat))
2970 return false;
2972 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2973 type, we achieve better folding by computing their difference in this
2974 wider type, casting the result to UUTYPE. We do not need to worry about
2975 overflows, as all the arithmetic will in the end be performed in UUTYPE
2976 anyway. */
2977 common_type = determine_common_wider_type (&ubase, &cbase);
2979 /* use = ubase - ratio * cbase + ratio * var. */
2980 tree_to_aff_combination (ubase, common_type, aff);
2981 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2982 tree_to_aff_combination (var, uutype, &var_aff);
2984 /* We need to shift the value if we are after the increment. */
2985 if (stmt_after_increment (loop, cand, at))
2987 aff_tree cstep_aff;
2989 if (common_type != uutype)
2990 cstep_common = fold_convert (common_type, cstep);
2991 else
2992 cstep_common = cstep;
2994 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2995 aff_combination_add (&cbase_aff, &cstep_aff);
2998 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2999 aff_combination_add (aff, &cbase_aff);
3000 if (common_type != uutype)
3001 aff_combination_convert (aff, uutype);
3003 aff_combination_scale (&var_aff, rat);
3004 aff_combination_add (aff, &var_aff);
3006 return true;
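/* Editorial worked example (hypothetical numbers): for a use with value
   ubase + i * ustep, say 8 + i * 4, and a candidate var == cbase + i * cstep,
   say 0 + i * 1, constant_multiple_of yields rat == 4, and the affine
   combination built above is

     use = ubase - rat * cbase + rat * var == 8 + 4 * var.

   When AT lies after the increment, cstep is first added into cbase,
   since var already holds the next iteration's value there.  */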
3009 /* Determines the expression by which USE is expressed from induction variable
3010 CAND at statement AT in LOOP. The computation is unshared. */
3012 static tree
3013 get_computation_at (struct loop *loop,
3014 struct iv_use *use, struct iv_cand *cand, gimple at)
3016 aff_tree aff;
3017 tree type = TREE_TYPE (use->iv->base);
3019 if (!get_computation_aff (loop, use, cand, at, &aff))
3020 return NULL_TREE;
3021 unshare_aff_combination (&aff);
3022 return fold_convert (type, aff_combination_to_tree (&aff));
3025 /* Determines the expression by which USE is expressed from induction variable
3026 CAND in LOOP. The computation is unshared. */
3028 static tree
3029 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3031 return get_computation_at (loop, use, cand, use->stmt);
3034 /* Adjust the cost COST for being in loop setup rather than loop body.
3035 If we're optimizing for space, the loop setup overhead is constant;
3036 if we're optimizing for speed, amortize it over the average number
of iterations. */
3037 static unsigned
3038 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3040 if (cost == INFTY)
3041 return cost;
3042 else if (optimize_loop_for_speed_p (data->current_loop))
3043 return cost / avg_loop_niter (data->current_loop);
3044 else
3045 return cost;
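/* Editorial worked example: a setup cost of 10 in a loop averaging 5
   iterations is charged as 10 / 5 == 2 when optimizing for speed, since
   the setup executes once while the body runs avg_loop_niter times;
   when optimizing for size it stays 10, as the setup code is emitted
   exactly once either way.  */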
3048 /* Returns cost of addition in MODE. */
3050 static unsigned
3051 add_cost (enum machine_mode mode, bool speed)
3053 static unsigned costs[NUM_MACHINE_MODES];
3054 rtx seq;
3055 unsigned cost;
3057 if (costs[mode])
3058 return costs[mode];
3060 start_sequence ();
3061 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3062 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3063 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3064 NULL_RTX);
3065 seq = get_insns ();
3066 end_sequence ();
3068 cost = seq_cost (seq, speed);
3069 if (!cost)
3070 cost = 1;
3072 costs[mode] = cost;
3074 if (dump_file && (dump_flags & TDF_DETAILS))
3075 fprintf (dump_file, "Addition in %s costs %d\n",
3076 GET_MODE_NAME (mode), cost);
3077 return cost;
3080 /* Entry in a hashtable of already known costs for multiplication. */
3081 struct mbc_entry
3083 HOST_WIDE_INT cst; /* The constant to multiply by. */
3084 enum machine_mode mode; /* In mode. */
3085 unsigned cost; /* The cost. */
3088 /* Computes the hash value for ENTRY. */
3090 static hashval_t
3091 mbc_entry_hash (const void *entry)
3093 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3095 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3098 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3100 static int
3101 mbc_entry_eq (const void *entry1, const void *entry2)
3103 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3104 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3106 return (e1->mode == e2->mode
3107 && e1->cst == e2->cst);
3110 /* Returns cost of multiplication by constant CST in MODE. */
3112 unsigned
3113 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3115 static htab_t costs;
3116 struct mbc_entry **cached, act;
3117 rtx seq;
3118 unsigned cost;
3120 if (!costs)
3121 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3123 act.mode = mode;
3124 act.cst = cst;
3125 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3126 if (*cached)
3127 return (*cached)->cost;
3129 *cached = XNEW (struct mbc_entry);
3130 (*cached)->mode = mode;
3131 (*cached)->cst = cst;
3133 start_sequence ();
3134 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3135 gen_int_mode (cst, mode), NULL_RTX, 0);
3136 seq = get_insns ();
3137 end_sequence ();
3139 cost = seq_cost (seq, speed);
3141 if (dump_file && (dump_flags & TDF_DETAILS))
3142 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3143 (int) cst, GET_MODE_NAME (mode), cost);
3145 (*cached)->cost = cost;
3147 return cost;
3150 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3151 validity for a memory reference accessing memory of mode MODE in
3152 address space AS. */
3154 DEF_VEC_P (sbitmap);
3155 DEF_VEC_ALLOC_P (sbitmap, heap);
3157 bool
3158 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3159 addr_space_t as)
3161 #define MAX_RATIO 128
3162 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3163 static VEC (sbitmap, heap) *valid_mult_list;
3164 sbitmap valid_mult;
3166 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3167 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3169 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3170 if (!valid_mult)
3172 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3173 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3174 rtx addr;
3175 HOST_WIDE_INT i;
3177 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3178 sbitmap_zero (valid_mult);
3179 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3180 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3182 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3183 if (memory_address_addr_space_p (mode, addr, as))
3184 SET_BIT (valid_mult, i + MAX_RATIO);
3187 if (dump_file && (dump_flags & TDF_DETAILS))
3189 fprintf (dump_file, " allowed multipliers:");
3190 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3191 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3192 fprintf (dump_file, " %d", (int) i);
3193 fprintf (dump_file, "\n");
3194 fprintf (dump_file, "\n");
3197 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3200 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3201 return false;
3203 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
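/* Editorial sketch: the cache is one sbitmap of 2 * MAX_RATIO + 1 bits
   per (address space, mode) pair, biased by MAX_RATIO, so

     ratio -128 maps to bit 0,
     ratio    1 maps to bit 129,
     ratio  128 maps to bit 256,

   and a bit is set iff memory_address_addr_space_p accepted an address
   of the form reg * ratio in the probe loop above.  */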
3206 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3207 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3208 variable is omitted. Compute the cost for a memory reference that accesses
3209 a memory location of mode MEM_MODE in address space AS.
3211 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3212 size of MEM_MODE / RATIO) is available. To make this determination, we
3213 look at the size of the increment to be made, which is given in CSTEP.
3214 CSTEP may be zero if the step is unknown.
3215 STMT_AFTER_INC is true iff the statement we're looking at is after the
3216 increment of the original biv.
3218 TODO -- there must be some better way. This all is quite crude. */
3220 typedef struct
3222 HOST_WIDE_INT min_offset, max_offset;
3223 unsigned costs[2][2][2][2];
3224 } *address_cost_data;
3226 DEF_VEC_P (address_cost_data);
3227 DEF_VEC_ALLOC_P (address_cost_data, heap);
3229 static comp_cost
3230 get_address_cost (bool symbol_present, bool var_present,
3231 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3232 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3233 addr_space_t as, bool speed,
3234 bool stmt_after_inc, bool *may_autoinc)
3236 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3237 static VEC(address_cost_data, heap) *address_cost_data_list;
3238 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3239 address_cost_data data;
3240 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3241 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3242 unsigned cost, acost, complexity;
3243 bool offset_p, ratio_p, autoinc;
3244 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3245 unsigned HOST_WIDE_INT mask;
3246 unsigned bits;
3248 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3249 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3250 data_index + 1);
3252 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3253 if (!data)
3255 HOST_WIDE_INT i;
3256 HOST_WIDE_INT rat, off = 0;
3257 int old_cse_not_expected, width;
3258 unsigned sym_p, var_p, off_p, rat_p, add_c;
3259 rtx seq, addr, base;
3260 rtx reg0, reg1;
3262 data = (address_cost_data) xcalloc (1, sizeof (*data));
3264 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3266 width = GET_MODE_BITSIZE (address_mode) - 1;
3267 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3268 width = HOST_BITS_PER_WIDE_INT - 1;
3269 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3271 for (i = width; i >= 0; i--)
3273 off = -((HOST_WIDE_INT) 1 << i);
3274 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3275 if (memory_address_addr_space_p (mem_mode, addr, as))
3276 break;
3278 data->min_offset = (i == -1 ? 0 : off);
3280 for (i = width; i >= 0; i--)
3282 off = ((HOST_WIDE_INT) 1 << i) - 1;
3283 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3284 if (memory_address_addr_space_p (mem_mode, addr, as))
3285 break;
3287 if (i == -1)
3288 off = 0;
3289 data->max_offset = off;
3291 if (dump_file && (dump_flags & TDF_DETAILS))
3293 fprintf (dump_file, "get_address_cost:\n");
3294 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3295 GET_MODE_NAME (mem_mode),
3296 data->min_offset);
3297 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3298 GET_MODE_NAME (mem_mode),
3299 data->max_offset);
3302 rat = 1;
3303 for (i = 2; i <= MAX_RATIO; i++)
3304 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3306 rat = i;
3307 break;
3310 /* Compute the cost of various addressing modes. */
3311 acost = 0;
3312 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3313 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3315 if (HAVE_PRE_DECREMENT)
3317 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3318 has_predec[mem_mode]
3319 = memory_address_addr_space_p (mem_mode, addr, as);
3321 if (HAVE_POST_DECREMENT)
3323 addr = gen_rtx_POST_DEC (address_mode, reg0);
3324 has_postdec[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3327 if (HAVE_PRE_INCREMENT)
3329 addr = gen_rtx_PRE_INC (address_mode, reg0);
3330 has_preinc[mem_mode]
3331 = memory_address_addr_space_p (mem_mode, addr, as);
3333 if (HAVE_POST_INCREMENT)
3335 addr = gen_rtx_POST_INC (address_mode, reg0);
3336 has_postinc[mem_mode]
3337 = memory_address_addr_space_p (mem_mode, addr, as);
3339 for (i = 0; i < 16; i++)
3341 sym_p = i & 1;
3342 var_p = (i >> 1) & 1;
3343 off_p = (i >> 2) & 1;
3344 rat_p = (i >> 3) & 1;
3346 addr = reg0;
3347 if (rat_p)
3348 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3349 gen_int_mode (rat, address_mode));
3351 if (var_p)
3352 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3354 if (sym_p)
3356 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3357 /* ??? We can run into trouble with some backends by presenting
3358 them with symbols which haven't been properly passed through
3359 targetm.encode_section_info. By setting the local bit, we
3360 enhance the probability of things working. */
3361 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3363 if (off_p)
3364 base = gen_rtx_fmt_e (CONST, address_mode,
3365 gen_rtx_fmt_ee
3366 (PLUS, address_mode, base,
3367 gen_int_mode (off, address_mode)));
3369 else if (off_p)
3370 base = gen_int_mode (off, address_mode);
3371 else
3372 base = NULL_RTX;
3374 if (base)
3375 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3377 start_sequence ();
3378 /* To avoid splitting addressing modes, pretend that no cse will
3379 follow. */
3380 old_cse_not_expected = cse_not_expected;
3381 cse_not_expected = true;
3382 addr = memory_address_addr_space (mem_mode, addr, as);
3383 cse_not_expected = old_cse_not_expected;
3384 seq = get_insns ();
3385 end_sequence ();
3387 acost = seq_cost (seq, speed);
3388 acost += address_cost (addr, mem_mode, as, speed);
3390 if (!acost)
3391 acost = 1;
3392 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3395 /* On some targets, it is quite expensive to load a symbol into a register,
3396 which makes addresses that contain symbols look much more expensive.
3397 However, the symbol will have to be loaded in any case before the
3398 loop (and quite likely we have it in a register already), so it does not
3399 make much sense to penalize them too heavily. So make some final
3400 tweaks for the SYMBOL_PRESENT modes:
3402 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3403 var is cheaper, use this mode with small penalty.
3404 If VAR_PRESENT is true, try whether the mode with
3405 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3406 if this is the case, use it. */
3407 add_c = add_cost (address_mode, speed);
3408 for (i = 0; i < 8; i++)
3410 var_p = i & 1;
3411 off_p = (i >> 1) & 1;
3412 rat_p = (i >> 2) & 1;
3414 acost = data->costs[0][1][off_p][rat_p] + 1;
3415 if (var_p)
3416 acost += add_c;
3418 if (acost < data->costs[1][var_p][off_p][rat_p])
3419 data->costs[1][var_p][off_p][rat_p] = acost;
3422 if (dump_file && (dump_flags & TDF_DETAILS))
3424 fprintf (dump_file, "Address costs:\n");
3426 for (i = 0; i < 16; i++)
3428 sym_p = i & 1;
3429 var_p = (i >> 1) & 1;
3430 off_p = (i >> 2) & 1;
3431 rat_p = (i >> 3) & 1;
3433 fprintf (dump_file, " ");
3434 if (sym_p)
3435 fprintf (dump_file, "sym + ");
3436 if (var_p)
3437 fprintf (dump_file, "var + ");
3438 if (off_p)
3439 fprintf (dump_file, "cst + ");
3440 if (rat_p)
3441 fprintf (dump_file, "rat * ");
3443 acost = data->costs[sym_p][var_p][off_p][rat_p];
3444 fprintf (dump_file, "index costs %d\n", acost);
3446 if (has_predec[mem_mode] || has_postdec[mem_mode]
3447 || has_preinc[mem_mode] || has_postinc[mem_mode])
3448 fprintf (dump_file, " May include autoinc/dec\n");
3449 fprintf (dump_file, "\n");
3452 VEC_replace (address_cost_data, address_cost_data_list,
3453 data_index, data);
3456 bits = GET_MODE_BITSIZE (address_mode);
3457 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3458 offset &= mask;
3459 if ((offset >> (bits - 1) & 1))
3460 offset |= ~mask;
3461 s_offset = offset;
3463 autoinc = false;
3464 msize = GET_MODE_SIZE (mem_mode);
3465 autoinc_offset = offset;
3466 if (stmt_after_inc)
3467 autoinc_offset += ratio * cstep;
3468 if (symbol_present || var_present || ratio != 1)
3469 autoinc = false;
3470 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3471 && msize == cstep)
3472 || (has_postdec[mem_mode] && autoinc_offset == 0
3473 && msize == -cstep)
3474 || (has_preinc[mem_mode] && autoinc_offset == msize
3475 && msize == cstep)
3476 || (has_predec[mem_mode] && autoinc_offset == -msize
3477 && msize == -cstep))
3478 autoinc = true;
3480 cost = 0;
3481 offset_p = (s_offset != 0
3482 && data->min_offset <= s_offset
3483 && s_offset <= data->max_offset);
3484 ratio_p = (ratio != 1
3485 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3487 if (ratio != 1 && !ratio_p)
3488 cost += multiply_by_cost (ratio, address_mode, speed);
3490 if (s_offset && !offset_p && !symbol_present)
3491 cost += add_cost (address_mode, speed);
3493 if (may_autoinc)
3494 *may_autoinc = autoinc;
3495 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3496 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3497 return new_cost (cost + acost, complexity);
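/* Editorial sketch: the sixteen data->costs entries are indexed by a
   4-bit mask of the address parts present,

     i == sym_p + (var_p << 1) + (off_p << 2) + (rat_p << 3),

   so i == 6 (binary 0110) prices a "var + cst" address and i == 15 the
   full "sym + var + cst + rat * index" shape; the returned complexity
   is just the number of parts actually used.  */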
3500 /* Estimates cost of forcing expression EXPR into a variable. */
3502 static comp_cost
3503 force_expr_to_var_cost (tree expr, bool speed)
3505 static bool costs_initialized = false;
3506 static unsigned integer_cost [2];
3507 static unsigned symbol_cost [2];
3508 static unsigned address_cost [2];
3509 tree op0, op1;
3510 comp_cost cost0, cost1, cost;
3511 enum machine_mode mode;
3513 if (!costs_initialized)
3515 tree type = build_pointer_type (integer_type_node);
3516 tree var, addr;
3517 rtx x;
3518 int i;
3520 var = create_tmp_var_raw (integer_type_node, "test_var");
3521 TREE_STATIC (var) = 1;
3522 x = produce_memory_decl_rtl (var, NULL);
3523 SET_DECL_RTL (var, x);
3525 addr = build1 (ADDR_EXPR, type, var);
3528 for (i = 0; i < 2; i++)
3530 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3531 2000), i);
3533 symbol_cost[i] = computation_cost (addr, i) + 1;
3535 address_cost[i]
3536 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3537 addr,
3538 build_int_cst (sizetype, 2000)), i) + 1;
3539 if (dump_file && (dump_flags & TDF_DETAILS))
3541 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3542 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3543 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3544 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3545 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3546 fprintf (dump_file, "\n");
3550 costs_initialized = true;
3553 STRIP_NOPS (expr);
3555 if (SSA_VAR_P (expr))
3556 return zero_cost;
3558 if (is_gimple_min_invariant (expr))
3560 if (TREE_CODE (expr) == INTEGER_CST)
3561 return new_cost (integer_cost [speed], 0);
3563 if (TREE_CODE (expr) == ADDR_EXPR)
3565 tree obj = TREE_OPERAND (expr, 0);
3567 if (TREE_CODE (obj) == VAR_DECL
3568 || TREE_CODE (obj) == PARM_DECL
3569 || TREE_CODE (obj) == RESULT_DECL)
3570 return new_cost (symbol_cost [speed], 0);
3573 return new_cost (address_cost [speed], 0);
3576 switch (TREE_CODE (expr))
3578 case POINTER_PLUS_EXPR:
3579 case PLUS_EXPR:
3580 case MINUS_EXPR:
3581 case MULT_EXPR:
3582 op0 = TREE_OPERAND (expr, 0);
3583 op1 = TREE_OPERAND (expr, 1);
3584 STRIP_NOPS (op0);
3585 STRIP_NOPS (op1);
3587 if (is_gimple_val (op0))
3588 cost0 = zero_cost;
3589 else
3590 cost0 = force_expr_to_var_cost (op0, speed);
3592 if (is_gimple_val (op1))
3593 cost1 = zero_cost;
3594 else
3595 cost1 = force_expr_to_var_cost (op1, speed);
3597 break;
3599 case NEGATE_EXPR:
3600 op0 = TREE_OPERAND (expr, 0);
3601 STRIP_NOPS (op0);
3602 op1 = NULL_TREE;
3604 if (is_gimple_val (op0))
3605 cost0 = zero_cost;
3606 else
3607 cost0 = force_expr_to_var_cost (op0, speed);
3609 cost1 = zero_cost;
3610 break;
3612 default:
3613 /* Just an arbitrary value, FIXME. */
3614 return new_cost (target_spill_cost[speed], 0);
3617 mode = TYPE_MODE (TREE_TYPE (expr));
3618 switch (TREE_CODE (expr))
3620 case POINTER_PLUS_EXPR:
3621 case PLUS_EXPR:
3622 case MINUS_EXPR:
3623 case NEGATE_EXPR:
3624 cost = new_cost (add_cost (mode, speed), 0);
3625 break;
3627 case MULT_EXPR:
3628 if (cst_and_fits_in_hwi (op0))
3629 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3630 else if (cst_and_fits_in_hwi (op1))
3631 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3632 else
3633 return new_cost (target_spill_cost [speed], 0);
3634 break;
3636 default:
3637 gcc_unreachable ();
3640 cost = add_costs (cost, cost0);
3641 cost = add_costs (cost, cost1);
3643 /* Bound the cost by target_spill_cost. The parts of complicated
3644 computations often are either loop invariant or at least can
3645 be shared between several iv uses, so letting this grow without
3646 limits would not give reasonable results. */
3647 if (cost.cost > (int) target_spill_cost [speed])
3648 cost.cost = target_spill_cost [speed];
3650 return cost;
3653 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3654 invariants the computation depends on. */
3656 static comp_cost
3657 force_var_cost (struct ivopts_data *data,
3658 tree expr, bitmap *depends_on)
3660 if (depends_on)
3662 fd_ivopts_data = data;
3663 walk_tree (&expr, find_depends, depends_on, NULL);
3666 return force_expr_to_var_cost (expr, data->speed);
3669 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3670 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3671 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3672 invariants the computation depends on. */
3674 static comp_cost
3675 split_address_cost (struct ivopts_data *data,
3676 tree addr, bool *symbol_present, bool *var_present,
3677 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3679 tree core;
3680 HOST_WIDE_INT bitsize;
3681 HOST_WIDE_INT bitpos;
3682 tree toffset;
3683 enum machine_mode mode;
3684 int unsignedp, volatilep;
3686 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3687 &unsignedp, &volatilep, false);
3689 if (toffset != 0
3690 || bitpos % BITS_PER_UNIT != 0
3691 || TREE_CODE (core) != VAR_DECL)
3693 *symbol_present = false;
3694 *var_present = true;
3695 fd_ivopts_data = data;
3696 walk_tree (&addr, find_depends, depends_on, NULL);
3697 return new_cost (target_spill_cost[data->speed], 0);
3700 *offset += bitpos / BITS_PER_UNIT;
3701 if (TREE_STATIC (core)
3702 || DECL_EXTERNAL (core))
3704 *symbol_present = true;
3705 *var_present = false;
3706 return zero_cost;
3709 *symbol_present = false;
3710 *var_present = true;
3711 return zero_cost;
3714 /* Estimates cost of expressing difference of addresses E1 - E2 as
3715 var + symbol + offset. The value of offset is added to OFFSET,
3716 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3717 part is missing. DEPENDS_ON is a set of the invariants the computation
3718 depends on. */
3720 static comp_cost
3721 ptr_difference_cost (struct ivopts_data *data,
3722 tree e1, tree e2, bool *symbol_present, bool *var_present,
3723 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3725 HOST_WIDE_INT diff = 0;
3726 aff_tree aff_e1, aff_e2;
3727 tree type;
3729 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3731 if (ptr_difference_const (e1, e2, &diff))
3733 *offset += diff;
3734 *symbol_present = false;
3735 *var_present = false;
3736 return zero_cost;
3739 if (integer_zerop (e2))
3740 return split_address_cost (data, TREE_OPERAND (e1, 0),
3741 symbol_present, var_present, offset, depends_on);
3743 *symbol_present = false;
3744 *var_present = true;
3746 type = signed_type_for (TREE_TYPE (e1));
3747 tree_to_aff_combination (e1, type, &aff_e1);
3748 tree_to_aff_combination (e2, type, &aff_e2);
3749 aff_combination_scale (&aff_e2, double_int_minus_one);
3750 aff_combination_add (&aff_e1, &aff_e2);
3752 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3755 /* Estimates cost of expressing difference E1 - E2 as
3756 var + symbol + offset. The value of offset is added to OFFSET,
3757 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3758 part is missing. DEPENDS_ON is a set of the invariants the computation
3759 depends on. */
3761 static comp_cost
3762 difference_cost (struct ivopts_data *data,
3763 tree e1, tree e2, bool *symbol_present, bool *var_present,
3764 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3766 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3767 unsigned HOST_WIDE_INT off1, off2;
3768 aff_tree aff_e1, aff_e2;
3769 tree type;
3771 e1 = strip_offset (e1, &off1);
3772 e2 = strip_offset (e2, &off2);
3773 *offset += off1 - off2;
3775 STRIP_NOPS (e1);
3776 STRIP_NOPS (e2);
3778 if (TREE_CODE (e1) == ADDR_EXPR)
3779 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3780 offset, depends_on);
3781 *symbol_present = false;
3783 if (operand_equal_p (e1, e2, 0))
3785 *var_present = false;
3786 return zero_cost;
3789 *var_present = true;
3791 if (integer_zerop (e2))
3792 return force_var_cost (data, e1, depends_on);
3794 if (integer_zerop (e1))
3796 comp_cost cost = force_var_cost (data, e2, depends_on);
3797 cost.cost += multiply_by_cost (-1, mode, data->speed);
3798 return cost;
3801 type = signed_type_for (TREE_TYPE (e1));
3802 tree_to_aff_combination (e1, type, &aff_e1);
3803 tree_to_aff_combination (e2, type, &aff_e2);
3804 aff_combination_scale (&aff_e2, double_int_minus_one);
3805 aff_combination_add (&aff_e1, &aff_e2);
3807 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3810 /* Returns true if AFF1 and AFF2 are identical. */
3812 static bool
3813 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3815 unsigned i;
3817 if (aff1->n != aff2->n)
3818 return false;
3820 for (i = 0; i < aff1->n; i++)
3822 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3823 return false;
3825 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3826 return false;
3828 return true;
3831 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3832 requires a new compiler-generated temporary. Returns -1 otherwise.
3833 ADDRESS_P is a flag indicating if the expression is for address
3834 computation. */
3836 static int
3837 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3838 tree cbase, HOST_WIDE_INT ratio,
3839 bool address_p)
3841 aff_tree ubase_aff, cbase_aff;
3842 tree expr, ub, cb;
3843 struct iv_inv_expr_ent ent;
3844 struct iv_inv_expr_ent **slot;
3846 STRIP_NOPS (ubase);
3847 STRIP_NOPS (cbase);
3848 ub = ubase;
3849 cb = cbase;
3851 if ((TREE_CODE (ubase) == INTEGER_CST)
3852 && (TREE_CODE (cbase) == INTEGER_CST))
3853 return -1;
3855 /* Strips the constant part. */
3856 if (TREE_CODE (ubase) == PLUS_EXPR
3857 || TREE_CODE (ubase) == MINUS_EXPR
3858 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3860 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3861 ubase = TREE_OPERAND (ubase, 0);
3864 /* Strips the constant part. */
3865 if (TREE_CODE (cbase) == PLUS_EXPR
3866 || TREE_CODE (cbase) == MINUS_EXPR
3867 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3869 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3870 cbase = TREE_OPERAND (cbase, 0);
3873 if (address_p)
3875 if (((TREE_CODE (ubase) == SSA_NAME)
3876 || (TREE_CODE (ubase) == ADDR_EXPR
3877 && is_gimple_min_invariant (ubase)))
3878 && (TREE_CODE (cbase) == INTEGER_CST))
3879 return -1;
3881 if (((TREE_CODE (cbase) == SSA_NAME)
3882 || (TREE_CODE (cbase) == ADDR_EXPR
3883 && is_gimple_min_invariant (cbase)))
3884 && (TREE_CODE (ubase) == INTEGER_CST))
3885 return -1;
3888 if (ratio == 1)
3890 if (operand_equal_p (ubase, cbase, 0))
3891 return -1;
3893 if (TREE_CODE (ubase) == ADDR_EXPR
3894 && TREE_CODE (cbase) == ADDR_EXPR)
3896 tree usym, csym;
3898 usym = TREE_OPERAND (ubase, 0);
3899 csym = TREE_OPERAND (cbase, 0);
3900 if (TREE_CODE (usym) == ARRAY_REF)
3902 tree ind = TREE_OPERAND (usym, 1);
3903 if (TREE_CODE (ind) == INTEGER_CST
3904 && host_integerp (ind, 0)
3905 && TREE_INT_CST_LOW (ind) == 0)
3906 usym = TREE_OPERAND (usym, 0);
3908 if (TREE_CODE (csym) == ARRAY_REF)
3910 tree ind = TREE_OPERAND (csym, 1);
3911 if (TREE_CODE (ind) == INTEGER_CST
3912 && host_integerp (ind, 0)
3913 && TREE_INT_CST_LOW (ind) == 0)
3914 csym = TREE_OPERAND (csym, 0);
3916 if (operand_equal_p (usym, csym, 0))
3917 return -1;
3919 /* Now do a more complex comparison. */
3920 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3921 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3922 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3923 return -1;
3926 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3927 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3929 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3930 aff_combination_add (&ubase_aff, &cbase_aff);
3931 expr = aff_combination_to_tree (&ubase_aff);
3932 ent.expr = expr;
3933 ent.hash = iterative_hash_expr (expr, 0);
3934 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3935 &ent, INSERT);
3936 if (*slot)
3937 return (*slot)->id;
3939 *slot = XNEW (struct iv_inv_expr_ent);
3940 (*slot)->expr = expr;
3941 (*slot)->hash = ent.hash;
3942 (*slot)->id = data->inv_expr_id++;
3943 return (*slot)->id;
3948 /* Determines the cost of the computation by which USE is expressed
3949 from induction variable CAND. If ADDRESS_P is true, we just need
3950 to create an address from it, otherwise we want to get it into a
3951 register. A set of invariants we depend on is stored in
3952 DEPENDS_ON. AT is the statement at which the value is computed.
3953 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3954 addressing is likely. */
3956 static comp_cost
3957 get_computation_cost_at (struct ivopts_data *data,
3958 struct iv_use *use, struct iv_cand *cand,
3959 bool address_p, bitmap *depends_on, gimple at,
3960 bool *can_autoinc,
3961 int *inv_expr_id)
3963 tree ubase = use->iv->base, ustep = use->iv->step;
3964 tree cbase, cstep;
3965 tree utype = TREE_TYPE (ubase), ctype;
3966 unsigned HOST_WIDE_INT cstepi, offset = 0;
3967 HOST_WIDE_INT ratio, aratio;
3968 bool var_present, symbol_present, stmt_is_after_inc;
3969 comp_cost cost;
3970 double_int rat;
3971 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3973 *depends_on = NULL;
3975 /* Only consider real candidates. */
3976 if (!cand->iv)
3977 return infinite_cost;
3979 cbase = cand->iv->base;
3980 cstep = cand->iv->step;
3981 ctype = TREE_TYPE (cbase);
3983 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3985 /* We do not have a precision to express the values of use. */
3986 return infinite_cost;
3989 if (address_p)
3991 /* Do not try to express address of an object with computation based
3992 on address of a different object. This may cause problems in rtl
3993 level alias analysis (that does not expect this to be happening,
3994 as this is illegal in C), and would be unlikely to be useful
3995 anyway. */
3996 if (use->iv->base_object
3997 && cand->iv->base_object
3998 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3999 return infinite_cost;
4002 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4004 /* TODO -- add direct handling of this case. */
4005 goto fallback;
4008 /* CSTEPI is removed from the offset in case the statement is after the
4009 increment. If the step is not constant, we use zero instead.
4010 This is a bit imprecise (there is the extra addition), but
4011 redundancy elimination is likely to transform the code so that
4012 it uses the value of the variable before the increment anyway,
4013 so this is not all that unrealistic. */
4014 if (cst_and_fits_in_hwi (cstep))
4015 cstepi = int_cst_value (cstep);
4016 else
4017 cstepi = 0;
4019 if (!constant_multiple_of (ustep, cstep, &rat))
4020 return infinite_cost;
4022 if (double_int_fits_in_shwi_p (rat))
4023 ratio = double_int_to_shwi (rat);
4024 else
4025 return infinite_cost;
4027 STRIP_NOPS (cbase);
4028 ctype = TREE_TYPE (cbase);
4030 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4031 or ratio == 1, it is better to handle this like
4033 ubase - ratio * cbase + ratio * var
4035 (this also holds in the case ratio == -1, TODO). */
4037 if (cst_and_fits_in_hwi (cbase))
4039 offset = - ratio * int_cst_value (cbase);
4040 cost = difference_cost (data,
4041 ubase, build_int_cst (utype, 0),
4042 &symbol_present, &var_present, &offset,
4043 depends_on);
4044 cost.cost /= avg_loop_niter (data->current_loop);
4046 else if (ratio == 1)
4048 cost = difference_cost (data,
4049 ubase, cbase,
4050 &symbol_present, &var_present, &offset,
4051 depends_on);
4052 cost.cost /= avg_loop_niter (data->current_loop);
4054 else if (address_p
4055 && !POINTER_TYPE_P (ctype)
4056 && multiplier_allowed_in_address_p
4057 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4058 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4060 cbase
4061 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4062 cost = difference_cost (data,
4063 ubase, cbase,
4064 &symbol_present, &var_present, &offset,
4065 depends_on);
4066 cost.cost /= avg_loop_niter (data->current_loop);
4068 else
4070 cost = force_var_cost (data, cbase, depends_on);
4071 cost = add_costs (cost,
4072 difference_cost (data,
4073 ubase, build_int_cst (utype, 0),
4074 &symbol_present, &var_present,
4075 &offset, depends_on));
4076 cost.cost /= avg_loop_niter (data->current_loop);
4077 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4080 if (inv_expr_id)
4082 *inv_expr_id =
4083 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4084 /* Clear depends on. */
4085 if (*inv_expr_id != -1 && depends_on && *depends_on)
4086 bitmap_clear (*depends_on);
4089 /* If we are after the increment, the value of the candidate is higher by
4090 one iteration. */
4091 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4092 if (stmt_is_after_inc)
4093 offset -= ratio * cstepi;
4095 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4096 (symbol/var1/const parts may be omitted). If we are looking for an
4097 address, find the cost of addressing this. */
4098 if (address_p)
4099 return add_costs (cost,
4100 get_address_cost (symbol_present, var_present,
4101 offset, ratio, cstepi,
4102 TYPE_MODE (TREE_TYPE (utype)),
4103 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4104 speed, stmt_is_after_inc,
4105 can_autoinc));
4107 /* Otherwise estimate the costs for computing the expression. */
4108 if (!symbol_present && !var_present && !offset)
4110 if (ratio != 1)
4111 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4112 return cost;
4115 /* Symbol + offset should be compile-time computable so consider that they
4116 are added once to the variable, if present. */
4117 if (var_present && (symbol_present || offset))
4118 cost.cost += adjust_setup_cost (data,
4119 add_cost (TYPE_MODE (ctype), speed));
4121 /* Having offset does not affect runtime cost in case it is added to
4122 symbol, but it increases complexity. */
4123 if (offset)
4124 cost.complexity++;
4126 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4128 aratio = ratio > 0 ? ratio : -ratio;
4129 if (aratio != 1)
4130 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4131 return cost;
4133 fallback:
4134 if (can_autoinc)
4135 *can_autoinc = false;
4138 /* Just get the expression, expand it and measure the cost. */
4139 tree comp = get_computation_at (data->current_loop, use, cand, at);
4141 if (!comp)
4142 return infinite_cost;
4144 if (address_p)
4145 comp = build_simple_mem_ref (comp);
4147 return new_cost (computation_cost (comp, speed), 0);
4151 /* Determines the cost of the computation by which USE is expressed
4152 from induction variable CAND. If ADDRESS_P is true, we just need
4153 to create an address from it, otherwise we want to get it into a
4154 register. A set of invariants we depend on is stored in
4155 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4156 autoinc addressing is likely. */
4158 static comp_cost
4159 get_computation_cost (struct ivopts_data *data,
4160 struct iv_use *use, struct iv_cand *cand,
4161 bool address_p, bitmap *depends_on,
4162 bool *can_autoinc, int *inv_expr_id)
4164 return get_computation_cost_at (data,
4165 use, cand, address_p, depends_on, use->stmt,
4166 can_autoinc, inv_expr_id);
4169 /* Determines cost of basing replacement of USE on CAND in a generic
4170 expression. */
4172 static bool
4173 determine_use_iv_cost_generic (struct ivopts_data *data,
4174 struct iv_use *use, struct iv_cand *cand)
4176 bitmap depends_on;
4177 comp_cost cost;
4178 int inv_expr_id = -1;
4180 /* The simple case first -- if we need to express the value of the preserved
4181 original biv, the cost is 0. This also prevents us from counting the
4182 cost of the increment twice -- once at this use and once in the cost of
4183 the candidate. */
4184 if (cand->pos == IP_ORIGINAL
4185 && cand->incremented_at == use->stmt)
4187 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4188 return true;
4191 cost = get_computation_cost (data, use, cand, false, &depends_on,
4192 NULL, &inv_expr_id);
4194 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4195 inv_expr_id);
4197 return !infinite_cost_p (cost);
4200 /* Determines cost of basing replacement of USE on CAND in an address. */
4202 static bool
4203 determine_use_iv_cost_address (struct ivopts_data *data,
4204 struct iv_use *use, struct iv_cand *cand)
4206 bitmap depends_on;
4207 bool can_autoinc;
4208 int inv_expr_id = -1;
4209 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4210 &can_autoinc, &inv_expr_id);
4212 if (cand->ainc_use == use)
4214 if (can_autoinc)
4215 cost.cost -= cand->cost_step;
4216 /* If we generated the candidate solely for exploiting autoincrement
4217 opportunities, and it turns out it can't be used, set the cost to
4218 infinity to make sure we ignore it. */
4219 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4220 cost = infinite_cost;
4222 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4223 inv_expr_id);
4225 return !infinite_cost_p (cost);
4228 /* Computes the value of candidate CAND at position AT in iteration NITER, and
4229 stores it to VAL. */
4231 static void
4232 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4233 aff_tree *val)
4235 aff_tree step, delta, nit;
4236 struct iv *iv = cand->iv;
4237 tree type = TREE_TYPE (iv->base);
4238 tree steptype = type;
4239 if (POINTER_TYPE_P (type))
4240 steptype = sizetype;
4242 tree_to_aff_combination (iv->step, steptype, &step);
4243 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4244 aff_combination_convert (&nit, steptype);
4245 aff_combination_mult (&nit, &step, &delta);
4246 if (stmt_after_increment (loop, cand, at))
4247 aff_combination_add (&delta, &step);
4249 tree_to_aff_combination (iv->base, type, val);
4250 aff_combination_add (val, &delta);
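/* In other words (illustration): VAL = base + step * NITER, with one
   extra step added when AT follows the candidate's increment.  E.g. for
   base 0, step 4 and niter 10 this yields 40, or 44 after the
   increment.  */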
4253 /* Returns the period of the induction variable IV. */
4255 static tree
4256 iv_period (struct iv *iv)
4258 tree step = iv->step, period, type;
4259 tree pow2div;
4261 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4263 type = unsigned_type_for (TREE_TYPE (step));
4264 /* The period of the iv is lcm (step, type_range)/step - 1,
4265 i.e., N*type_range/step - 1, where N == step / gcd (step, type_range).
4266 Since the type range is a power of two, N == (step >> num_ending_zeros (step)),
4267 so the final result is
4269 (type_range >> num_ending_zeros (step)) - 1
4272 pow2div = num_ending_zeros (step);
4274 period = build_low_bits_mask (type,
4275 (TYPE_PRECISION (type)
4276 - tree_low_cst (pow2div, 1)));
4278 return period;
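/* A minimal standalone sketch of the same computation for a concrete type;
   illustration only -- iv_period_u32 is a hypothetical helper, not used by
   this pass.  For a 32-bit unsigned iv with a nonzero step, type_range is
   2^32, and (2^32 >> ctz (step)) - 1 equals UINT32_MAX >> ctz (step):

     #include <stdint.h>

     static uint32_t
     iv_period_u32 (uint32_t step)
     {
       unsigned tz = __builtin_ctz (step);   // num_ending_zeros (step)
       return UINT32_MAX >> tz;              // (type_range >> tz) - 1
     }

   E.g. step == 4 gives tz == 2 and a period of 0x3fffffff.  */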
4281 /* Returns the comparison operator used when eliminating the iv USE. */
4283 static enum tree_code
4284 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4286 struct loop *loop = data->current_loop;
4287 basic_block ex_bb;
4288 edge exit;
4290 ex_bb = gimple_bb (use->stmt);
4291 exit = EDGE_SUCC (ex_bb, 0);
4292 if (flow_bb_inside_loop_p (loop, exit->dest))
4293 exit = EDGE_SUCC (ex_bb, 1);
4295 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
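/* I.e. (illustration): if the loop is left on the true edge of the
   condition, the replacement test exits on equality with the computed
   bound (EQ_EXPR); if it is left on the false edge, the test keeps
   iterating while the candidate differs from the bound (NE_EXPR).  */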
4298 /* Check whether it is possible to express the condition in USE by comparison
4299 of candidate CAND. If so, store the value compared with to BOUND. */
4301 static bool
4302 may_eliminate_iv (struct ivopts_data *data,
4303 struct iv_use *use, struct iv_cand *cand, tree *bound)
4305 basic_block ex_bb;
4306 edge exit;
4307 tree nit, period;
4308 struct loop *loop = data->current_loop;
4309 aff_tree bnd;
4310 struct tree_niter_desc *desc = NULL;
4312 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4313 return false;
4315 /* For now this works only for exits that dominate the loop latch.
4316 TODO: extend to other conditions inside loop body. */
4317 ex_bb = gimple_bb (use->stmt);
4318 if (use->stmt != last_stmt (ex_bb)
4319 || gimple_code (use->stmt) != GIMPLE_COND
4320 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4321 return false;
4323 exit = EDGE_SUCC (ex_bb, 0);
4324 if (flow_bb_inside_loop_p (loop, exit->dest))
4325 exit = EDGE_SUCC (ex_bb, 1);
4326 if (flow_bb_inside_loop_p (loop, exit->dest))
4327 return false;
4329 nit = niter_for_exit (data, exit, &desc);
4330 if (!nit)
4331 return false;
4333 /* Determine whether we can use the variable to test the exit condition.
4334 This is the case iff the period of the induction variable is greater
4335 than the number of iterations for which the exit condition is true. */
4336 period = iv_period (cand->iv);
4338 /* If the number of iterations is constant, compare against it directly. */
4339 if (TREE_CODE (nit) == INTEGER_CST)
4341 /* See cand_value_at. */
4342 if (stmt_after_increment (loop, cand, use->stmt))
4344 if (!tree_int_cst_lt (nit, period))
4345 return false;
4347 else
4349 if (tree_int_cst_lt (period, nit))
4350 return false;
4354 /* If not, and if this is the only possible exit of the loop, see whether
4355 we can get a conservative estimate on the number of iterations of the
4356 entire loop and compare against that instead. */
4357 else
4359 double_int period_value, max_niter;
4361 max_niter = desc->max;
4362 if (stmt_after_increment (loop, cand, use->stmt))
4363 max_niter = double_int_add (max_niter, double_int_one);
4364 period_value = tree_to_double_int (period);
4365 if (double_int_ucmp (max_niter, period_value) > 0)
4367 /* See if we can take advantage of inferred loop bound information. */
4368 if (loop_only_exit_p (loop, exit))
4370 if (!estimated_loop_iterations (loop, true, &max_niter))
4371 return false;
4372 /* The loop bound is already adjusted by adding 1. */
4373 if (double_int_ucmp (max_niter, period_value) > 0)
4374 return false;
4376 else
4377 return false;
4381 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4383 *bound = aff_combination_to_tree (&bnd);
4384 /* It is unlikely that computing the number of iterations using division
4385 would be more profitable than keeping the original induction variable. */
4386 if (expression_expensive_p (*bound))
4387 return false;
4388 return true;
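/* Worked example of the period check above (illustration only): for an
   exit test driven by an 8-bit unsigned iv with step 2, iv_period gives
   (256 >> 1) - 1 == 127.  If the exit condition holds for nit == 100
   iterations, 100 <= 127 holds, so the candidate may replace the test,
   with the bound computed by cand_value_at.  */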
4392 /* Determines cost of basing replacement of USE on CAND in a condition. */
4394 static bool
4395 determine_use_iv_cost_condition (struct ivopts_data *data,
4396 struct iv_use *use, struct iv_cand *cand)
4398 tree bound = NULL_TREE;
4399 struct iv *cmp_iv;
4400 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4401 comp_cost elim_cost, express_cost, cost;
4402 bool ok;
4403 int inv_expr_id = -1;
4404 tree *control_var, *bound_cst;
4406 /* Only consider real candidates. */
4407 if (!cand->iv)
4409 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4410 return false;
4413 /* Try iv elimination. */
4414 if (may_eliminate_iv (data, use, cand, &bound))
4416 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4417 /* The bound is a loop invariant, so it will be computed only
4418 once. */
4419 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4421 else
4422 elim_cost = infinite_cost;
4424 /* Try expressing the original giv. If it is compared with an invariant,
4425 note that we cannot get rid of it. */
4426 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4427 NULL, &cmp_iv);
4428 gcc_assert (ok);
4430 /* When the condition is a comparison of the candidate IV against
4431 zero, prefer this IV.
4433 TODO: The constant that we're subtracting from the cost should
4434 be target-dependent. This information should be added to the
4435 target costs for each backend. */
4436 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4437 && integer_zerop (*bound_cst)
4438 && (operand_equal_p (*control_var, cand->var_after, 0)
4439 || operand_equal_p (*control_var, cand->var_before, 0)))
4440 elim_cost.cost -= 1;
4442 express_cost = get_computation_cost (data, use, cand, false,
4443 &depends_on_express, NULL,
4444 &inv_expr_id);
4445 fd_ivopts_data = data;
4446 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4448 /* Choose the better approach, preferring the eliminated IV. */
4449 if (compare_costs (elim_cost, express_cost) <= 0)
4451 cost = elim_cost;
4452 depends_on = depends_on_elim;
4453 depends_on_elim = NULL;
4455 else
4457 cost = express_cost;
4458 depends_on = depends_on_express;
4459 depends_on_express = NULL;
4460 bound = NULL_TREE;
4463 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4465 if (depends_on_elim)
4466 BITMAP_FREE (depends_on_elim);
4467 if (depends_on_express)
4468 BITMAP_FREE (depends_on_express);
4470 return !infinite_cost_p (cost);
4473 /* Determines cost of basing replacement of USE on CAND. Returns false
4474 if USE cannot be based on CAND. */
4476 static bool
4477 determine_use_iv_cost (struct ivopts_data *data,
4478 struct iv_use *use, struct iv_cand *cand)
4480 switch (use->type)
4482 case USE_NONLINEAR_EXPR:
4483 return determine_use_iv_cost_generic (data, use, cand);
4485 case USE_ADDRESS:
4486 return determine_use_iv_cost_address (data, use, cand);
4488 case USE_COMPARE:
4489 return determine_use_iv_cost_condition (data, use, cand);
4491 default:
4492 gcc_unreachable ();
4496 /* Return true if get_computation_cost indicates that autoincrement is
4497 a possibility for the pair of USE and CAND, false otherwise. */
4499 static bool
4500 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4501 struct iv_cand *cand)
4503 bitmap depends_on;
4504 bool can_autoinc;
4505 comp_cost cost;
4507 if (use->type != USE_ADDRESS)
4508 return false;
4510 cost = get_computation_cost (data, use, cand, true, &depends_on,
4511 &can_autoinc, NULL);
4513 BITMAP_FREE (depends_on);
4515 return !infinite_cost_p (cost) && can_autoinc;
4518 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4519 use that allows autoincrement, and set their AINC_USE if possible. */
4521 static void
4522 set_autoinc_for_original_candidates (struct ivopts_data *data)
4524 unsigned i, j;
4526 for (i = 0; i < n_iv_cands (data); i++)
4528 struct iv_cand *cand = iv_cand (data, i);
4529 struct iv_use *closest = NULL;
4530 if (cand->pos != IP_ORIGINAL)
4531 continue;
4532 for (j = 0; j < n_iv_uses (data); j++)
4534 struct iv_use *use = iv_use (data, j);
4535 unsigned uid = gimple_uid (use->stmt);
4536 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4537 || uid > gimple_uid (cand->incremented_at))
4538 continue;
4539 if (closest == NULL || uid > gimple_uid (closest->stmt))
4540 closest = use;
4542 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4543 continue;
4544 cand->ainc_use = closest;
4548 /* Finds the candidates for the induction variables. */
4550 static void
4551 find_iv_candidates (struct ivopts_data *data)
4553 /* Add commonly used ivs. */
4554 add_standard_iv_candidates (data);
4556 /* Add old induction variables. */
4557 add_old_ivs_candidates (data);
4559 /* Add induction variables derived from uses. */
4560 add_derived_ivs_candidates (data);
4562 set_autoinc_for_original_candidates (data);
4564 /* Record the important candidates. */
4565 record_important_candidates (data);
4568 /* Determines costs of basing the use of the iv on an iv candidate. */
4570 static void
4571 determine_use_iv_costs (struct ivopts_data *data)
4573 unsigned i, j;
4574 struct iv_use *use;
4575 struct iv_cand *cand;
4576 bitmap to_clear = BITMAP_ALLOC (NULL);
4578 alloc_use_cost_map (data);
4580 for (i = 0; i < n_iv_uses (data); i++)
4582 use = iv_use (data, i);
4584 if (data->consider_all_candidates)
4586 for (j = 0; j < n_iv_cands (data); j++)
4588 cand = iv_cand (data, j);
4589 determine_use_iv_cost (data, use, cand);
4592 else
4594 bitmap_iterator bi;
4596 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4598 cand = iv_cand (data, j);
4599 if (!determine_use_iv_cost (data, use, cand))
4600 bitmap_set_bit (to_clear, j);
4603 /* Remove the candidates for which the cost is infinite from
4604 the list of related candidates. */
4605 bitmap_and_compl_into (use->related_cands, to_clear);
4606 bitmap_clear (to_clear);
4610 BITMAP_FREE (to_clear);
4612 if (dump_file && (dump_flags & TDF_DETAILS))
4614 fprintf (dump_file, "Use-candidate costs:\n");
4616 for (i = 0; i < n_iv_uses (data); i++)
4618 use = iv_use (data, i);
4620 fprintf (dump_file, "Use %d:\n", i);
4621 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4622 for (j = 0; j < use->n_map_members; j++)
4624 if (!use->cost_map[j].cand
4625 || infinite_cost_p (use->cost_map[j].cost))
4626 continue;
4628 fprintf (dump_file, " %d\t%d\t%d\t",
4629 use->cost_map[j].cand->id,
4630 use->cost_map[j].cost.cost,
4631 use->cost_map[j].cost.complexity);
4632 if (use->cost_map[j].depends_on)
4633 bitmap_print (dump_file,
4634 use->cost_map[j].depends_on, "","");
4635 if (use->cost_map[j].inv_expr_id != -1)
4636 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4637 fprintf (dump_file, "\n");
4640 fprintf (dump_file, "\n");
4642 fprintf (dump_file, "\n");
4646 /* Determines cost of the candidate CAND. */
4648 static void
4649 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4651 comp_cost cost_base;
4652 unsigned cost, cost_step;
4653 tree base;
4655 if (!cand->iv)
4657 cand->cost = 0;
4658 return;
4661 /* There are two costs associated with the candidate -- its increment
4662 and its initialization. The second is almost negligible for any loop
4663 that rolls enough, so we take it into account only a little. */
4665 base = cand->iv->base;
4666 cost_base = force_var_cost (data, base, NULL);
4667 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4669 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4671 /* Prefer the original ivs unless we may gain something by replacing them.
4672 The reason is to make debugging simpler, so this is not relevant for
4673 artificial ivs created by other optimization passes. */
4674 if (cand->pos != IP_ORIGINAL
4675 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4676 cost++;
4678 /* Prefer not to insert statements into the latch unless there are some
4679 already (so that we do not create unnecessary jumps). */
4680 if (cand->pos == IP_END
4681 && empty_block_p (ip_end_pos (data->current_loop)))
4682 cost++;
4684 cand->cost = cost;
4685 cand->cost_step = cost_step;
4688 /* Determines costs of computation of the candidates. */
4690 static void
4691 determine_iv_costs (struct ivopts_data *data)
4693 unsigned i;
4695 if (dump_file && (dump_flags & TDF_DETAILS))
4697 fprintf (dump_file, "Candidate costs:\n");
4698 fprintf (dump_file, " cand\tcost\n");
4701 for (i = 0; i < n_iv_cands (data); i++)
4703 struct iv_cand *cand = iv_cand (data, i);
4705 determine_iv_cost (data, cand);
4707 if (dump_file && (dump_flags & TDF_DETAILS))
4708 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4711 if (dump_file && (dump_flags & TDF_DETAILS))
4712 fprintf (dump_file, "\n");
4715 /* Calculates cost for having SIZE induction variables. */
4717 static unsigned
4718 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4720 /* We add size to the cost, so that we prefer eliminating ivs
4721 if possible. */
4722 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4723 data->body_includes_call);
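/* For instance (hypothetical numbers): if estimate_reg_pressure_cost
   returned 0 for both a 2-iv and a 3-iv set, the "+ size" term would
   still make the 3-iv set cost one unit more, breaking ties toward
   fewer induction variables.  */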
4726 /* For each size of the induction variable set determine the penalty. */
4728 static void
4729 determine_set_costs (struct ivopts_data *data)
4731 unsigned j, n;
4732 gimple phi;
4733 gimple_stmt_iterator psi;
4734 tree op;
4735 struct loop *loop = data->current_loop;
4736 bitmap_iterator bi;
4738 if (dump_file && (dump_flags & TDF_DETAILS))
4740 fprintf (dump_file, "Global costs:\n");
4741 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4742 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4743 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4744 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4747 n = 0;
4748 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4750 phi = gsi_stmt (psi);
4751 op = PHI_RESULT (phi);
4753 if (!is_gimple_reg (op))
4754 continue;
4756 if (get_iv (data, op))
4757 continue;
4759 n++;
4762 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4764 struct version_info *info = ver_info (data, j);
4766 if (info->inv_id && info->has_nonlin_use)
4767 n++;
4770 data->regs_used = n;
4771 if (dump_file && (dump_flags & TDF_DETAILS))
4772 fprintf (dump_file, " regs_used %d\n", n);
4774 if (dump_file && (dump_flags & TDF_DETAILS))
4776 fprintf (dump_file, " cost for size:\n");
4777 fprintf (dump_file, " ivs\tcost\n");
4778 for (j = 0; j <= 2 * target_avail_regs; j++)
4779 fprintf (dump_file, " %d\t%d\n", j,
4780 ivopts_global_cost_for_size (data, j));
4781 fprintf (dump_file, "\n");
4785 /* Returns true if A is a cheaper cost pair than B. */
4787 static bool
4788 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4790 int cmp;
4792 if (!a)
4793 return false;
4795 if (!b)
4796 return true;
4798 cmp = compare_costs (a->cost, b->cost);
4799 if (cmp < 0)
4800 return true;
4802 if (cmp > 0)
4803 return false;
4805 /* In case the costs are the same, prefer the cheaper candidate. */
4806 if (a->cand->cost < b->cand->cost)
4807 return true;
4809 return false;
4813 /* Returns the candidate by which USE is expressed in IVS. */
4815 static struct cost_pair *
4816 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4818 return ivs->cand_for_use[use->id];
4821 /* Computes the cost field of IVS structure. */
4823 static void
4824 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4826 comp_cost cost = ivs->cand_use_cost;
4828 cost.cost += ivs->cand_cost;
4830 cost.cost += ivopts_global_cost_for_size (data,
4831 ivs->n_regs + ivs->num_used_inv_expr);
4833 ivs->cost = cost;
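/* I.e. (illustration): cost (IVS) = cand_use_cost + cand_cost
   + ivopts_global_cost_for_size (n_regs + num_used_inv_expr), where
   n_regs counts both the candidate ivs and the used invariants.  */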
4836 /* Remove invariants in set INVS from set IVS. */
4838 static void
4839 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4841 bitmap_iterator bi;
4842 unsigned iid;
4844 if (!invs)
4845 return;
4847 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4849 ivs->n_invariant_uses[iid]--;
4850 if (ivs->n_invariant_uses[iid] == 0)
4851 ivs->n_regs--;
4855 /* Set USE not to be expressed by any candidate in IVS. */
4857 static void
4858 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4859 struct iv_use *use)
4861 unsigned uid = use->id, cid;
4862 struct cost_pair *cp;
4864 cp = ivs->cand_for_use[uid];
4865 if (!cp)
4866 return;
4867 cid = cp->cand->id;
4869 ivs->bad_uses++;
4870 ivs->cand_for_use[uid] = NULL;
4871 ivs->n_cand_uses[cid]--;
4873 if (ivs->n_cand_uses[cid] == 0)
4875 bitmap_clear_bit (ivs->cands, cid);
4876 /* Do not count the pseudocandidates. */
4877 if (cp->cand->iv)
4878 ivs->n_regs--;
4879 ivs->n_cands--;
4880 ivs->cand_cost -= cp->cand->cost;
4882 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4885 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4887 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4889 if (cp->inv_expr_id != -1)
4891 ivs->used_inv_expr[cp->inv_expr_id]--;
4892 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4893 ivs->num_used_inv_expr--;
4895 iv_ca_recount_cost (data, ivs);
4898 /* Add invariants in set INVS to set IVS. */
4900 static void
4901 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4903 bitmap_iterator bi;
4904 unsigned iid;
4906 if (!invs)
4907 return;
4909 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4911 ivs->n_invariant_uses[iid]++;
4912 if (ivs->n_invariant_uses[iid] == 1)
4913 ivs->n_regs++;
4917 /* Set cost pair for USE in set IVS to CP. */
4919 static void
4920 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4921 struct iv_use *use, struct cost_pair *cp)
4923 unsigned uid = use->id, cid;
4925 if (ivs->cand_for_use[uid] == cp)
4926 return;
4928 if (ivs->cand_for_use[uid])
4929 iv_ca_set_no_cp (data, ivs, use);
4931 if (cp)
4933 cid = cp->cand->id;
4935 ivs->bad_uses--;
4936 ivs->cand_for_use[uid] = cp;
4937 ivs->n_cand_uses[cid]++;
4938 if (ivs->n_cand_uses[cid] == 1)
4940 bitmap_set_bit (ivs->cands, cid);
4941 /* Do not count the pseudocandidates. */
4942 if (cp->cand->iv)
4943 ivs->n_regs++;
4944 ivs->n_cands++;
4945 ivs->cand_cost += cp->cand->cost;
4947 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4950 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4951 iv_ca_set_add_invariants (ivs, cp->depends_on);
4953 if (cp->inv_expr_id != -1)
4955 ivs->used_inv_expr[cp->inv_expr_id]++;
4956 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4957 ivs->num_used_inv_expr++;
4959 iv_ca_recount_cost (data, ivs);
4963 /* Extend set IVS by expressing USE by some of the candidates in it
4964 if possible. All important candidates will be considered
4965 if IMPORTANT_CANDIDATES is true. */
4967 static void
4968 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4969 struct iv_use *use, bool important_candidates)
4971 struct cost_pair *best_cp = NULL, *cp;
4972 bitmap_iterator bi;
4973 bitmap cands;
4974 unsigned i;
4976 gcc_assert (ivs->upto >= use->id);
4978 if (ivs->upto == use->id)
4980 ivs->upto++;
4981 ivs->bad_uses++;
4984 cands = (important_candidates ? data->important_candidates : ivs->cands);
4985 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
4987 struct iv_cand *cand = iv_cand (data, i);
4989 cp = get_use_iv_cost (data, use, cand);
4991 if (cheaper_cost_pair (cp, best_cp))
4992 best_cp = cp;
4995 iv_ca_set_cp (data, ivs, use, best_cp);
4998 /* Get cost for assignment IVS. */
5000 static comp_cost
5001 iv_ca_cost (struct iv_ca *ivs)
5003 /* This was a conditional expression but it triggered a bug in
5004 Sun C 5.5. */
5005 if (ivs->bad_uses)
5006 return infinite_cost;
5007 else
5008 return ivs->cost;
5011 /* Returns true if all dependences of CP are among invariants in IVS. */
5013 static bool
5014 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5016 unsigned i;
5017 bitmap_iterator bi;
5019 if (!cp->depends_on)
5020 return true;
5022 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5024 if (ivs->n_invariant_uses[i] == 0)
5025 return false;
5028 return true;
5031 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5032 it before NEXT_CHANGE. */
5034 static struct iv_ca_delta *
5035 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5036 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5038 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5040 change->use = use;
5041 change->old_cp = old_cp;
5042 change->new_cp = new_cp;
5043 change->next_change = next_change;
5045 return change;
5048 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5049 are rewritten. */
5051 static struct iv_ca_delta *
5052 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5054 struct iv_ca_delta *last;
5056 if (!l2)
5057 return l1;
5059 if (!l1)
5060 return l2;
5062 for (last = l1; last->next_change; last = last->next_change)
5063 continue;
5064 last->next_change = l2;
5066 return l1;
5069 /* Reverse the list of changes DELTA, forming the inverse to it. */
5071 static struct iv_ca_delta *
5072 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5074 struct iv_ca_delta *act, *next, *prev = NULL;
5075 struct cost_pair *tmp;
5077 for (act = delta; act; act = next)
5079 next = act->next_change;
5080 act->next_change = prev;
5081 prev = act;
5083 tmp = act->old_cp;
5084 act->old_cp = act->new_cp;
5085 act->new_cp = tmp;
5088 return prev;
5091 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5092 reverted instead. */
5094 static void
5095 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5096 struct iv_ca_delta *delta, bool forward)
5098 struct cost_pair *from, *to;
5099 struct iv_ca_delta *act;
5101 if (!forward)
5102 delta = iv_ca_delta_reverse (delta);
5104 for (act = delta; act; act = act->next_change)
5106 from = act->old_cp;
5107 to = act->new_cp;
5108 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5109 iv_ca_set_cp (data, ivs, act->use, to);
5112 if (!forward)
5113 iv_ca_delta_reverse (delta);
5116 /* Returns true if CAND is used in IVS. */
5118 static bool
5119 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5121 return ivs->n_cand_uses[cand->id] > 0;
5124 /* Returns number of induction variable candidates in the set IVS. */
5126 static unsigned
5127 iv_ca_n_cands (struct iv_ca *ivs)
5129 return ivs->n_cands;
5132 /* Free the list of changes DELTA. */
5134 static void
5135 iv_ca_delta_free (struct iv_ca_delta **delta)
5137 struct iv_ca_delta *act, *next;
5139 for (act = *delta; act; act = next)
5141 next = act->next_change;
5142 free (act);
5145 *delta = NULL;
5148 /* Allocates a new iv candidate assignment. */
5150 static struct iv_ca *
5151 iv_ca_new (struct ivopts_data *data)
5153 struct iv_ca *nw = XNEW (struct iv_ca);
5155 nw->upto = 0;
5156 nw->bad_uses = 0;
5157 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5158 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5159 nw->cands = BITMAP_ALLOC (NULL);
5160 nw->n_cands = 0;
5161 nw->n_regs = 0;
5162 nw->cand_use_cost = zero_cost;
5163 nw->cand_cost = 0;
5164 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5165 nw->cost = zero_cost;
5166 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5167 nw->num_used_inv_expr = 0;
5169 return nw;
5172 /* Free memory occupied by the set IVS. */
5174 static void
5175 iv_ca_free (struct iv_ca **ivs)
5177 free ((*ivs)->cand_for_use);
5178 free ((*ivs)->n_cand_uses);
5179 BITMAP_FREE ((*ivs)->cands);
5180 free ((*ivs)->n_invariant_uses);
5181 free ((*ivs)->used_inv_expr);
5182 free (*ivs);
5183 *ivs = NULL;
5186 /* Dumps IVS to FILE. */
5188 static void
5189 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5191 const char *pref = " invariants ";
5192 unsigned i;
5193 comp_cost cost = iv_ca_cost (ivs);
5195 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5196 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5197 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5198 bitmap_print (file, ivs->cands, " candidates: ","\n");
5200 for (i = 0; i < ivs->upto; i++)
5202 struct iv_use *use = iv_use (data, i);
5203 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5204 if (cp)
5205 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5206 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5207 else
5208 fprintf (file, " use:%d --> ??\n", use->id);
5211 for (i = 1; i <= data->max_inv_id; i++)
5212 if (ivs->n_invariant_uses[i])
5214 fprintf (file, "%s%d", pref, i);
5215 pref = ", ";
5217 fprintf (file, "\n\n");
5220 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5221 of the new set, and store the differences in DELTA. The number of induction
5222 variables in the new set is stored to N_IVS. MIN_NCAND is a flag; when it
5223 is true, the function tries to find a solution with minimal iv candidates. */
5225 static comp_cost
5226 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5227 struct iv_cand *cand, struct iv_ca_delta **delta,
5228 unsigned *n_ivs, bool min_ncand)
5230 unsigned i;
5231 comp_cost cost;
5232 struct iv_use *use;
5233 struct cost_pair *old_cp, *new_cp;
5235 *delta = NULL;
5236 for (i = 0; i < ivs->upto; i++)
5238 use = iv_use (data, i);
5239 old_cp = iv_ca_cand_for_use (ivs, use);
5241 if (old_cp
5242 && old_cp->cand == cand)
5243 continue;
5245 new_cp = get_use_iv_cost (data, use, cand);
5246 if (!new_cp)
5247 continue;
5249 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5250 continue;
5252 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5253 continue;
5255 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5258 iv_ca_delta_commit (data, ivs, *delta, true);
5259 cost = iv_ca_cost (ivs);
5260 if (n_ivs)
5261 *n_ivs = iv_ca_n_cands (ivs);
5262 iv_ca_delta_commit (data, ivs, *delta, false);
5264 return cost;
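/* The delta list lets the search evaluate a tentative change without
   copying the whole assignment; the idiom used above (and again in
   iv_ca_narrow and iv_ca_prune below) is:

     iv_ca_delta_commit (data, ivs, delta, true);    // apply
     cost = iv_ca_cost (ivs);                        // measure
     iv_ca_delta_commit (data, ivs, delta, false);   // revert

   Illustration of the existing calls only.  */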
5267 /* Try narrowing set IVS by removing CAND. Return the cost of
5268 the new set and store the differences in DELTA. */
5270 static comp_cost
5271 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5272 struct iv_cand *cand, struct iv_ca_delta **delta)
5274 unsigned i, ci;
5275 struct iv_use *use;
5276 struct cost_pair *old_cp, *new_cp, *cp;
5277 bitmap_iterator bi;
5278 struct iv_cand *cnd;
5279 comp_cost cost;
5281 *delta = NULL;
5282 for (i = 0; i < n_iv_uses (data); i++)
5284 use = iv_use (data, i);
5286 old_cp = iv_ca_cand_for_use (ivs, use);
5287 if (old_cp->cand != cand)
5288 continue;
5290 new_cp = NULL;
5292 if (data->consider_all_candidates)
5294 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5296 if (ci == cand->id)
5297 continue;
5299 cnd = iv_cand (data, ci);
5301 cp = get_use_iv_cost (data, use, cnd);
5302 if (!cp)
5303 continue;
5305 if (!iv_ca_has_deps (ivs, cp))
5306 continue;
5308 if (!cheaper_cost_pair (cp, new_cp))
5309 continue;
5311 new_cp = cp;
5314 else
5316 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5318 if (ci == cand->id)
5319 continue;
5321 cnd = iv_cand (data, ci);
5323 cp = get_use_iv_cost (data, use, cnd);
5324 if (!cp)
5325 continue;
5326 if (!iv_ca_has_deps (ivs, cp))
5327 continue;
5329 if (!cheaper_cost_pair (cp, new_cp))
5330 continue;
5332 new_cp = cp;
5336 if (!new_cp)
5338 iv_ca_delta_free (delta);
5339 return infinite_cost;
5342 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5345 iv_ca_delta_commit (data, ivs, *delta, true);
5346 cost = iv_ca_cost (ivs);
5347 iv_ca_delta_commit (data, ivs, *delta, false);
5349 return cost;
5352 /* Try optimizing the set of candidates IVS by removing candidates different
5353 from EXCEPT_CAND from it. Return the cost of the new set, and store
5354 differences in DELTA. */
5356 static comp_cost
5357 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5358 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5360 bitmap_iterator bi;
5361 struct iv_ca_delta *act_delta, *best_delta;
5362 unsigned i;
5363 comp_cost best_cost, acost;
5364 struct iv_cand *cand;
5366 best_delta = NULL;
5367 best_cost = iv_ca_cost (ivs);
5369 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5371 cand = iv_cand (data, i);
5373 if (cand == except_cand)
5374 continue;
5376 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5378 if (compare_costs (acost, best_cost) < 0)
5380 best_cost = acost;
5381 iv_ca_delta_free (&best_delta);
5382 best_delta = act_delta;
5384 else
5385 iv_ca_delta_free (&act_delta);
5388 if (!best_delta)
5390 *delta = NULL;
5391 return best_cost;
5394 /* Recurse to possibly remove other unnecessary ivs. */
5395 iv_ca_delta_commit (data, ivs, best_delta, true);
5396 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5397 iv_ca_delta_commit (data, ivs, best_delta, false);
5398 *delta = iv_ca_delta_join (best_delta, *delta);
5399 return best_cost;
5402 /* Tries to extend the set IVS in the best possible way in order
5403 to express USE. If ORIGINALP is true, prefer candidates from
5404 the original set of IVs, otherwise favor important candidates not
5405 based on any memory object. */
5407 static bool
5408 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5409 struct iv_use *use, bool originalp)
5411 comp_cost best_cost, act_cost;
5412 unsigned i;
5413 bitmap_iterator bi;
5414 struct iv_cand *cand;
5415 struct iv_ca_delta *best_delta = NULL, *act_delta;
5416 struct cost_pair *cp;
5418 iv_ca_add_use (data, ivs, use, false);
5419 best_cost = iv_ca_cost (ivs);
5421 cp = iv_ca_cand_for_use (ivs, use);
5422 if (!cp)
5424 ivs->upto--;
5425 ivs->bad_uses--;
5426 iv_ca_add_use (data, ivs, use, true);
5427 best_cost = iv_ca_cost (ivs);
5428 cp = iv_ca_cand_for_use (ivs, use);
5430 if (cp)
5432 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5433 iv_ca_set_no_cp (data, ivs, use);
5436 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5437 first try important candidates not based on any memory object. Only if
5438 this fails, try the specific ones. Rationale -- in loops with many
5439 variables the best choice often is to use just one generic biv. If we
5440 added here many ivs specific to the uses, the optimization algorithm later
5441 would be likely to get stuck in a local minimum, thus causing us to create
5442 too many ivs. The approach from few ivs to more seems more likely to be
5443 successful -- starting from few ivs, replacing an expensive use by a
5444 specific iv should always be a win. */
5445 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5447 cand = iv_cand (data, i);
5449 if (originalp && cand->pos != IP_ORIGINAL)
5450 continue;
5452 if (!originalp && cand->iv->base_object != NULL_TREE)
5453 continue;
5455 if (iv_ca_cand_used_p (ivs, cand))
5456 continue;
5458 cp = get_use_iv_cost (data, use, cand);
5459 if (!cp)
5460 continue;
5462 iv_ca_set_cp (data, ivs, use, cp);
5463 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5464 true);
5465 iv_ca_set_no_cp (data, ivs, use);
5466 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5468 if (compare_costs (act_cost, best_cost) < 0)
5470 best_cost = act_cost;
5472 iv_ca_delta_free (&best_delta);
5473 best_delta = act_delta;
5475 else
5476 iv_ca_delta_free (&act_delta);
5479 if (infinite_cost_p (best_cost))
5481 for (i = 0; i < use->n_map_members; i++)
5483 cp = use->cost_map + i;
5484 cand = cp->cand;
5485 if (!cand)
5486 continue;
5488 /* Already tried this. */
5489 if (cand->important)
5491 if (originalp && cand->pos == IP_ORIGINAL)
5492 continue;
5493 if (!originalp && cand->iv->base_object == NULL_TREE)
5494 continue;
5497 if (iv_ca_cand_used_p (ivs, cand))
5498 continue;
5500 act_delta = NULL;
5501 iv_ca_set_cp (data, ivs, use, cp);
5502 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5503 iv_ca_set_no_cp (data, ivs, use);
5504 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5505 cp, act_delta);
5507 if (compare_costs (act_cost, best_cost) < 0)
5509 best_cost = act_cost;
5511 if (best_delta)
5512 iv_ca_delta_free (&best_delta);
5513 best_delta = act_delta;
5515 else
5516 iv_ca_delta_free (&act_delta);
5520 iv_ca_delta_commit (data, ivs, best_delta, true);
5521 iv_ca_delta_free (&best_delta);
5523 return !infinite_cost_p (best_cost);
5526 /* Finds an initial assignment of candidates to uses. */
5528 static struct iv_ca *
5529 get_initial_solution (struct ivopts_data *data, bool originalp)
5531 struct iv_ca *ivs = iv_ca_new (data);
5532 unsigned i;
5534 for (i = 0; i < n_iv_uses (data); i++)
5535 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5537 iv_ca_free (&ivs);
5538 return NULL;
5541 return ivs;
5544 /* Tries to improve set of induction variables IVS. */
5546 static bool
5547 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5549 unsigned i, n_ivs;
5550 comp_cost acost, best_cost = iv_ca_cost (ivs);
5551 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5552 struct iv_cand *cand;
5554 /* Try extending the set of induction variables by one. */
5555 for (i = 0; i < n_iv_cands (data); i++)
5557 cand = iv_cand (data, i);
5559 if (iv_ca_cand_used_p (ivs, cand))
5560 continue;
5562 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5563 if (!act_delta)
5564 continue;
5566 /* If we successfully added the candidate and the set is small enough,
5567 try optimizing it by removing other candidates. */
5568 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5570 iv_ca_delta_commit (data, ivs, act_delta, true);
5571 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5572 iv_ca_delta_commit (data, ivs, act_delta, false);
5573 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5576 if (compare_costs (acost, best_cost) < 0)
5578 best_cost = acost;
5579 iv_ca_delta_free (&best_delta);
5580 best_delta = act_delta;
5582 else
5583 iv_ca_delta_free (&act_delta);
5586 if (!best_delta)
5588 /* Try removing the candidates from the set instead. */
5589 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5591 /* Nothing more we can do. */
5592 if (!best_delta)
5593 return false;
5596 iv_ca_delta_commit (data, ivs, best_delta, true);
5597 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5598 iv_ca_delta_free (&best_delta);
5599 return true;
5602 /* Attempts to find the optimal set of induction variables. We use a simple
5603 greedy heuristic -- we try to replace at most one candidate in the selected
5604 solution and remove the unused ivs while this improves the cost. */
5606 static struct iv_ca *
5607 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5609 struct iv_ca *set;
5611 /* Get the initial solution. */
5612 set = get_initial_solution (data, originalp);
5613 if (!set)
5615 if (dump_file && (dump_flags & TDF_DETAILS))
5616 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5617 return NULL;
5620 if (dump_file && (dump_flags & TDF_DETAILS))
5622 fprintf (dump_file, "Initial set of candidates:\n");
5623 iv_ca_dump (data, dump_file, set);
5626 while (try_improve_iv_set (data, set))
5628 if (dump_file && (dump_flags & TDF_DETAILS))
5630 fprintf (dump_file, "Improved to:\n");
5631 iv_ca_dump (data, dump_file, set);
5635 return set;
5638 static struct iv_ca *
5639 find_optimal_iv_set (struct ivopts_data *data)
5641 unsigned i;
5642 struct iv_ca *set, *origset;
5643 struct iv_use *use;
5644 comp_cost cost, origcost;
5646 /* Determine the cost based on a strategy that starts with the original
5647 IVs, and try again using a strategy that prefers candidates not based
5648 on any memory object. */
5649 origset = find_optimal_iv_set_1 (data, true);
5650 set = find_optimal_iv_set_1 (data, false);
5652 if (!origset && !set)
5653 return NULL;
5655 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5656 cost = set ? iv_ca_cost (set) : infinite_cost;
5658 if (dump_file && (dump_flags & TDF_DETAILS))
5660 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5661 origcost.cost, origcost.complexity);
5662 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5663 cost.cost, cost.complexity);
5666 /* Choose the one with the best cost. */
5667 if (compare_costs (origcost, cost) <= 0)
5669 if (set)
5670 iv_ca_free (&set);
5671 set = origset;
5673 else if (origset)
5674 iv_ca_free (&origset);
5676 for (i = 0; i < n_iv_uses (data); i++)
5678 use = iv_use (data, i);
5679 use->selected = iv_ca_cand_for_use (set, use)->cand;
5682 return set;
5685 /* Creates a new induction variable corresponding to CAND. */
5687 static void
5688 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5690 gimple_stmt_iterator incr_pos;
5691 tree base;
5692 bool after = false;
5694 if (!cand->iv)
5695 return;
5697 switch (cand->pos)
5699 case IP_NORMAL:
5700 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5701 break;
5703 case IP_END:
5704 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5705 after = true;
5706 break;
5708 case IP_AFTER_USE:
5709 after = true;
5710 /* fall through */
5711 case IP_BEFORE_USE:
5712 incr_pos = gsi_for_stmt (cand->incremented_at);
5713 break;
5715 case IP_ORIGINAL:
5716 /* Mark that the iv is preserved. */
5717 name_info (data, cand->var_before)->preserve_biv = true;
5718 name_info (data, cand->var_after)->preserve_biv = true;
5720 /* Rewrite the increment so that it uses var_before directly. */
5721 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5722 return;
5725 gimple_add_tmp_var (cand->var_before);
5726 add_referenced_var (cand->var_before);
5728 base = unshare_expr (cand->iv->base);
5730 create_iv (base, unshare_expr (cand->iv->step),
5731 cand->var_before, data->current_loop,
5732 &incr_pos, after, &cand->var_before, &cand->var_after);
5735 /* Creates new induction variables described in SET. */
5737 static void
5738 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5740 unsigned i;
5741 struct iv_cand *cand;
5742 bitmap_iterator bi;
5744 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5746 cand = iv_cand (data, i);
5747 create_new_iv (data, cand);
5750 if (dump_file && (dump_flags & TDF_DETAILS))
5752 fprintf (dump_file, "\nSelected IV set: \n");
5753 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5755 cand = iv_cand (data, i);
5756 dump_cand (dump_file, cand);
5758 fprintf (dump_file, "\n");
5762 /* Rewrites USE (definition of iv used in a nonlinear expression)
5763 using candidate CAND. */
5765 static void
5766 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5767 struct iv_use *use, struct iv_cand *cand)
5769 tree comp;
5770 tree op, tgt;
5771 gimple ass;
5772 gimple_stmt_iterator bsi;
5774 /* An important special case -- if we are asked to express the value of
5775 the original iv by itself, just exit; there is no need to
5776 introduce a new computation (that might also need casting the
5777 variable to unsigned and back). */
5778 if (cand->pos == IP_ORIGINAL
5779 && cand->incremented_at == use->stmt)
5781 tree step, ctype, utype;
5782 enum tree_code incr_code = PLUS_EXPR, old_code;
5784 gcc_assert (is_gimple_assign (use->stmt));
5785 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5787 step = cand->iv->step;
5788 ctype = TREE_TYPE (step);
5789 utype = TREE_TYPE (cand->var_after);
5790 if (TREE_CODE (step) == NEGATE_EXPR)
5792 incr_code = MINUS_EXPR;
5793 step = TREE_OPERAND (step, 0);
5796 /* Check whether we may leave the computation unchanged.
5797 This is the case only if it does not rely on other
5798 computations in the loop -- otherwise, the computation
5799 we rely upon may be removed in remove_unused_ivs,
5800 thus leading to an ICE. */
5801 old_code = gimple_assign_rhs_code (use->stmt);
5802 if (old_code == PLUS_EXPR
5803 || old_code == MINUS_EXPR
5804 || old_code == POINTER_PLUS_EXPR)
5806 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5807 op = gimple_assign_rhs2 (use->stmt);
5808 else if (old_code != MINUS_EXPR
5809 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5810 op = gimple_assign_rhs1 (use->stmt);
5811 else
5812 op = NULL_TREE;
5814 else
5815 op = NULL_TREE;
5817 if (op
5818 && (TREE_CODE (op) == INTEGER_CST
5819 || operand_equal_p (op, step, 0)))
5820 return;
5822 /* Otherwise, add the necessary computations to express
5823 the iv. */
5824 op = fold_convert (ctype, cand->var_before);
5825 comp = fold_convert (utype,
5826 build2 (incr_code, ctype, op,
5827 unshare_expr (step)));
5829 else
5831 comp = get_computation (data->current_loop, use, cand);
5832 gcc_assert (comp != NULL_TREE);
5835 switch (gimple_code (use->stmt))
5837 case GIMPLE_PHI:
5838 tgt = PHI_RESULT (use->stmt);
5840 /* If we should keep the biv, do not replace it. */
5841 if (name_info (data, tgt)->preserve_biv)
5842 return;
5844 bsi = gsi_after_labels (gimple_bb (use->stmt));
5845 break;
5847 case GIMPLE_ASSIGN:
5848 tgt = gimple_assign_lhs (use->stmt);
5849 bsi = gsi_for_stmt (use->stmt);
5850 break;
5852 default:
5853 gcc_unreachable ();
5856 if (!valid_gimple_rhs_p (comp)
5857 || (gimple_code (use->stmt) != GIMPLE_PHI
5858 /* We can't allow re-allocating the stmt as it might still be
5859 pointed to. */
5860 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5861 >= gimple_num_ops (gsi_stmt (bsi)))))
5863 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5864 true, GSI_SAME_STMT);
5865 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5867 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5868 /* As this isn't a plain copy we have to reset alignment
5869 information. */
5870 if (SSA_NAME_PTR_INFO (comp))
5872 SSA_NAME_PTR_INFO (comp)->align = 1;
5873 SSA_NAME_PTR_INFO (comp)->misalign = 0;
5878 if (gimple_code (use->stmt) == GIMPLE_PHI)
5880 ass = gimple_build_assign (tgt, comp);
5881 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5883 bsi = gsi_for_stmt (use->stmt);
5884 remove_phi_node (&bsi, false);
5886 else
5888 gimple_assign_set_rhs_from_tree (&bsi, comp);
5889 use->stmt = gsi_stmt (bsi);
5893 /* Copies the reference information from OLD_REF to NEW_REF. */
5895 static void
5896 copy_ref_info (tree new_ref, tree old_ref)
5898 tree new_ptr_base = NULL_TREE;
5900 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5901 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5903 new_ptr_base = TREE_OPERAND (new_ref, 0);
5905 /* We can transfer points-to information from an old pointer
5906 or decl base to the new one. */
5907 if (new_ptr_base
5908 && TREE_CODE (new_ptr_base) == SSA_NAME
5909 && !SSA_NAME_PTR_INFO (new_ptr_base))
5911 tree base = get_base_address (old_ref);
5912 if (!base)
5914 else if ((TREE_CODE (base) == MEM_REF
5915 || TREE_CODE (base) == TARGET_MEM_REF)
5916 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5917 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5919 struct ptr_info_def *new_pi;
5920 duplicate_ssa_name_ptr_info
5921 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5922 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5923 /* We have to be careful about transferring alignment information. */
5924 if (TREE_CODE (old_ref) == MEM_REF
5925 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5926 && (TMR_INDEX2 (new_ref)
5927 || (TMR_STEP (new_ref)
5928 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5929 < new_pi->align)))))
5931 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5932 mem_ref_offset (new_ref)).low;
5933 new_pi->misalign &= (new_pi->align - 1);
5935 else
5937 new_pi->align = 1;
5938 new_pi->misalign = 0;
5941 else if (TREE_CODE (base) == VAR_DECL
5942 || TREE_CODE (base) == PARM_DECL
5943 || TREE_CODE (base) == RESULT_DECL)
5945 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5946 pt_solution_set_var (&pi->pt, base);
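/* Worked example of the misalignment update above (illustration only):
   with align 8, misalign 0, an old MEM_REF offset of 6 and a new offset
   of 4, the new misalign becomes (0 + 6 - 4) & (8 - 1) == 2.  */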
5951 /* Performs a peephole optimization to reorder the iv update statement with
5952 a mem ref to enable instruction combining in later phases. The mem ref uses
5953 the iv value before the update, so the reordering transformation requires
5954 adjustment of the offset. CAND is the selected IV_CAND.
5956 Example:
5958 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5959 iv2 = iv1 + 1;
5961 if (t < val) (1)
5962 goto L;
5963 goto Head;
5966 directly propagating t over to (1) will introduce an overlapping live range
5967 and thus increase register pressure. This peephole transforms it into:
5970 iv2 = iv1 + 1;
5971 t = MEM_REF (base, iv2, 8, 8);
5972 if (t < val)
5973 goto L;
5974 goto Head;
5977 static void
5978 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5980 tree var_after;
5981 gimple iv_update, stmt;
5982 basic_block bb;
5983 gimple_stmt_iterator gsi, gsi_iv;
5985 if (cand->pos != IP_NORMAL)
5986 return;
5988 var_after = cand->var_after;
5989 iv_update = SSA_NAME_DEF_STMT (var_after);
5991 bb = gimple_bb (iv_update);
5992 gsi = gsi_last_nondebug_bb (bb);
5993 stmt = gsi_stmt (gsi);
5995 /* Only handle a conditional statement for now. */
5996 if (gimple_code (stmt) != GIMPLE_COND)
5997 return;
5999 gsi_prev_nondebug (&gsi);
6000 stmt = gsi_stmt (gsi);
6001 if (stmt != iv_update)
6002 return;
6004 gsi_prev_nondebug (&gsi);
6005 if (gsi_end_p (gsi))
6006 return;
6008 stmt = gsi_stmt (gsi);
6009 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6010 return;
6012 if (stmt != use->stmt)
6013 return;
6015 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6016 return;
6018 if (dump_file && (dump_flags & TDF_DETAILS))
6020 fprintf (dump_file, "Reordering \n");
6021 print_gimple_stmt (dump_file, iv_update, 0, 0);
6022 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6023 fprintf (dump_file, "\n");
6026 gsi = gsi_for_stmt (use->stmt);
6027 gsi_iv = gsi_for_stmt (iv_update);
6028 gsi_move_before (&gsi_iv, &gsi);
6030 cand->pos = IP_BEFORE_USE;
6031 cand->incremented_at = use->stmt;
6034 /* Rewrites USE (address that is an iv) using candidate CAND. */
6036 static void
6037 rewrite_use_address (struct ivopts_data *data,
6038 struct iv_use *use, struct iv_cand *cand)
6040 aff_tree aff;
6041 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6042 tree base_hint = NULL_TREE;
6043 tree ref, iv;
6044 bool ok;
6046 adjust_iv_update_pos (cand, use);
6047 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6048 gcc_assert (ok);
6049 unshare_aff_combination (&aff);
6051 /* To avoid undefined overflow problems, all IV candidates use unsigned
6052 integer types. The drawback is that this makes it impossible for
6053 create_mem_ref to distinguish an IV that is based on a memory object
6054 from one that represents simply an offset.
6056 To work around this problem, we pass a hint to create_mem_ref that
6057 indicates which variable (if any) in aff is an IV based on a memory
6058 object. Note that we only consider the candidate. If this is not
6059 based on an object, the base of the reference is in some subexpression
6060 of the use -- but these will use pointer types, so they are recognized
6061 by the create_mem_ref heuristics anyway. */
6062 if (cand->iv->base_object)
6063 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6065 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6066 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6067 reference_alias_ptr_type (*use->op_p),
6068 iv, base_hint, data->speed);
6069 copy_ref_info (ref, *use->op_p);
6070 *use->op_p = ref;
6073 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6074 candidate CAND. */
6076 static void
6077 rewrite_use_compare (struct ivopts_data *data,
6078 struct iv_use *use, struct iv_cand *cand)
6080 tree comp, *var_p, op, bound;
6081 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6082 enum tree_code compare;
6083 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6084 bool ok;
6086 bound = cp->value;
6087 if (bound)
6089 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6090 tree var_type = TREE_TYPE (var);
6091 gimple_seq stmts;
6093 if (dump_file && (dump_flags & TDF_DETAILS))
6095 fprintf (dump_file, "Replacing exit test: ");
6096 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6098 compare = iv_elimination_compare (data, use);
6099 bound = unshare_expr (fold_convert (var_type, bound));
6100 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6101 if (stmts)
6102 gsi_insert_seq_on_edge_immediate (
6103 loop_preheader_edge (data->current_loop),
6104 stmts);
6106 gimple_cond_set_lhs (use->stmt, var);
6107 gimple_cond_set_code (use->stmt, compare);
6108 gimple_cond_set_rhs (use->stmt, op);
6109 return;
6112 /* The induction variable elimination failed; just express the original
6113 giv. */
6114 comp = get_computation (data->current_loop, use, cand);
6115 gcc_assert (comp != NULL_TREE);
6117 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6118 gcc_assert (ok);
6120 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6121 true, GSI_SAME_STMT);
6124 /* Rewrites USE using candidate CAND. */
6126 static void
6127 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6129 switch (use->type)
6131 case USE_NONLINEAR_EXPR:
6132 rewrite_use_nonlinear_expr (data, use, cand);
6133 break;
6135 case USE_ADDRESS:
6136 rewrite_use_address (data, use, cand);
6137 break;
6139 case USE_COMPARE:
6140 rewrite_use_compare (data, use, cand);
6141 break;
6143 default:
6144 gcc_unreachable ();
6147 update_stmt (use->stmt);
6150 /* Rewrite the uses using the selected induction variables. */
6152 static void
6153 rewrite_uses (struct ivopts_data *data)
6155 unsigned i;
6156 struct iv_cand *cand;
6157 struct iv_use *use;
6159 for (i = 0; i < n_iv_uses (data); i++)
6161 use = iv_use (data, i);
6162 cand = use->selected;
6163 gcc_assert (cand);
6165 rewrite_use (data, use, cand);
6169 /* Removes the ivs that are not used after rewriting. */
6171 static void
6172 remove_unused_ivs (struct ivopts_data *data)
6174 unsigned j;
6175 bitmap_iterator bi;
6176 bitmap toremove = BITMAP_ALLOC (NULL);
6178 /* Figure out an order in which to release SSA DEFs so that we don't
6179 release something that we'd have to propagate into a debug stmt
6180 afterwards. */
6181 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6183 struct version_info *info;
6185 info = ver_info (data, j);
6186 if (info->iv
6187 && !integer_zerop (info->iv->step)
6188 && !info->inv_id
6189 && !info->iv->have_use_for
6190 && !info->preserve_biv)
6191 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6194 release_defs_bitset (toremove);
6196 BITMAP_FREE (toremove);
6199 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6200 for pointer_map_traverse. */
6202 static bool
6203 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6204 void *data ATTRIBUTE_UNUSED)
6206 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6208 free (niter);
6209 return true;
6212 /* Frees data allocated by the optimization of a single loop. */
6214 static void
6215 free_loop_data (struct ivopts_data *data)
6217 unsigned i, j;
6218 bitmap_iterator bi;
6219 tree obj;
6221 if (data->niters)
6223 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6224 pointer_map_destroy (data->niters);
6225 data->niters = NULL;
6228 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6230 struct version_info *info;
6232 info = ver_info (data, i);
6233 if (info->iv)
6234 free (info->iv);
6235 info->iv = NULL;
6236 info->has_nonlin_use = false;
6237 info->preserve_biv = false;
6238 info->inv_id = 0;
6240 bitmap_clear (data->relevant);
6241 bitmap_clear (data->important_candidates);
6243 for (i = 0; i < n_iv_uses (data); i++)
6245 struct iv_use *use = iv_use (data, i);
6247 free (use->iv);
6248 BITMAP_FREE (use->related_cands);
6249 for (j = 0; j < use->n_map_members; j++)
6250 if (use->cost_map[j].depends_on)
6251 BITMAP_FREE (use->cost_map[j].depends_on);
6252 free (use->cost_map);
6253 free (use);
6255 VEC_truncate (iv_use_p, data->iv_uses, 0);
6257 for (i = 0; i < n_iv_cands (data); i++)
6259 struct iv_cand *cand = iv_cand (data, i);
6261 if (cand->iv)
6262 free (cand->iv);
6263 if (cand->depends_on)
6264 BITMAP_FREE (cand->depends_on);
6265 free (cand);
6267 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6269 if (data->version_info_size < num_ssa_names)
6271 data->version_info_size = 2 * num_ssa_names;
6272 free (data->version_info);
6273 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6276 data->max_inv_id = 0;
6278 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6279 SET_DECL_RTL (obj, NULL_RTX);
6281 VEC_truncate (tree, decl_rtl_to_reset, 0);
6283 htab_empty (data->inv_expr_tab);
6284 data->inv_expr_id = 0;
6287 /* Finalizes data structures used by the iv optimization pass. */
6290 static void
6291 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6293 free_loop_data (data);
6294 free (data->version_info);
6295 BITMAP_FREE (data->relevant);
6296 BITMAP_FREE (data->important_candidates);
6298 VEC_free (tree, heap, decl_rtl_to_reset);
6299 VEC_free (iv_use_p, heap, data->iv_uses);
6300 VEC_free (iv_cand_p, heap, data->iv_candidates);
6301 htab_delete (data->inv_expr_tab);
6304 /* Returns true if the loop body BODY includes any function calls. */
6306 static bool
6307 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6309 gimple_stmt_iterator gsi;
6310 unsigned i;
6312 for (i = 0; i < num_nodes; i++)
6313 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6315 gimple stmt = gsi_stmt (gsi);
6316 if (is_gimple_call (stmt)
6317 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6318 return true;
6320 return false;
6323 /* Optimizes the LOOP. Returns true if anything changed. */
6325 static bool
6326 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6328 bool changed = false;
6329 struct iv_ca *iv_ca;
6330 edge exit;
6331 basic_block *body;
6333 gcc_assert (!data->niters);
6334 data->current_loop = loop;
6335 data->speed = optimize_loop_for_speed_p (loop);
6337 if (dump_file && (dump_flags & TDF_DETAILS))
6339 fprintf (dump_file, "Processing loop %d\n", loop->num);
6341 exit = single_dom_exit (loop);
6342 if (exit)
6344 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6345 exit->src->index, exit->dest->index);
6346 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6347 fprintf (dump_file, "\n");
6350 fprintf (dump_file, "\n");
6353 body = get_loop_body (loop);
6354 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6355 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6356 free (body);
6358 /* For each ssa name determines whether it behaves as an induction variable
6359 in some loop. */
6360 if (!find_induction_variables (data))
6361 goto finish;
6363 /* Finds interesting uses (item 1). */
6364 find_interesting_uses (data);
6365 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6366 goto finish;
6368 /* Finds candidates for the induction variables (item 2). */
6369 find_iv_candidates (data);
6371 /* Calculates the costs (item 3, part 1). */
6372 determine_iv_costs (data);
6373 determine_use_iv_costs (data);
6374 determine_set_costs (data);
6376 /* Find the optimal set of induction variables (item 3, part 2). */
6377 iv_ca = find_optimal_iv_set (data);
6378 if (!iv_ca)
6379 goto finish;
6380 changed = true;
6382 /* Create the new induction variables (item 4, part 1). */
6383 create_new_ivs (data, iv_ca);
6384 iv_ca_free (&iv_ca);
6386 /* Rewrite the uses (item 4, part 2). */
6387 rewrite_uses (data);
6389 /* Remove the ivs that are unused after rewriting. */
6390 remove_unused_ivs (data);
6392 /* We have changed the structure of induction variables; it might happen
6393 that definitions in the scev database refer to some of them that were
6394 eliminated. */
6395 scev_reset ();
6397 finish:
6398 free_loop_data (data);
6400 return changed;
6403 /* Main entry point. Optimizes induction variables in loops. */
6405 void
6406 tree_ssa_iv_optimize (void)
6408 struct loop *loop;
6409 struct ivopts_data data;
6410 loop_iterator li;
6412 tree_ssa_iv_optimize_init (&data);
6414 /* Optimize the loops starting with the innermost ones. */
6415 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6417 if (dump_file && (dump_flags & TDF_DETAILS))
6418 flow_loop_dump (loop, dump_file, NULL, 1);
6420 tree_ssa_iv_optimize_loop (&data, loop);
6423 tree_ssa_iv_optimize_finalize (&data);