gcc/tree-ssa-loop-ivopts.c (PR target/44750)
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code
      is removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
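
/* As an illustration (a schematic example, not taken from this file):
   on a target with cheap pointer increments, the steps above may rewrite

       for (i = 0; i < n; i++)
         sum += a[i];

   into the equivalent

       for (p = a; p < a + n; p++)
         sum += *p;

   where the pointer candidate p covers both the address use a[i] and the
   exit compare, so the original counter i can be eliminated.  */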
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_loop_iterations_int (loop, false);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;
/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit,
		struct tree_niter_desc **desc_p)
{
  struct tree_niter_desc *desc = NULL;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
	 unconditionally (i.e., without possibility of # of iterations
	 being zero).  Also, we cannot safely work with ssa names that
	 appear in phi nodes on abnormal edges, so that we do not create
	 overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (number_of_iterations_exit (data->current_loop,
				     exit, desc, true)
	  && integer_zerop (desc->may_be_zero)
	  && !contains_abnormal_ssa_name_p (desc->niter))
	niter = desc->niter;
      else
	niter = NULL_TREE;

      desc->niter = niter;
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    niter = ((struct tree_niter_desc *) *slot)->niter;

  if (desc_p)
    *desc_p = (struct tree_niter_desc *) *slot;
  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit, NULL);
}
/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *) ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *) ent2;

  return operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *) ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
				    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
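
/* For instance (illustrative): for EXPR = &a[i].f the base address of
   a[i].f is the declaration a, so the function returns (void *) &a;
   for an integer constant cast to a pointer type it returns NULL_TREE,
   meaning no base object could be determined.  */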
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}
/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
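
/* E.g. (an illustrative sketch): for the header phi
   i_1 = PHI <0 (preheader), i_5 (latch)> with i_5 = i_1 + 4 in the loop
   body, simple_iv determines the affine evolution {0, +, 4} and the
   function returns the step 4.  */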
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = fold_convert (sizetype, step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter, TDF_SLIM);
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
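
/* For example (illustrative): with n defined before the loop, n * 2 is
   obviously invariant, whereas a reference involving i_1 is not when
   i_1 is defined by a phi node in the loop header.  */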
/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
	return false;
    }

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
			      precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
	return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
			      precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}
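
/* A worked example (illustrative): for TOP = (n + n) * 4 and BOT = n,
   the PLUS_EXPR and MULT_EXPR cases combine to give *MUL = 8; for
   TOP = 6 and BOT = 4, the INTEGER_CST case fails because the floor
   division leaves the nonzero remainder 2.  */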
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
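
/* For instance (illustrative): for EXPR = p + 12 this returns p with
   *OFFSET = 12; for EXPR = &a[2] with 4-byte array elements, the
   ADDR_EXPR and ARRAY_REF cases combine to return &a with *OFFSET = 8.  */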
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to which we should store it.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2299 /* Returns true if incrementing the induction variable at the end of the LOOP
2300 is allowed.
2302 The purpose is to avoid splitting the latch edge with a biv increment, thus
2303 creating a jump, possibly confusing other optimization passes and leaving
2304 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2305 is not available (so we do not have a better alternative), or if the latch
2306 edge is already nonempty. */
2308 static bool
2309 allow_ip_end_pos_p (struct loop *loop)
2311 if (!ip_normal_pos (loop))
2312 return true;
2314 if (!empty_block_p (ip_end_pos (loop)))
2315 return true;
2317 return false;
2320 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2321 The important field is set to IMPORTANT. */
2323 static void
2324 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2325 bool important, struct iv_use *use)
2327 basic_block use_bb = gimple_bb (use->stmt);
2328 enum machine_mode mem_mode;
2329 unsigned HOST_WIDE_INT cstepi;
2331 /* If we insert the increment in any position other than the standard
2332 ones, we must ensure that it is incremented once per iteration.
2333 It must not be in an inner nested loop, or one side of an if
2334 statement. */
2335 if (use_bb->loop_father != data->current_loop
2336 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2337 || stmt_could_throw_p (use->stmt)
2338 || !cst_and_fits_in_hwi (step))
2339 return;
2341 cstepi = int_cst_value (step);
2343 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2344 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2345 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2347 enum tree_code code = MINUS_EXPR;
2348 tree new_base;
2349 tree new_step = step;
2351 if (POINTER_TYPE_P (TREE_TYPE (base)))
2353 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2354 code = POINTER_PLUS_EXPR;
2356 else
2357 new_step = fold_convert (TREE_TYPE (base), new_step);
2358 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2359 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2360 use->stmt);
2362 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2363 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2365 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2366 use->stmt);
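/* A worked sketch of the candidates added above, assuming a target with
   HAVE_PRE_INCREMENT, a 4-byte MEM_MODE and an iv with base P and step 4
   (values are illustrative):

     pre-increment:  candidate base P - 4, position IP_BEFORE_USE,
                     so the access can become *(++p)-style addressing;
     post-increment: candidate base P itself, position IP_AFTER_USE,
                     matching *(p++).

   The tests against -cstepi cover the corresponding decrement forms.  */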
2370 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2371 and the position to POS. If USE is not NULL, the candidate is set as related
2372 to it. The candidate computation is scheduled on all available positions. */
2374 static void
2375 add_candidate (struct ivopts_data *data,
2376 tree base, tree step, bool important, struct iv_use *use)
2378 if (ip_normal_pos (data->current_loop))
2379 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2380 if (ip_end_pos (data->current_loop)
2381 && allow_ip_end_pos_p (data->current_loop))
2382 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2384 if (use != NULL && use->type == USE_ADDRESS)
2385 add_autoinc_candidates (data, base, step, important, use);
2388 /* Add a standard "0 + 1 * iteration" iv candidate for a
2389 type with SIZE bits. */
2391 static void
2392 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2393 unsigned int size)
2395 tree type = lang_hooks.types.type_for_size (size, true);
2396 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2397 true, NULL);
2400 /* Adds standard iv candidates. */
2402 static void
2403 add_standard_iv_candidates (struct ivopts_data *data)
2405 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2407 /* The same for a double-integer type if it is still fast enough. */
2408 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2409 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2413 /* Adds candidates based on the old induction variable IV. */
2415 static void
2416 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2418 gimple phi;
2419 tree def;
2420 struct iv_cand *cand;
2422 add_candidate (data, iv->base, iv->step, true, NULL);
2424 /* The same, but with initial value zero. */
2425 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2426 add_candidate (data, size_int (0), iv->step, true, NULL);
2427 else
2428 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2429 iv->step, true, NULL);
2431 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2432 if (gimple_code (phi) == GIMPLE_PHI)
2434 /* Additionally record the possibility of leaving the original iv
2435 untouched. */
2436 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2437 cand = add_candidate_1 (data,
2438 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2439 SSA_NAME_DEF_STMT (def));
2440 cand->var_before = iv->ssa_name;
2441 cand->var_after = def;
2445 /* Adds candidates based on the old induction variables. */
2447 static void
2448 add_old_ivs_candidates (struct ivopts_data *data)
2450 unsigned i;
2451 struct iv *iv;
2452 bitmap_iterator bi;
2454 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2456 iv = ver_info (data, i)->iv;
2457 if (iv && iv->biv_p && !integer_zerop (iv->step))
2458 add_old_iv_candidates (data, iv);
2462 /* Adds candidates based on the value of the induction variable IV and USE. */
2464 static void
2465 add_iv_value_candidates (struct ivopts_data *data,
2466 struct iv *iv, struct iv_use *use)
2468 unsigned HOST_WIDE_INT offset;
2469 tree base;
2470 tree basetype;
2472 add_candidate (data, iv->base, iv->step, false, use);
2474 /* The same, but with initial value zero. Make such a variable important,
2475 since it is generic enough that possibly many uses may be based
2476 on it. */
2477 basetype = TREE_TYPE (iv->base);
2478 if (POINTER_TYPE_P (basetype))
2479 basetype = sizetype;
2480 add_candidate (data, build_int_cst (basetype, 0),
2481 iv->step, true, use);
2483 /* Third, try removing the constant offset. Make sure to even
2484 add a candidate for &a[0] vs. (T *)&a. */
2485 base = strip_offset (iv->base, &offset);
2486 if (offset
2487 || base != iv->base)
2488 add_candidate (data, base, iv->step, false, use);
2491 /* Adds candidates based on the uses. */
2493 static void
2494 add_derived_ivs_candidates (struct ivopts_data *data)
2496 unsigned i;
2498 for (i = 0; i < n_iv_uses (data); i++)
2500 struct iv_use *use = iv_use (data, i);
2502 if (!use)
2503 continue;
2505 switch (use->type)
2507 case USE_NONLINEAR_EXPR:
2508 case USE_COMPARE:
2509 case USE_ADDRESS:
2510 /* Just add the ivs based on the value of the iv used here. */
2511 add_iv_value_candidates (data, use->iv, use);
2512 break;
2514 default:
2515 gcc_unreachable ();
2520 /* Record important candidates and add them to related_cands bitmaps
2521 if needed. */
2523 static void
2524 record_important_candidates (struct ivopts_data *data)
2526 unsigned i;
2527 struct iv_use *use;
2529 for (i = 0; i < n_iv_cands (data); i++)
2531 struct iv_cand *cand = iv_cand (data, i);
2533 if (cand->important)
2534 bitmap_set_bit (data->important_candidates, i);
2537 data->consider_all_candidates = (n_iv_cands (data)
2538 <= CONSIDER_ALL_CANDIDATES_BOUND);
2540 if (data->consider_all_candidates)
2542 /* We will not need "related_cands" bitmaps in this case,
2543 so release them to decrease peak memory consumption. */
2544 for (i = 0; i < n_iv_uses (data); i++)
2546 use = iv_use (data, i);
2547 BITMAP_FREE (use->related_cands);
2550 else
2552 /* Add important candidates to the related_cands bitmaps. */
2553 for (i = 0; i < n_iv_uses (data); i++)
2554 bitmap_ior_into (iv_use (data, i)->related_cands,
2555 data->important_candidates);
2559 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2560 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2561 we allocate a small open-addressed hash table for every use. */
2563 static void
2564 alloc_use_cost_map (struct ivopts_data *data)
2566 unsigned i, size, s, j;
2568 for (i = 0; i < n_iv_uses (data); i++)
2570 struct iv_use *use = iv_use (data, i);
2571 bitmap_iterator bi;
2573 if (data->consider_all_candidates)
2574 size = n_iv_cands (data);
2575 else
2577 s = 0;
2578 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2580 s++;
2583 /* Round up to the nearest power of two, so that the modulo by it is fast to compute. */
2584 for (size = 1; size < s; size <<= 1)
2585 continue;
2588 use->n_map_members = size;
2589 use->cost_map = XCNEWVEC (struct cost_pair, size);
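/* Sizing example for the hashed case above (illustrative numbers): a use
   with s == 5 related candidates gets size == 8, and the lookup code in
   set_use_iv_cost/get_use_iv_cost then reduces a candidate id by masking,

     s = cand->id & (use->n_map_members - 1);    e.g. 13 & 7 == 5

   which is the cheap power-of-two modulo referred to above.  */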
2593 /* Returns a description of the computation cost of an expression whose
2594 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2596 static comp_cost
2597 new_cost (unsigned runtime, unsigned complexity)
2599 comp_cost cost;
2601 cost.cost = runtime;
2602 cost.complexity = complexity;
2604 return cost;
2607 /* Adds costs COST1 and COST2. */
2609 static comp_cost
2610 add_costs (comp_cost cost1, comp_cost cost2)
2612 cost1.cost += cost2.cost;
2613 cost1.complexity += cost2.complexity;
2615 return cost1;
2617 /* Subtracts cost COST2 from COST1. */
2619 static comp_cost
2620 sub_costs (comp_cost cost1, comp_cost cost2)
2622 cost1.cost -= cost2.cost;
2623 cost1.complexity -= cost2.complexity;
2625 return cost1;
2628 /* Returns a negative number if COST1 < COST2, a positive number if
2629 COST1 > COST2, and 0 if COST1 = COST2. */
2631 static int
2632 compare_costs (comp_cost cost1, comp_cost cost2)
2634 if (cost1.cost == cost2.cost)
2635 return cost1.complexity - cost2.complexity;
2637 return cost1.cost - cost2.cost;
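/* The ordering implemented above is lexicographic: runtime cost first,
   complexity only as a tie-breaker.  For instance (made-up values),
   {cost 4, complexity 2} compares greater than {cost 4, complexity 1},
   while {cost 3, complexity 9} still compares less than
   {cost 4, complexity 0}.  */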
2640 /* Returns true if COST is infinite. */
2642 static bool
2643 infinite_cost_p (comp_cost cost)
2645 return cost.cost == INFTY;
2648 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2649 depends on invariants DEPENDS_ON and that the value used in expressing it
2650 is VALUE. */
2652 static void
2653 set_use_iv_cost (struct ivopts_data *data,
2654 struct iv_use *use, struct iv_cand *cand,
2655 comp_cost cost, bitmap depends_on, tree value,
2656 int inv_expr_id)
2658 unsigned i, s;
2660 if (infinite_cost_p (cost))
2662 BITMAP_FREE (depends_on);
2663 return;
2666 if (data->consider_all_candidates)
2668 use->cost_map[cand->id].cand = cand;
2669 use->cost_map[cand->id].cost = cost;
2670 use->cost_map[cand->id].depends_on = depends_on;
2671 use->cost_map[cand->id].value = value;
2672 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2673 return;
2676 /* n_map_members is a power of two, so this computes modulo. */
2677 s = cand->id & (use->n_map_members - 1);
2678 for (i = s; i < use->n_map_members; i++)
2679 if (!use->cost_map[i].cand)
2680 goto found;
2681 for (i = 0; i < s; i++)
2682 if (!use->cost_map[i].cand)
2683 goto found;
2685 gcc_unreachable ();
2687 found:
2688 use->cost_map[i].cand = cand;
2689 use->cost_map[i].cost = cost;
2690 use->cost_map[i].depends_on = depends_on;
2691 use->cost_map[i].value = value;
2692 use->cost_map[i].inv_expr_id = inv_expr_id;
2695 /* Gets cost of (USE, CANDIDATE) pair. */
2697 static struct cost_pair *
2698 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2699 struct iv_cand *cand)
2701 unsigned i, s;
2702 struct cost_pair *ret;
2704 if (!cand)
2705 return NULL;
2707 if (data->consider_all_candidates)
2709 ret = use->cost_map + cand->id;
2710 if (!ret->cand)
2711 return NULL;
2713 return ret;
2716 /* n_map_members is a power of two, so this computes modulo. */
2717 s = cand->id & (use->n_map_members - 1);
2718 for (i = s; i < use->n_map_members; i++)
2719 if (use->cost_map[i].cand == cand)
2720 return use->cost_map + i;
2722 for (i = 0; i < s; i++)
2723 if (use->cost_map[i].cand == cand)
2724 return use->cost_map + i;
2726 return NULL;
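/* Probe sketch for the hashed case above (illustrative values): with
   n_map_members == 8 and cand->id == 13, the scan starts at slot
   13 & 7 == 5, walks slots 5..7, then wraps to slots 0..4.
   set_use_iv_cost claims the first slot whose cand field is still NULL,
   whereas this function returns the slot holding a matching cand pointer,
   or NULL if the candidate was never entered.  */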
2729 /* Returns an estimate of the cost of computing the insn sequence SEQ. */
2731 static unsigned
2732 seq_cost (rtx seq, bool speed)
2734 unsigned cost = 0;
2735 rtx set;
2737 for (; seq; seq = NEXT_INSN (seq))
2739 set = single_set (seq);
2740 if (set)
2741 cost += rtx_cost (set, SET, speed);
2742 else
2743 cost++;
2746 return cost;
2749 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2750 static rtx
2751 produce_memory_decl_rtl (tree obj, int *regno)
2753 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2754 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2755 rtx x;
2757 gcc_assert (obj);
2758 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2760 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2761 x = gen_rtx_SYMBOL_REF (address_mode, name);
2762 SET_SYMBOL_REF_DECL (x, obj);
2763 x = gen_rtx_MEM (DECL_MODE (obj), x);
2764 set_mem_addr_space (x, as);
2765 targetm.encode_section_info (obj, x, true);
2767 else
2769 x = gen_raw_REG (address_mode, (*regno)++);
2770 x = gen_rtx_MEM (DECL_MODE (obj), x);
2771 set_mem_addr_space (x, as);
2774 return x;
2777 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2778 walk_tree. DATA contains the actual fake register number. */
2780 static tree
2781 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2783 tree obj = NULL_TREE;
2784 rtx x = NULL_RTX;
2785 int *regno = (int *) data;
2787 switch (TREE_CODE (*expr_p))
2789 case ADDR_EXPR:
2790 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2791 handled_component_p (*expr_p);
2792 expr_p = &TREE_OPERAND (*expr_p, 0))
2793 continue;
2794 obj = *expr_p;
2795 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2796 x = produce_memory_decl_rtl (obj, regno);
2797 break;
2799 case SSA_NAME:
2800 *ws = 0;
2801 obj = SSA_NAME_VAR (*expr_p);
2802 if (!DECL_RTL_SET_P (obj))
2803 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2804 break;
2806 case VAR_DECL:
2807 case PARM_DECL:
2808 case RESULT_DECL:
2809 *ws = 0;
2810 obj = *expr_p;
2812 if (DECL_RTL_SET_P (obj))
2813 break;
2815 if (DECL_MODE (obj) == BLKmode)
2816 x = produce_memory_decl_rtl (obj, regno);
2817 else
2818 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2820 break;
2822 default:
2823 break;
2826 if (x)
2828 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2829 SET_DECL_RTL (obj, x);
2832 return NULL_TREE;
2835 /* Determines cost of the computation of EXPR. */
2837 static unsigned
2838 computation_cost (tree expr, bool speed)
2840 rtx seq, rslt;
2841 tree type = TREE_TYPE (expr);
2842 unsigned cost;
2843 /* Avoid using hard regs in ways which may be unsupported. */
2844 int regno = LAST_VIRTUAL_REGISTER + 1;
2845 struct cgraph_node *node = cgraph_node (current_function_decl);
2846 enum node_frequency real_frequency = node->frequency;
2848 node->frequency = NODE_FREQUENCY_NORMAL;
2849 crtl->maybe_hot_insn_p = speed;
2850 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2851 start_sequence ();
2852 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2853 seq = get_insns ();
2854 end_sequence ();
2855 default_rtl_profile ();
2856 node->frequency = real_frequency;
2858 cost = seq_cost (seq, speed);
2859 if (MEM_P (rslt))
2860 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2861 TYPE_ADDR_SPACE (type), speed);
2863 return cost;
2866 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2868 static tree
2869 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2871 if (stmt_after_increment (loop, cand, stmt))
2872 return cand->var_after;
2873 else
2874 return cand->var_before;
2877 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2878 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2880 int
2881 tree_int_cst_sign_bit (const_tree t)
2883 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2884 unsigned HOST_WIDE_INT w;
2886 if (bitno < HOST_BITS_PER_WIDE_INT)
2887 w = TREE_INT_CST_LOW (t);
2888 else
2890 w = TREE_INT_CST_HIGH (t);
2891 bitno -= HOST_BITS_PER_WIDE_INT;
2894 return (w >> bitno) & 1;
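/* Example: for a constant of a type with TYPE_PRECISION 8 and low word
   0x80, bitno == 7 and the function returns 1.  Using TYPE_PRECISION
   rather than MODE_BITSIZE matters for types narrower than the machine
   mode they are carried in.  */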
2897 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2898 same precision that is at least as wide as the precision of TYPE, stores
2899 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2900 type of A and B. */
2902 static tree
2903 determine_common_wider_type (tree *a, tree *b)
2905 tree wider_type = NULL;
2906 tree suba, subb;
2907 tree atype = TREE_TYPE (*a);
2909 if (CONVERT_EXPR_P (*a))
2911 suba = TREE_OPERAND (*a, 0);
2912 wider_type = TREE_TYPE (suba);
2913 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2914 return atype;
2916 else
2917 return atype;
2919 if (CONVERT_EXPR_P (*b))
2921 subb = TREE_OPERAND (*b, 0);
2922 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2923 return atype;
2925 else
2926 return atype;
2928 *a = suba;
2929 *b = subb;
2930 return wider_type;
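/* Illustration (hypothetical variables): if *A is (int) la and *B is
   (int) lb with la and lb of type long, both conversions are stripped,
   *A becomes la, *B becomes lb and "long" is returned, so a difference
   computed later folds in the wide type.  If only one operand is a
   conversion, or the inner precisions differ, the type of *A is returned
   and the operands are left untouched.  */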
2933 /* Determines the expression by which USE is expressed from induction variable
2934 CAND at statement AT in LOOP. The expression is stored in a decomposed
2935 form into AFF. Returns false if USE cannot be expressed using CAND. */
2937 static bool
2938 get_computation_aff (struct loop *loop,
2939 struct iv_use *use, struct iv_cand *cand, gimple at,
2940 struct affine_tree_combination *aff)
2942 tree ubase = use->iv->base;
2943 tree ustep = use->iv->step;
2944 tree cbase = cand->iv->base;
2945 tree cstep = cand->iv->step, cstep_common;
2946 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2947 tree common_type, var;
2948 tree uutype;
2949 aff_tree cbase_aff, var_aff;
2950 double_int rat;
2952 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2954 /* We do not have enough precision to express the values of the use. */
2955 return false;
2958 var = var_at_stmt (loop, cand, at);
2959 uutype = unsigned_type_for (utype);
2961 /* If the conversion is not a noop, perform it. */
2962 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2964 cstep = fold_convert (uutype, cstep);
2965 cbase = fold_convert (uutype, cbase);
2966 var = fold_convert (uutype, var);
2969 if (!constant_multiple_of (ustep, cstep, &rat))
2970 return false;
2972 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2973 type, we achieve better folding by computing their difference in this
2974 wider type, and casting the result to UUTYPE. We do not need to worry about
2975 overflows, as all the arithmetic will in the end be performed in UUTYPE
2976 anyway. */
2977 common_type = determine_common_wider_type (&ubase, &cbase);
2979 /* use = ubase - ratio * cbase + ratio * var. */
2980 tree_to_aff_combination (ubase, common_type, aff);
2981 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2982 tree_to_aff_combination (var, uutype, &var_aff);
2984 /* We need to shift the value if we are after the increment. */
2985 if (stmt_after_increment (loop, cand, at))
2987 aff_tree cstep_aff;
2989 if (common_type != uutype)
2990 cstep_common = fold_convert (common_type, cstep);
2991 else
2992 cstep_common = cstep;
2994 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2995 aff_combination_add (&cbase_aff, &cstep_aff);
2998 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2999 aff_combination_add (aff, &cbase_aff);
3000 if (common_type != uutype)
3001 aff_combination_convert (aff, uutype);
3003 aff_combination_scale (&var_aff, rat);
3004 aff_combination_add (aff, &var_aff);
3006 return true;
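/* A numeric sketch of the decomposition above (made-up ivs): with
   ubase == &a[3], ustep == 8, cbase == &a[0] and cstep == 4 (4-byte
   elements), constant_multiple_of yields rat == 2 and the affine form
   built is

     use = ubase - 2 * cbase + 2 * var

   which evaluates to ubase + 8 * i when var == cbase + 4 * i.  For a use
   after the increment, cstep is first folded into cbase so the formula
   still refers to the value the candidate had before the increment.  */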
3009 /* Determines the expression by which USE is expressed from induction variable
3010 CAND at statement AT in LOOP. The computation is unshared. */
3012 static tree
3013 get_computation_at (struct loop *loop,
3014 struct iv_use *use, struct iv_cand *cand, gimple at)
3016 aff_tree aff;
3017 tree type = TREE_TYPE (use->iv->base);
3019 if (!get_computation_aff (loop, use, cand, at, &aff))
3020 return NULL_TREE;
3021 unshare_aff_combination (&aff);
3022 return fold_convert (type, aff_combination_to_tree (&aff));
3025 /* Determines the expression by which USE is expressed from induction variable
3026 CAND in LOOP. The computation is unshared. */
3028 static tree
3029 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3031 return get_computation_at (loop, use, cand, use->stmt);
3034 /* Adjust the cost COST for being in loop setup rather than loop body.
3035 If we're optimizing for space, the loop setup overhead is constant;
3036 if we're optimizing for speed, amortize it over the per-iteration cost. */
3037 static unsigned
3038 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3040 if (cost == INFTY)
3041 return cost;
3042 else if (optimize_loop_for_speed_p (data->current_loop))
3043 return cost / avg_loop_niter (data->current_loop);
3044 else
3045 return cost;
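/* Example with illustrative numbers: a setup cost of 8 in a loop with
   avg_loop_niter == 10 is charged as 8 / 10 == 0 when optimizing for
   speed, i.e. the one-time setup is treated as almost free for a hot
   loop, while at -Os the full 8 is kept since the setup code occupies
   space regardless of the trip count.  */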
3048 /* Returns cost of addition in MODE. */
3050 static unsigned
3051 add_cost (enum machine_mode mode, bool speed)
3053 static unsigned costs[NUM_MACHINE_MODES];
3054 rtx seq;
3055 unsigned cost;
3057 if (costs[mode])
3058 return costs[mode];
3060 start_sequence ();
3061 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3062 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3063 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3064 NULL_RTX);
3065 seq = get_insns ();
3066 end_sequence ();
3068 cost = seq_cost (seq, speed);
3069 if (!cost)
3070 cost = 1;
3072 costs[mode] = cost;
3074 if (dump_file && (dump_flags & TDF_DETAILS))
3075 fprintf (dump_file, "Addition in %s costs %d\n",
3076 GET_MODE_NAME (mode), cost);
3077 return cost;
3080 /* Entry in a hashtable of already known costs for multiplication. */
3081 struct mbc_entry
3083 HOST_WIDE_INT cst; /* The constant to multiply by. */
3084 enum machine_mode mode; /* In mode. */
3085 unsigned cost; /* The cost. */
3088 /* Computes the hash value for ENTRY. */
3090 static hashval_t
3091 mbc_entry_hash (const void *entry)
3093 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3095 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3098 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3100 static int
3101 mbc_entry_eq (const void *entry1, const void *entry2)
3103 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3104 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3106 return (e1->mode == e2->mode
3107 && e1->cst == e2->cst);
3110 /* Returns cost of multiplication by constant CST in MODE. */
3112 unsigned
3113 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3115 static htab_t costs;
3116 struct mbc_entry **cached, act;
3117 rtx seq;
3118 unsigned cost;
3120 if (!costs)
3121 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3123 act.mode = mode;
3124 act.cst = cst;
3125 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3126 if (*cached)
3127 return (*cached)->cost;
3129 *cached = XNEW (struct mbc_entry);
3130 (*cached)->mode = mode;
3131 (*cached)->cst = cst;
3133 start_sequence ();
3134 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3135 gen_int_mode (cst, mode), NULL_RTX, 0);
3136 seq = get_insns ();
3137 end_sequence ();
3139 cost = seq_cost (seq, speed);
3141 if (dump_file && (dump_flags & TDF_DETAILS))
3142 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3143 (int) cst, GET_MODE_NAME (mode), cost);
3145 (*cached)->cost = cost;
3147 return cost;
3150 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3151 validity for a memory reference accessing memory of mode MODE in
3152 address space AS. */
3154 DEF_VEC_P (sbitmap);
3155 DEF_VEC_ALLOC_P (sbitmap, heap);
3157 bool
3158 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3159 addr_space_t as)
3161 #define MAX_RATIO 128
3162 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3163 static VEC (sbitmap, heap) *valid_mult_list;
3164 sbitmap valid_mult;
3166 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3167 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3169 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3170 if (!valid_mult)
3172 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3173 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3174 rtx addr;
3175 HOST_WIDE_INT i;
3177 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3178 sbitmap_zero (valid_mult);
3179 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3180 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3182 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3183 if (memory_address_addr_space_p (mode, addr, as))
3184 SET_BIT (valid_mult, i + MAX_RATIO);
3187 if (dump_file && (dump_flags & TDF_DETAILS))
3189 fprintf (dump_file, " allowed multipliers:");
3190 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3191 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3192 fprintf (dump_file, " %d", (int) i);
3193 fprintf (dump_file, "\n");
3194 fprintf (dump_file, "\n");
3197 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3200 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3201 return false;
3203 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
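/* Sketch of the cached query above, for a hypothetical x86-like target
   where index scaling by 1, 2, 4 and 8 is legal in addresses:

     multiplier_allowed_in_address_p (4, SImode, ADDR_SPACE_GENERIC)
       -> true   (bit 4 + MAX_RATIO is set in the sbitmap)
     multiplier_allowed_in_address_p (3, SImode, ADDR_SPACE_GENERIC)
       -> false

   Ratios outside [-MAX_RATIO, MAX_RATIO] are rejected without consulting
   the bitmap at all.  */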
3206 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3207 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3208 variable is omitted. Compute the cost for a memory reference that accesses
3209 a memory location of mode MEM_MODE in address space AS.
3211 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3212 size of MEM_MODE / RATIO) is available. To make this determination, we
3213 look at the size of the increment to be made, which is given in CSTEP.
3214 CSTEP may be zero if the step is unknown.
3215 STMT_AFTER_INC is true iff the statement we're looking at is after the
3216 increment of the original biv.
3218 TODO -- there must be some better way. This all is quite crude. */
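/* The cost table below stores one entry per combination of the four
   boolean axes (symbol, var, offset, ratio).  The loops over i in the
   initialization code decompose the index as

     sym_p = i & 1;        var_p = (i >> 1) & 1;
     off_p = (i >> 2) & 1; rat_p = (i >> 3) & 1;

   so, for example, i == 13 (binary 1101) stands for the address shape
   sym + cst + rat * index with no plain variable part.  */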
3220 typedef struct
3222 HOST_WIDE_INT min_offset, max_offset;
3223 unsigned costs[2][2][2][2];
3224 } *address_cost_data;
3226 DEF_VEC_P (address_cost_data);
3227 DEF_VEC_ALLOC_P (address_cost_data, heap);
3229 static comp_cost
3230 get_address_cost (bool symbol_present, bool var_present,
3231 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3232 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3233 addr_space_t as, bool speed,
3234 bool stmt_after_inc, bool *may_autoinc)
3236 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3237 static VEC(address_cost_data, heap) *address_cost_data_list;
3238 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3239 address_cost_data data;
3240 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3241 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3242 unsigned cost, acost, complexity;
3243 bool offset_p, ratio_p, autoinc;
3244 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3245 unsigned HOST_WIDE_INT mask;
3246 unsigned bits;
3248 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3249 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3250 data_index + 1);
3252 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3253 if (!data)
3255 HOST_WIDE_INT i;
3256 HOST_WIDE_INT rat, off = 0;
3257 int old_cse_not_expected, width;
3258 unsigned sym_p, var_p, off_p, rat_p, add_c;
3259 rtx seq, addr, base;
3260 rtx reg0, reg1;
3262 data = (address_cost_data) xcalloc (1, sizeof (*data));
3264 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3266 width = GET_MODE_BITSIZE (address_mode) - 1;
3267 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3268 width = HOST_BITS_PER_WIDE_INT - 1;
3269 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3271 for (i = width; i >= 0; i--)
3273 off = -((HOST_WIDE_INT) 1 << i);
3274 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3275 if (memory_address_addr_space_p (mem_mode, addr, as))
3276 break;
3278 data->min_offset = (i == -1 ? 0 : off);
3280 for (i = width; i >= 0; i--)
3282 off = ((HOST_WIDE_INT) 1 << i) - 1;
3283 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3284 if (memory_address_addr_space_p (mem_mode, addr, as))
3285 break;
3287 if (i == -1)
3288 off = 0;
3289 data->max_offset = off;
3291 if (dump_file && (dump_flags & TDF_DETAILS))
3293 fprintf (dump_file, "get_address_cost:\n");
3294 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3295 GET_MODE_NAME (mem_mode),
3296 data->min_offset);
3297 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3298 GET_MODE_NAME (mem_mode),
3299 data->max_offset);
3302 rat = 1;
3303 for (i = 2; i <= MAX_RATIO; i++)
3304 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3306 rat = i;
3307 break;
3310 /* Compute the cost of various addressing modes. */
3311 acost = 0;
3312 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3313 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3315 if (HAVE_PRE_DECREMENT)
3317 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3318 has_predec[mem_mode]
3319 = memory_address_addr_space_p (mem_mode, addr, as);
3321 if (HAVE_POST_DECREMENT)
3323 addr = gen_rtx_POST_DEC (address_mode, reg0);
3324 has_postdec[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3327 if (HAVE_PRE_INCREMENT)
3329 addr = gen_rtx_PRE_INC (address_mode, reg0);
3330 has_preinc[mem_mode]
3331 = memory_address_addr_space_p (mem_mode, addr, as);
3333 if (HAVE_POST_INCREMENT)
3335 addr = gen_rtx_POST_INC (address_mode, reg0);
3336 has_postinc[mem_mode]
3337 = memory_address_addr_space_p (mem_mode, addr, as);
3339 for (i = 0; i < 16; i++)
3341 sym_p = i & 1;
3342 var_p = (i >> 1) & 1;
3343 off_p = (i >> 2) & 1;
3344 rat_p = (i >> 3) & 1;
3346 addr = reg0;
3347 if (rat_p)
3348 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3349 gen_int_mode (rat, address_mode));
3351 if (var_p)
3352 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3354 if (sym_p)
3356 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3357 /* ??? We can run into trouble with some backends by presenting
3358 it with symbols which haven't been properly passed through
3359 targetm.encode_section_info. By setting the local bit, we
3360 enhance the probability of things working. */
3361 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3363 if (off_p)
3364 base = gen_rtx_fmt_e (CONST, address_mode,
3365 gen_rtx_fmt_ee
3366 (PLUS, address_mode, base,
3367 gen_int_mode (off, address_mode)));
3369 else if (off_p)
3370 base = gen_int_mode (off, address_mode);
3371 else
3372 base = NULL_RTX;
3374 if (base)
3375 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3377 start_sequence ();
3378 /* To avoid splitting addressing modes, pretend that no cse will
3379 follow. */
3380 old_cse_not_expected = cse_not_expected;
3381 cse_not_expected = true;
3382 addr = memory_address_addr_space (mem_mode, addr, as);
3383 cse_not_expected = old_cse_not_expected;
3384 seq = get_insns ();
3385 end_sequence ();
3387 acost = seq_cost (seq, speed);
3388 acost += address_cost (addr, mem_mode, as, speed);
3390 if (!acost)
3391 acost = 1;
3392 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3395 /* On some targets, it is quite expensive to load a symbol into a register,
3396 which makes addresses that contain symbols look much more expensive.
3397 However, the symbol will have to be loaded in any case before the
3398 loop (and quite likely we have it in a register already), so it does not
3399 make much sense to penalize them too heavily. So make some final
3400 tweaks for the SYMBOL_PRESENT modes:
3402 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3403 var is cheaper, use this mode with small penalty.
3404 If VAR_PRESENT is true, try whether the mode with
3405 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3406 if this is the case, use it. */
3407 add_c = add_cost (address_mode, speed);
3408 for (i = 0; i < 8; i++)
3410 var_p = i & 1;
3411 off_p = (i >> 1) & 1;
3412 rat_p = (i >> 2) & 1;
3414 acost = data->costs[0][1][off_p][rat_p] + 1;
3415 if (var_p)
3416 acost += add_c;
3418 if (acost < data->costs[1][var_p][off_p][rat_p])
3419 data->costs[1][var_p][off_p][rat_p] = acost;
3422 if (dump_file && (dump_flags & TDF_DETAILS))
3424 fprintf (dump_file, "Address costs:\n");
3426 for (i = 0; i < 16; i++)
3428 sym_p = i & 1;
3429 var_p = (i >> 1) & 1;
3430 off_p = (i >> 2) & 1;
3431 rat_p = (i >> 3) & 1;
3433 fprintf (dump_file, " ");
3434 if (sym_p)
3435 fprintf (dump_file, "sym + ");
3436 if (var_p)
3437 fprintf (dump_file, "var + ");
3438 if (off_p)
3439 fprintf (dump_file, "cst + ");
3440 if (rat_p)
3441 fprintf (dump_file, "rat * ");
3443 acost = data->costs[sym_p][var_p][off_p][rat_p];
3444 fprintf (dump_file, "index costs %d\n", acost);
3446 if (has_predec[mem_mode] || has_postdec[mem_mode]
3447 || has_preinc[mem_mode] || has_postinc[mem_mode])
3448 fprintf (dump_file, " May include autoinc/dec\n");
3449 fprintf (dump_file, "\n");
3452 VEC_replace (address_cost_data, address_cost_data_list,
3453 data_index, data);
3456 bits = GET_MODE_BITSIZE (address_mode);
3457 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3458 offset &= mask;
3459 if ((offset >> (bits - 1) & 1))
3460 offset |= ~mask;
3461 s_offset = offset;
3463 autoinc = false;
3464 msize = GET_MODE_SIZE (mem_mode);
3465 autoinc_offset = offset;
3466 if (stmt_after_inc)
3467 autoinc_offset += ratio * cstep;
3468 if (symbol_present || var_present || ratio != 1)
3469 autoinc = false;
3470 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3471 && msize == cstep)
3472 || (has_postdec[mem_mode] && autoinc_offset == 0
3473 && msize == -cstep)
3474 || (has_preinc[mem_mode] && autoinc_offset == msize
3475 && msize == cstep)
3476 || (has_predec[mem_mode] && autoinc_offset == -msize
3477 && msize == -cstep))
3478 autoinc = true;
3480 cost = 0;
3481 offset_p = (s_offset != 0
3482 && data->min_offset <= s_offset
3483 && s_offset <= data->max_offset);
3484 ratio_p = (ratio != 1
3485 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3487 if (ratio != 1 && !ratio_p)
3488 cost += multiply_by_cost (ratio, address_mode, speed);
3490 if (s_offset && !offset_p && !symbol_present)
3491 cost += add_cost (address_mode, speed);
3493 if (may_autoinc)
3494 *may_autoinc = autoinc;
3495 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3496 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3497 return new_cost (cost + acost, complexity);
3500 /* Estimates cost of forcing expression EXPR into a variable. */
3502 static comp_cost
3503 force_expr_to_var_cost (tree expr, bool speed)
3505 static bool costs_initialized = false;
3506 static unsigned integer_cost [2];
3507 static unsigned symbol_cost [2];
3508 static unsigned address_cost [2];
3509 tree op0, op1;
3510 comp_cost cost0, cost1, cost;
3511 enum machine_mode mode;
3513 if (!costs_initialized)
3515 tree type = build_pointer_type (integer_type_node);
3516 tree var, addr;
3517 rtx x;
3518 int i;
3520 var = create_tmp_var_raw (integer_type_node, "test_var");
3521 TREE_STATIC (var) = 1;
3522 x = produce_memory_decl_rtl (var, NULL);
3523 SET_DECL_RTL (var, x);
3525 addr = build1 (ADDR_EXPR, type, var);
3528 for (i = 0; i < 2; i++)
3530 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3531 2000), i);
3533 symbol_cost[i] = computation_cost (addr, i) + 1;
3535 address_cost[i]
3536 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3537 addr,
3538 build_int_cst (sizetype, 2000)), i) + 1;
3539 if (dump_file && (dump_flags & TDF_DETAILS))
3541 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3542 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3543 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3544 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3545 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3546 fprintf (dump_file, "\n");
3550 costs_initialized = true;
3553 STRIP_NOPS (expr);
3555 if (SSA_VAR_P (expr))
3556 return zero_cost;
3558 if (is_gimple_min_invariant (expr))
3560 if (TREE_CODE (expr) == INTEGER_CST)
3561 return new_cost (integer_cost [speed], 0);
3563 if (TREE_CODE (expr) == ADDR_EXPR)
3565 tree obj = TREE_OPERAND (expr, 0);
3567 if (TREE_CODE (obj) == VAR_DECL
3568 || TREE_CODE (obj) == PARM_DECL
3569 || TREE_CODE (obj) == RESULT_DECL)
3570 return new_cost (symbol_cost [speed], 0);
3573 return new_cost (address_cost [speed], 0);
3576 switch (TREE_CODE (expr))
3578 case POINTER_PLUS_EXPR:
3579 case PLUS_EXPR:
3580 case MINUS_EXPR:
3581 case MULT_EXPR:
3582 op0 = TREE_OPERAND (expr, 0);
3583 op1 = TREE_OPERAND (expr, 1);
3584 STRIP_NOPS (op0);
3585 STRIP_NOPS (op1);
3587 if (is_gimple_val (op0))
3588 cost0 = zero_cost;
3589 else
3590 cost0 = force_expr_to_var_cost (op0, speed);
3592 if (is_gimple_val (op1))
3593 cost1 = zero_cost;
3594 else
3595 cost1 = force_expr_to_var_cost (op1, speed);
3597 break;
3599 case NEGATE_EXPR:
3600 op0 = TREE_OPERAND (expr, 0);
3601 STRIP_NOPS (op0);
3602 op1 = NULL_TREE;
3604 if (is_gimple_val (op0))
3605 cost0 = zero_cost;
3606 else
3607 cost0 = force_expr_to_var_cost (op0, speed);
3609 cost1 = zero_cost;
3610 break;
3612 default:
3613 /* Just an arbitrary value, FIXME. */
3614 return new_cost (target_spill_cost[speed], 0);
3617 mode = TYPE_MODE (TREE_TYPE (expr));
3618 switch (TREE_CODE (expr))
3620 case POINTER_PLUS_EXPR:
3621 case PLUS_EXPR:
3622 case MINUS_EXPR:
3623 case NEGATE_EXPR:
3624 cost = new_cost (add_cost (mode, speed), 0);
3625 break;
3627 case MULT_EXPR:
3628 if (cst_and_fits_in_hwi (op0))
3629 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3630 else if (cst_and_fits_in_hwi (op1))
3631 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3632 else
3633 return new_cost (target_spill_cost [speed], 0);
3634 break;
3636 default:
3637 gcc_unreachable ();
3640 cost = add_costs (cost, cost0);
3641 cost = add_costs (cost, cost1);
3643 /* Bound the cost by target_spill_cost. The parts of complicated
3644 computations are often either loop invariant or at least can
3645 be shared between several iv uses, so letting this grow without
3646 limits would not give reasonable results. */
3647 if (cost.cost > (int) target_spill_cost [speed])
3648 cost.cost = target_spill_cost [speed];
3650 return cost;
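/* Recursion example (hypothetical expression): costing a * 4 + b
   descends into the MULT_EXPR, prices the multiplication by the
   constant 4 through multiply_by_cost, adds add_cost for the PLUS_EXPR,
   and charges nothing for the leaves a and b since they are already
   gimple values; the sum is then clamped to target_spill_cost as
   described above.  */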
3653 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3654 invariants the computation depends on. */
3656 static comp_cost
3657 force_var_cost (struct ivopts_data *data,
3658 tree expr, bitmap *depends_on)
3660 if (depends_on)
3662 fd_ivopts_data = data;
3663 walk_tree (&expr, find_depends, depends_on, NULL);
3666 return force_expr_to_var_cost (expr, data->speed);
3669 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3670 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3671 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3672 invariants the computation depends on. */
3674 static comp_cost
3675 split_address_cost (struct ivopts_data *data,
3676 tree addr, bool *symbol_present, bool *var_present,
3677 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3679 tree core;
3680 HOST_WIDE_INT bitsize;
3681 HOST_WIDE_INT bitpos;
3682 tree toffset;
3683 enum machine_mode mode;
3684 int unsignedp, volatilep;
3686 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3687 &unsignedp, &volatilep, false);
3689 if (toffset != 0
3690 || bitpos % BITS_PER_UNIT != 0
3691 || TREE_CODE (core) != VAR_DECL)
3693 *symbol_present = false;
3694 *var_present = true;
3695 fd_ivopts_data = data;
3696 walk_tree (&addr, find_depends, depends_on, NULL);
3697 return new_cost (target_spill_cost[data->speed], 0);
3700 *offset += bitpos / BITS_PER_UNIT;
3701 if (TREE_STATIC (core)
3702 || DECL_EXTERNAL (core))
3704 *symbol_present = true;
3705 *var_present = false;
3706 return zero_cost;
3709 *symbol_present = false;
3710 *var_present = true;
3711 return zero_cost;
3714 /* Estimates cost of expressing difference of addresses E1 - E2 as
3715 var + symbol + offset. The value of offset is added to OFFSET,
3716 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3717 part is missing. DEPENDS_ON is a set of the invariants the computation
3718 depends on. */
3720 static comp_cost
3721 ptr_difference_cost (struct ivopts_data *data,
3722 tree e1, tree e2, bool *symbol_present, bool *var_present,
3723 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3725 HOST_WIDE_INT diff = 0;
3726 aff_tree aff_e1, aff_e2;
3727 tree type;
3729 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3731 if (ptr_difference_const (e1, e2, &diff))
3733 *offset += diff;
3734 *symbol_present = false;
3735 *var_present = false;
3736 return zero_cost;
3739 if (integer_zerop (e2))
3740 return split_address_cost (data, TREE_OPERAND (e1, 0),
3741 symbol_present, var_present, offset, depends_on);
3743 *symbol_present = false;
3744 *var_present = true;
3746 type = signed_type_for (TREE_TYPE (e1));
3747 tree_to_aff_combination (e1, type, &aff_e1);
3748 tree_to_aff_combination (e2, type, &aff_e2);
3749 aff_combination_scale (&aff_e2, double_int_minus_one);
3750 aff_combination_add (&aff_e1, &aff_e2);
3752 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3755 /* Estimates cost of expressing difference E1 - E2 as
3756 var + symbol + offset. The value of offset is added to OFFSET,
3757 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3758 part is missing. DEPENDS_ON is a set of the invariants the computation
3759 depends on. */
3761 static comp_cost
3762 difference_cost (struct ivopts_data *data,
3763 tree e1, tree e2, bool *symbol_present, bool *var_present,
3764 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3766 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3767 unsigned HOST_WIDE_INT off1, off2;
3768 aff_tree aff_e1, aff_e2;
3769 tree type;
3771 e1 = strip_offset (e1, &off1);
3772 e2 = strip_offset (e2, &off2);
3773 *offset += off1 - off2;
3775 STRIP_NOPS (e1);
3776 STRIP_NOPS (e2);
3778 if (TREE_CODE (e1) == ADDR_EXPR)
3779 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3780 offset, depends_on);
3781 *symbol_present = false;
3783 if (operand_equal_p (e1, e2, 0))
3785 *var_present = false;
3786 return zero_cost;
3789 *var_present = true;
3791 if (integer_zerop (e2))
3792 return force_var_cost (data, e1, depends_on);
3794 if (integer_zerop (e1))
3796 comp_cost cost = force_var_cost (data, e2, depends_on);
3797 cost.cost += multiply_by_cost (-1, mode, data->speed);
3798 return cost;
3801 type = signed_type_for (TREE_TYPE (e1));
3802 tree_to_aff_combination (e1, type, &aff_e1);
3803 tree_to_aff_combination (e2, type, &aff_e2);
3804 aff_combination_scale (&aff_e2, double_int_minus_one);
3805 aff_combination_add (&aff_e1, &aff_e2);
3807 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3810 /* Returns true if AFF1 and AFF2 are identical. */
3812 static bool
3813 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3815 unsigned i;
3817 if (aff1->n != aff2->n)
3818 return false;
3820 for (i = 0; i < aff1->n; i++)
3822 if (double_int_cmp (aff1->elts[i].coef, aff2->elts[i].coef, 0) != 0)
3823 return false;
3825 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3826 return false;
3828 return true;
3831 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3832 requires a new compiler generated temporary. Returns -1 otherwise.
3833 ADDRESS_P is a flag indicating if the expression is for address
3834 computation. */
3836 static int
3837 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3838 tree cbase, HOST_WIDE_INT ratio,
3839 bool address_p)
3841 aff_tree ubase_aff, cbase_aff;
3842 tree expr, ub, cb;
3843 struct iv_inv_expr_ent ent;
3844 struct iv_inv_expr_ent **slot;
3846 STRIP_NOPS (ubase);
3847 STRIP_NOPS (cbase);
3848 ub = ubase;
3849 cb = cbase;
3851 if ((TREE_CODE (ubase) == INTEGER_CST)
3852 && (TREE_CODE (cbase) == INTEGER_CST))
3853 return -1;
3855 /* Strips the constant part. */
3856 if (TREE_CODE (ubase) == PLUS_EXPR
3857 || TREE_CODE (ubase) == MINUS_EXPR
3858 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3860 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3861 ubase = TREE_OPERAND (ubase, 0);
3864 /* Strips the constant part. */
3865 if (TREE_CODE (cbase) == PLUS_EXPR
3866 || TREE_CODE (cbase) == MINUS_EXPR
3867 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3869 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3870 cbase = TREE_OPERAND (cbase, 0);
3873 if (address_p)
3875 if (((TREE_CODE (ubase) == SSA_NAME)
3876 || (TREE_CODE (ubase) == ADDR_EXPR
3877 && is_gimple_min_invariant (ubase)))
3878 && (TREE_CODE (cbase) == INTEGER_CST))
3879 return -1;
3881 if (((TREE_CODE (cbase) == SSA_NAME)
3882 || (TREE_CODE (cbase) == ADDR_EXPR
3883 && is_gimple_min_invariant (cbase)))
3884 && (TREE_CODE (ubase) == INTEGER_CST))
3885 return -1;
3888 if (ratio == 1)
3890 if (operand_equal_p (ubase, cbase, 0))
3891 return -1;
3893 if (TREE_CODE (ubase) == ADDR_EXPR
3894 && TREE_CODE (cbase) == ADDR_EXPR)
3896 tree usym, csym;
3898 usym = TREE_OPERAND (ubase, 0);
3899 csym = TREE_OPERAND (cbase, 0);
3900 if (TREE_CODE (usym) == ARRAY_REF)
3902 tree ind = TREE_OPERAND (usym, 1);
3903 if (TREE_CODE (ind) == INTEGER_CST
3904 && host_integerp (ind, 0)
3905 && TREE_INT_CST_LOW (ind) == 0)
3906 usym = TREE_OPERAND (usym, 0);
3908 if (TREE_CODE (csym) == ARRAY_REF)
3910 tree ind = TREE_OPERAND (csym, 1);
3911 if (TREE_CODE (ind) == INTEGER_CST
3912 && host_integerp (ind, 0)
3913 && TREE_INT_CST_LOW (ind) == 0)
3914 csym = TREE_OPERAND (csym, 0);
3916 if (operand_equal_p (usym, csym, 0))
3917 return -1;
3919 /* Now do a more complex comparison. */
3920 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3921 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3922 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3923 return -1;
3926 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3927 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3929 aff_combination_scale (&cbase_aff, shwi_to_double_int (-1 * ratio));
3930 aff_combination_add (&ubase_aff, &cbase_aff);
3931 expr = aff_combination_to_tree (&ubase_aff);
3932 ent.expr = expr;
3933 ent.hash = iterative_hash_expr (expr, 0);
3934 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3935 &ent, INSERT);
3936 if (*slot)
3937 return (*slot)->id;
3939 *slot = XNEW (struct iv_inv_expr_ent);
3940 (*slot)->expr = expr;
3941 (*slot)->hash = ent.hash;
3942 (*slot)->id = data->inv_expr_id++;
3943 return (*slot)->id;
3948 /* Determines the cost of the computation by which USE is expressed
3949 from induction variable CAND. If ADDRESS_P is true, we just need
3950 to create an address from it, otherwise we want to get it into a
3951 register. A set of invariants we depend on is stored in
3952 DEPENDS_ON. AT is the statement at which the value is computed.
3953 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3954 addressing is likely. */
3956 static comp_cost
3957 get_computation_cost_at (struct ivopts_data *data,
3958 struct iv_use *use, struct iv_cand *cand,
3959 bool address_p, bitmap *depends_on, gimple at,
3960 bool *can_autoinc,
3961 int *inv_expr_id)
3963 tree ubase = use->iv->base, ustep = use->iv->step;
3964 tree cbase, cstep;
3965 tree utype = TREE_TYPE (ubase), ctype;
3966 unsigned HOST_WIDE_INT cstepi, offset = 0;
3967 HOST_WIDE_INT ratio, aratio;
3968 bool var_present, symbol_present, stmt_is_after_inc;
3969 comp_cost cost;
3970 double_int rat;
3971 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3973 *depends_on = NULL;
3975 /* Only consider real candidates. */
3976 if (!cand->iv)
3977 return infinite_cost;
3979 cbase = cand->iv->base;
3980 cstep = cand->iv->step;
3981 ctype = TREE_TYPE (cbase);
3983 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3985 /* We do not have enough precision to express the values of the use. */
3986 return infinite_cost;
3989 if (address_p)
3991 /* Do not try to express the address of an object with a computation based
3992 on the address of a different object. This may cause problems in rtl
3993 level alias analysis (which does not expect this to happen, as it is
3994 invalid in C), and would be unlikely to be useful
3995 anyway. */
3996 if (use->iv->base_object
3997 && cand->iv->base_object
3998 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3999 return infinite_cost;
4002 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4004 /* TODO -- add direct handling of this case. */
4005 goto fallback;
4008 /* CSTEPI is removed from the offset in case the statement is after the
4009 increment. If the step is not constant, we use zero instead.
4010 This is a bit imprecise (there is the extra addition), but
4011 redundancy elimination is likely to transform the code so that
4012 it uses the value of the variable before the increment anyway,
4013 so this is not all that unrealistic. */
4014 if (cst_and_fits_in_hwi (cstep))
4015 cstepi = int_cst_value (cstep);
4016 else
4017 cstepi = 0;
4019 if (!constant_multiple_of (ustep, cstep, &rat))
4020 return infinite_cost;
4022 if (double_int_fits_in_shwi_p (rat))
4023 ratio = double_int_to_shwi (rat);
4024 else
4025 return infinite_cost;
4027 STRIP_NOPS (cbase);
4028 ctype = TREE_TYPE (cbase);
4030 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4032 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4033 or ratio == 1, it is better to handle this like
4035 ubase - ratio * cbase + ratio * var
4037 (this also holds in the case ratio == -1, TODO). */
4039 if (cst_and_fits_in_hwi (cbase))
4041 offset = - ratio * int_cst_value (cbase);
4042 cost = difference_cost (data,
4043 ubase, build_int_cst (utype, 0),
4044 &symbol_present, &var_present, &offset,
4045 depends_on);
4046 cost.cost /= avg_loop_niter (data->current_loop);
4048 else if (ratio == 1)
4050 tree real_cbase = cbase;
4052 /* Check to see if any adjustment is needed. */
4053 if (cstepi == 0 && stmt_is_after_inc)
4055 aff_tree real_cbase_aff;
4056 aff_tree cstep_aff;
4058 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4059 &real_cbase_aff);
4060 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4062 aff_combination_add (&real_cbase_aff, &cstep_aff);
4063 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4066 cost = difference_cost (data,
4067 ubase, real_cbase,
4068 &symbol_present, &var_present, &offset,
4069 depends_on);
4070 cost.cost /= avg_loop_niter (data->current_loop);
4072 else if (address_p
4073 && !POINTER_TYPE_P (ctype)
4074 && multiplier_allowed_in_address_p
4075 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4076 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4078 cbase
4079 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4080 cost = difference_cost (data,
4081 ubase, cbase,
4082 &symbol_present, &var_present, &offset,
4083 depends_on);
4084 cost.cost /= avg_loop_niter (data->current_loop);
4086 else
4088 cost = force_var_cost (data, cbase, depends_on);
4089 cost = add_costs (cost,
4090 difference_cost (data,
4091 ubase, build_int_cst (utype, 0),
4092 &symbol_present, &var_present,
4093 &offset, depends_on));
4094 cost.cost /= avg_loop_niter (data->current_loop);
4095 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
4098 if (inv_expr_id)
4100 *inv_expr_id =
4101 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4102 /* Clear depends_on. */
4103 if (*inv_expr_id != -1 && depends_on && *depends_on)
4104 bitmap_clear (*depends_on);
4107 /* If we are after the increment, the value of the candidate is higher by
4108 one iteration. */
4109 if (stmt_is_after_inc)
4110 offset -= ratio * cstepi;
4112 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
4113 (the symbol/var1/const parts may be omitted). If we are looking for an
4114 address, find the cost of addressing this. */
4115 if (address_p)
4116 return add_costs (cost,
4117 get_address_cost (symbol_present, var_present,
4118 offset, ratio, cstepi,
4119 TYPE_MODE (TREE_TYPE (utype)),
4120 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4121 speed, stmt_is_after_inc,
4122 can_autoinc));
4124 /* Otherwise estimate the costs for computing the expression. */
4125 if (!symbol_present && !var_present && !offset)
4127 if (ratio != 1)
4128 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
4129 return cost;
4132 /* Symbol + offset should be compile-time computable, so consider that they
4133 are added once to the variable, if present. */
4134 if (var_present && (symbol_present || offset))
4135 cost.cost += adjust_setup_cost (data,
4136 add_cost (TYPE_MODE (ctype), speed));
4138 /* Having an offset does not affect the runtime cost if it is added to the
4139 symbol, but it increases complexity. */
4140 if (offset)
4141 cost.complexity++;
4143 cost.cost += add_cost (TYPE_MODE (ctype), speed);
4145 aratio = ratio > 0 ? ratio : -ratio;
4146 if (aratio != 1)
4147 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
4148 return cost;
4150 fallback:
4151 if (can_autoinc)
4152 *can_autoinc = false;
4155 /* Just get the expression, expand it and measure the cost. */
4156 tree comp = get_computation_at (data->current_loop, use, cand, at);
4158 if (!comp)
4159 return infinite_cost;
4161 if (address_p)
4162 comp = build_simple_mem_ref (comp);
4164 return new_cost (computation_cost (comp, speed), 0);
4168 /* Determines the cost of the computation by which USE is expressed
4169 from induction variable CAND. If ADDRESS_P is true, we just need
4170 to create an address from it, otherwise we want to get it into a
4171 register. A set of invariants we depend on is stored in
4172 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4173 autoinc addressing is likely. */
4175 static comp_cost
4176 get_computation_cost (struct ivopts_data *data,
4177 struct iv_use *use, struct iv_cand *cand,
4178 bool address_p, bitmap *depends_on,
4179 bool *can_autoinc, int *inv_expr_id)
4181 return get_computation_cost_at (data,
4182 use, cand, address_p, depends_on, use->stmt,
4183 can_autoinc, inv_expr_id);
4186 /* Determines cost of basing replacement of USE on CAND in a generic
4187 expression. */
4189 static bool
4190 determine_use_iv_cost_generic (struct ivopts_data *data,
4191 struct iv_use *use, struct iv_cand *cand)
4193 bitmap depends_on;
4194 comp_cost cost;
4195 int inv_expr_id = -1;
4197 /* The simple case first -- if we need to express the value of the preserved
4198 original biv, the cost is 0. This also prevents us from counting the
4199 cost of the increment twice -- once at this use and once in the cost of
4200 the candidate. */
4201 if (cand->pos == IP_ORIGINAL
4202 && cand->incremented_at == use->stmt)
4204 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE, -1);
4205 return true;
4208 cost = get_computation_cost (data, use, cand, false, &depends_on,
4209 NULL, &inv_expr_id);
4211 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4212 inv_expr_id);
4214 return !infinite_cost_p (cost);
4217 /* Determines cost of basing replacement of USE on CAND in an address. */
4219 static bool
4220 determine_use_iv_cost_address (struct ivopts_data *data,
4221 struct iv_use *use, struct iv_cand *cand)
4223 bitmap depends_on;
4224 bool can_autoinc;
4225 int inv_expr_id = -1;
4226 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4227 &can_autoinc, &inv_expr_id);
4229 if (cand->ainc_use == use)
4231 if (can_autoinc)
4232 cost.cost -= cand->cost_step;
4233 /* If we generated the candidate solely for exploiting autoincrement
4234 opportunities, and it turns out it can't be used, set the cost to
4235 infinity to make sure we ignore it. */
4236 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4237 cost = infinite_cost;
4239 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE,
4240 inv_expr_id);
4242 return !infinite_cost_p (cost);
4245 /* Computes value of candidate CAND at position AT in iteration NITER, and
4246 stores it to VAL. */
4248 static void
4249 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4250 aff_tree *val)
4252 aff_tree step, delta, nit;
4253 struct iv *iv = cand->iv;
4254 tree type = TREE_TYPE (iv->base);
4255 tree steptype = type;
4256 if (POINTER_TYPE_P (type))
4257 steptype = sizetype;
4259 tree_to_aff_combination (iv->step, steptype, &step);
4260 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4261 aff_combination_convert (&nit, steptype);
4262 aff_combination_mult (&nit, &step, &delta);
4263 if (stmt_after_increment (loop, cand, at))
4264 aff_combination_add (&delta, &step);
4266 tree_to_aff_combination (iv->base, type, val);
4267 aff_combination_add (val, &delta);
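/* In other words, the value stored to VAL is

     cand->iv->base + NITER * cand->iv->step

   plus one more step when AT follows the increment of CAND, all computed
   as an affine combination so that pointer bases are handled in sizetype
   arithmetic.  */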
4270 /* Returns the period of induction variable IV. */
4272 static tree
4273 iv_period (struct iv *iv)
4275 tree step = iv->step, period, type;
4276 tree pow2div;
4278 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4280 type = unsigned_type_for (TREE_TYPE (step));
4281 /* The period of the iv is lcm (step, type_range)/step - 1,
4282 i.e., N*type_range/step - 1. Since the type range is a power
4283 of two, N == step >> num_of_ending_zeros_binary (step),
4284 so the final result is
4286 (type_range >> num_of_ending_zeros_binary (step)) - 1
4289 pow2div = num_ending_zeros (step);
4291 period = build_low_bits_mask (type,
4292 (TYPE_PRECISION (type)
4293 - tree_low_cst (pow2div, 1)));
4295 return period;
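/* For example, a 32-bit unsigned iv with step 8 has
   num_ending_zeros (8) == 3, so period == (1 << 29) - 1: the iv takes
   1 << 29 distinct values before returning to its initial one.  */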
4298 /* Returns the comparison operator used when eliminating the iv USE. */
4300 static enum tree_code
4301 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4303 struct loop *loop = data->current_loop;
4304 basic_block ex_bb;
4305 edge exit;
4307 ex_bb = gimple_bb (use->stmt);
4308 exit = EDGE_SUCC (ex_bb, 0);
4309 if (flow_bb_inside_loop_p (loop, exit->dest))
4310 exit = EDGE_SUCC (ex_bb, 1);
4312 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
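/* That is, if the loop is left when the condition is true, the
   eliminated test becomes CAND == BOUND; if it is left when the
   condition is false, the test becomes CAND != BOUND, which keeps the
   loop running until the candidate reaches the bound.  */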
4315 /* Check whether it is possible to express the condition in USE by comparison
4316 of candidate CAND. If so, store the value compared with to BOUND. */
4318 static bool
4319 may_eliminate_iv (struct ivopts_data *data,
4320 struct iv_use *use, struct iv_cand *cand, tree *bound)
4322 basic_block ex_bb;
4323 edge exit;
4324 tree nit, period;
4325 struct loop *loop = data->current_loop;
4326 aff_tree bnd;
4327 struct tree_niter_desc *desc = NULL;
4329 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4330 return false;
4332 /* For now, this works only for exits that dominate the loop latch.
4333 TODO: extend to other conditions inside loop body. */
4334 ex_bb = gimple_bb (use->stmt);
4335 if (use->stmt != last_stmt (ex_bb)
4336 || gimple_code (use->stmt) != GIMPLE_COND
4337 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4338 return false;
4340 exit = EDGE_SUCC (ex_bb, 0);
4341 if (flow_bb_inside_loop_p (loop, exit->dest))
4342 exit = EDGE_SUCC (ex_bb, 1);
4343 if (flow_bb_inside_loop_p (loop, exit->dest))
4344 return false;
4346 nit = niter_for_exit (data, exit, &desc);
4347 if (!nit)
4348 return false;
4350 /* Determine whether we can use the variable to test the exit condition.
4351 This is the case iff the period of the induction variable is greater
4352 than the number of iterations for which the exit condition is true. */
4353 period = iv_period (cand->iv);
4355 /* If the number of iterations is constant, compare against it directly. */
4356 if (TREE_CODE (nit) == INTEGER_CST)
4358 /* See cand_value_at. */
4359 if (stmt_after_increment (loop, cand, use->stmt))
4361 if (!tree_int_cst_lt (nit, period))
4362 return false;
4364 else
4366 if (tree_int_cst_lt (period, nit))
4367 return false;
4371 /* If not, and if this is the only possible exit of the loop, see whether
4372 we can get a conservative estimate on the number of iterations of the
4373 entire loop and compare against that instead. */
4374 else
4376 double_int period_value, max_niter;
4378 max_niter = desc->max;
4379 if (stmt_after_increment (loop, cand, use->stmt))
4380 max_niter = double_int_add (max_niter, double_int_one);
4381 period_value = tree_to_double_int (period);
4382 if (double_int_ucmp (max_niter, period_value) > 0)
4384 /* See if we can take advantage of inferred loop bound information. */
4385 if (loop_only_exit_p (loop, exit))
4387 if (!estimated_loop_iterations (loop, true, &max_niter))
4388 return false;
4389 /* The loop bound is already adjusted by adding 1. */
4390 if (double_int_ucmp (max_niter, period_value) > 0)
4391 return false;
4393 else
4394 return false;
4398 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4400 *bound = aff_combination_to_tree (&bnd);
4401 /* It is unlikely that computing the number of iterations using division
4402 would be more profitable than keeping the original induction variable. */
4403 if (expression_expensive_p (*bound))
4404 return false;
4405 return true;
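/* For illustration, in a loop such as

     for (i = 0; i < n; i++)
       p++;

   the exit test on i may be replaced by comparing the candidate p against
   the value cand_value_at computes for iteration NIT -- provided the
   period check above guarantees that p cannot wrap around and revisit
   that value early.  */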
4409 /* Determines cost of basing replacement of USE on CAND in a condition. */
4411 static bool
4412 determine_use_iv_cost_condition (struct ivopts_data *data,
4413 struct iv_use *use, struct iv_cand *cand)
4415 tree bound = NULL_TREE;
4416 struct iv *cmp_iv;
4417 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4418 comp_cost elim_cost, express_cost, cost;
4419 bool ok;
4420 int inv_expr_id = -1;
4421 tree *control_var, *bound_cst;
4423 /* Only consider real candidates. */
4424 if (!cand->iv)
4426 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE, -1);
4427 return false;
4430 /* Try iv elimination. */
4431 if (may_eliminate_iv (data, use, cand, &bound))
4433 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4434 /* The bound is a loop invariant, so it will be computed only
4435 once. */
4436 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4438 else
4439 elim_cost = infinite_cost;
4441 /* Try expressing the original giv. If it is compared with an invariant,
4442 note that we cannot get rid of it. */
4443 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4444 NULL, &cmp_iv);
4445 gcc_assert (ok);
4447 /* When the condition is a comparison of the candidate IV against
4448 zero, prefer this IV.
4450 TODO: The constant that we're subtracting from the cost should
4451 be target-dependent. This information should be added to the
4452 target costs for each backend. */
4453 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4454 && integer_zerop (*bound_cst)
4455 && (operand_equal_p (*control_var, cand->var_after, 0)
4456 || operand_equal_p (*control_var, cand->var_before, 0)))
4457 elim_cost.cost -= 1;
4459 express_cost = get_computation_cost (data, use, cand, false,
4460 &depends_on_express, NULL,
4461 &inv_expr_id);
4462 fd_ivopts_data = data;
4463 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4465 /* Choose the better approach, preferring the eliminated IV. */
4466 if (compare_costs (elim_cost, express_cost) <= 0)
4468 cost = elim_cost;
4469 depends_on = depends_on_elim;
4470 depends_on_elim = NULL;
4472 else
4474 cost = express_cost;
4475 depends_on = depends_on_express;
4476 depends_on_express = NULL;
4477 bound = NULL_TREE;
4480 set_use_iv_cost (data, use, cand, cost, depends_on, bound, inv_expr_id);
4482 if (depends_on_elim)
4483 BITMAP_FREE (depends_on_elim);
4484 if (depends_on_express)
4485 BITMAP_FREE (depends_on_express);
4487 return !infinite_cost_p (cost);
4490 /* Determines cost of basing replacement of USE on CAND. Returns false
4491 if USE cannot be based on CAND. */
4493 static bool
4494 determine_use_iv_cost (struct ivopts_data *data,
4495 struct iv_use *use, struct iv_cand *cand)
4497 switch (use->type)
4499 case USE_NONLINEAR_EXPR:
4500 return determine_use_iv_cost_generic (data, use, cand);
4502 case USE_ADDRESS:
4503 return determine_use_iv_cost_address (data, use, cand);
4505 case USE_COMPARE:
4506 return determine_use_iv_cost_condition (data, use, cand);
4508 default:
4509 gcc_unreachable ();
4513 /* Return true if get_computation_cost indicates that autoincrement is
4514 a possibility for the pair of USE and CAND, false otherwise. */
4516 static bool
4517 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4518 struct iv_cand *cand)
4520 bitmap depends_on;
4521 bool can_autoinc;
4522 comp_cost cost;
4524 if (use->type != USE_ADDRESS)
4525 return false;
4527 cost = get_computation_cost (data, use, cand, true, &depends_on,
4528 &can_autoinc, NULL);
4530 BITMAP_FREE (depends_on);
4532 return !infinite_cost_p (cost) && can_autoinc;
4535 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4536 use that allows autoincrement, and set their AINC_USE if possible. */
4538 static void
4539 set_autoinc_for_original_candidates (struct ivopts_data *data)
4541 unsigned i, j;
4543 for (i = 0; i < n_iv_cands (data); i++)
4545 struct iv_cand *cand = iv_cand (data, i);
4546 struct iv_use *closest = NULL;
4547 if (cand->pos != IP_ORIGINAL)
4548 continue;
4549 for (j = 0; j < n_iv_uses (data); j++)
4551 struct iv_use *use = iv_use (data, j);
4552 unsigned uid = gimple_uid (use->stmt);
4553 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4554 || uid > gimple_uid (cand->incremented_at))
4555 continue;
4556 if (closest == NULL || uid > gimple_uid (closest->stmt))
4557 closest = use;
4559 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4560 continue;
4561 cand->ainc_use = closest;
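/* The closest use chosen above is the last use in the candidate's block
   that does not follow the increment in statement order -- the natural
   partner for forming an autoincrement access on targets with such
   addressing modes.  */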
4565 /* Finds the candidates for the induction variables. */
4567 static void
4568 find_iv_candidates (struct ivopts_data *data)
4570 /* Add commonly used ivs. */
4571 add_standard_iv_candidates (data);
4573 /* Add old induction variables. */
4574 add_old_ivs_candidates (data);
4576 /* Add induction variables derived from uses. */
4577 add_derived_ivs_candidates (data);
4579 set_autoinc_for_original_candidates (data);
4581 /* Record the important candidates. */
4582 record_important_candidates (data);
4585 /* Determines costs of basing the use of the iv on an iv candidate. */
4587 static void
4588 determine_use_iv_costs (struct ivopts_data *data)
4590 unsigned i, j;
4591 struct iv_use *use;
4592 struct iv_cand *cand;
4593 bitmap to_clear = BITMAP_ALLOC (NULL);
4595 alloc_use_cost_map (data);
4597 for (i = 0; i < n_iv_uses (data); i++)
4599 use = iv_use (data, i);
4601 if (data->consider_all_candidates)
4603 for (j = 0; j < n_iv_cands (data); j++)
4605 cand = iv_cand (data, j);
4606 determine_use_iv_cost (data, use, cand);
4609 else
4611 bitmap_iterator bi;
4613 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4615 cand = iv_cand (data, j);
4616 if (!determine_use_iv_cost (data, use, cand))
4617 bitmap_set_bit (to_clear, j);
4620 /* Remove the candidates for which the cost is infinite from
4621 the list of related candidates. */
4622 bitmap_and_compl_into (use->related_cands, to_clear);
4623 bitmap_clear (to_clear);
4627 BITMAP_FREE (to_clear);
4629 if (dump_file && (dump_flags & TDF_DETAILS))
4631 fprintf (dump_file, "Use-candidate costs:\n");
4633 for (i = 0; i < n_iv_uses (data); i++)
4635 use = iv_use (data, i);
4637 fprintf (dump_file, "Use %d:\n", i);
4638 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4639 for (j = 0; j < use->n_map_members; j++)
4641 if (!use->cost_map[j].cand
4642 || infinite_cost_p (use->cost_map[j].cost))
4643 continue;
4645 fprintf (dump_file, " %d\t%d\t%d\t",
4646 use->cost_map[j].cand->id,
4647 use->cost_map[j].cost.cost,
4648 use->cost_map[j].cost.complexity);
4649 if (use->cost_map[j].depends_on)
4650 bitmap_print (dump_file,
4651 use->cost_map[j].depends_on, "","");
4652 if (use->cost_map[j].inv_expr_id != -1)
4653 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4654 fprintf (dump_file, "\n");
4657 fprintf (dump_file, "\n");
4659 fprintf (dump_file, "\n");
4663 /* Determines cost of the candidate CAND. */
4665 static void
4666 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4668 comp_cost cost_base;
4669 unsigned cost, cost_step;
4670 tree base;
4672 if (!cand->iv)
4674 cand->cost = 0;
4675 return;
4678 /* There are two costs associated with the candidate -- its increment
4679 and its initialization. The second is almost negligible for any loop
4680 that rolls enough, so we give it only very little weight. */
4682 base = cand->iv->base;
4683 cost_base = force_var_cost (data, base, NULL);
4684 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4686 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4688 /* Prefer the original ivs unless we may gain something by replacing them.
4689 The reason is to make debugging simpler, so this is not relevant for
4690 artificial ivs created by other optimization passes. */
4691 if (cand->pos != IP_ORIGINAL
4692 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4693 cost++;
4695 /* Prefer not to insert statements into latch unless there are some
4696 already (so that we do not create unnecessary jumps). */
4697 if (cand->pos == IP_END
4698 && empty_block_p (ip_end_pos (data->current_loop)))
4699 cost++;
4701 cand->cost = cost;
4702 cand->cost_step = cost_step;
4705 /* Determines costs of computation of the candidates. */
4707 static void
4708 determine_iv_costs (struct ivopts_data *data)
4710 unsigned i;
4712 if (dump_file && (dump_flags & TDF_DETAILS))
4714 fprintf (dump_file, "Candidate costs:\n");
4715 fprintf (dump_file, " cand\tcost\n");
4718 for (i = 0; i < n_iv_cands (data); i++)
4720 struct iv_cand *cand = iv_cand (data, i);
4722 determine_iv_cost (data, cand);
4724 if (dump_file && (dump_flags & TDF_DETAILS))
4725 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4728 if (dump_file && (dump_flags & TDF_DETAILS))
4729 fprintf (dump_file, "\n");
4732 /* Calculates cost for having SIZE induction variables. */
4734 static unsigned
4735 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4737 /* We add size to the cost, so that we prefer eliminating ivs
4738 if possible. */
4739 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
4740 data->body_includes_call);
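/* Roughly speaking: as long as SIZE plus the registers already used in
   the loop fit comfortably into the hard registers available, the
   penalty is just SIZE itself; when registers get scarce,
   estimate_reg_pressure_cost adds per-register costs, and once they are
   exhausted it charges the much larger spill cost.  */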
4743 /* For each size of the induction variable set determine the penalty. */
4745 static void
4746 determine_set_costs (struct ivopts_data *data)
4748 unsigned j, n;
4749 gimple phi;
4750 gimple_stmt_iterator psi;
4751 tree op;
4752 struct loop *loop = data->current_loop;
4753 bitmap_iterator bi;
4755 if (dump_file && (dump_flags & TDF_DETAILS))
4757 fprintf (dump_file, "Global costs:\n");
4758 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4759 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
4760 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4761 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4764 n = 0;
4765 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4767 phi = gsi_stmt (psi);
4768 op = PHI_RESULT (phi);
4770 if (!is_gimple_reg (op))
4771 continue;
4773 if (get_iv (data, op))
4774 continue;
4776 n++;
4779 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4781 struct version_info *info = ver_info (data, j);
4783 if (info->inv_id && info->has_nonlin_use)
4784 n++;
4787 data->regs_used = n;
4788 if (dump_file && (dump_flags & TDF_DETAILS))
4789 fprintf (dump_file, " regs_used %d\n", n);
4791 if (dump_file && (dump_flags & TDF_DETAILS))
4793 fprintf (dump_file, " cost for size:\n");
4794 fprintf (dump_file, " ivs\tcost\n");
4795 for (j = 0; j <= 2 * target_avail_regs; j++)
4796 fprintf (dump_file, " %d\t%d\n", j,
4797 ivopts_global_cost_for_size (data, j));
4798 fprintf (dump_file, "\n");
4802 /* Returns true if A is a cheaper cost pair than B. */
4804 static bool
4805 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4807 int cmp;
4809 if (!a)
4810 return false;
4812 if (!b)
4813 return true;
4815 cmp = compare_costs (a->cost, b->cost);
4816 if (cmp < 0)
4817 return true;
4819 if (cmp > 0)
4820 return false;
4822 /* In case the costs are the same, prefer the cheaper candidate. */
4823 if (a->cand->cost < b->cand->cost)
4824 return true;
4826 return false;
4832 /* Returns the candidate by which USE is expressed in IVS. */
4832 static struct cost_pair *
4833 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4835 return ivs->cand_for_use[use->id];
4838 /* Computes the cost field of IVS structure. */
4840 static void
4841 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4843 comp_cost cost = ivs->cand_use_cost;
4845 cost.cost += ivs->cand_cost;
4847 cost.cost += ivopts_global_cost_for_size (data,
4848 ivs->n_regs + ivs->num_used_inv_expr);
4850 ivs->cost = cost;
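/* The total cost of an assignment is thus

     cand_use_cost   (cost of expressing each use by its candidate)
   + cand_cost       (cost of stepping each selected candidate)
   + the global cost of occupying n_regs + num_used_inv_expr registers.  */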
4855 /* Remove invariants in set INVS from set IVS. */
4855 static void
4856 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4858 bitmap_iterator bi;
4859 unsigned iid;
4861 if (!invs)
4862 return;
4864 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4866 ivs->n_invariant_uses[iid]--;
4867 if (ivs->n_invariant_uses[iid] == 0)
4868 ivs->n_regs--;
4872 /* Set USE not to be expressed by any candidate in IVS. */
4874 static void
4875 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4876 struct iv_use *use)
4878 unsigned uid = use->id, cid;
4879 struct cost_pair *cp;
4881 cp = ivs->cand_for_use[uid];
4882 if (!cp)
4883 return;
4884 cid = cp->cand->id;
4886 ivs->bad_uses++;
4887 ivs->cand_for_use[uid] = NULL;
4888 ivs->n_cand_uses[cid]--;
4890 if (ivs->n_cand_uses[cid] == 0)
4892 bitmap_clear_bit (ivs->cands, cid);
4893 /* Do not count the pseudocandidates. */
4894 if (cp->cand->iv)
4895 ivs->n_regs--;
4896 ivs->n_cands--;
4897 ivs->cand_cost -= cp->cand->cost;
4899 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4902 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4904 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4906 if (cp->inv_expr_id != -1)
4908 ivs->used_inv_expr[cp->inv_expr_id]--;
4909 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
4910 ivs->num_used_inv_expr--;
4912 iv_ca_recount_cost (data, ivs);
4915 /* Add invariants in set INVS to set IVS. */
4917 static void
4918 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4920 bitmap_iterator bi;
4921 unsigned iid;
4923 if (!invs)
4924 return;
4926 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4928 ivs->n_invariant_uses[iid]++;
4929 if (ivs->n_invariant_uses[iid] == 1)
4930 ivs->n_regs++;
4934 /* Set cost pair for USE in set IVS to CP. */
4936 static void
4937 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4938 struct iv_use *use, struct cost_pair *cp)
4940 unsigned uid = use->id, cid;
4942 if (ivs->cand_for_use[uid] == cp)
4943 return;
4945 if (ivs->cand_for_use[uid])
4946 iv_ca_set_no_cp (data, ivs, use);
4948 if (cp)
4950 cid = cp->cand->id;
4952 ivs->bad_uses--;
4953 ivs->cand_for_use[uid] = cp;
4954 ivs->n_cand_uses[cid]++;
4955 if (ivs->n_cand_uses[cid] == 1)
4957 bitmap_set_bit (ivs->cands, cid);
4958 /* Do not count the pseudocandidates. */
4959 if (cp->cand->iv)
4960 ivs->n_regs++;
4961 ivs->n_cands++;
4962 ivs->cand_cost += cp->cand->cost;
4964 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4967 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4968 iv_ca_set_add_invariants (ivs, cp->depends_on);
4970 if (cp->inv_expr_id != -1)
4972 ivs->used_inv_expr[cp->inv_expr_id]++;
4973 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
4974 ivs->num_used_inv_expr++;
4976 iv_ca_recount_cost (data, ivs);
4980 /* Extend set IVS by expressing USE by some of the candidates in it
4981 if possible. All important candidates will be considered
4982 if IMPORTANT_CANDIDATES is true. */
4984 static void
4985 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4986 struct iv_use *use, bool important_candidates)
4988 struct cost_pair *best_cp = NULL, *cp;
4989 bitmap_iterator bi;
4990 bitmap cands;
4991 unsigned i;
4993 gcc_assert (ivs->upto >= use->id);
4995 if (ivs->upto == use->id)
4997 ivs->upto++;
4998 ivs->bad_uses++;
5001 cands = (important_candidates ? data->important_candidates : ivs->cands);
5002 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5004 struct iv_cand *cand = iv_cand (data, i);
5006 cp = get_use_iv_cost (data, use, cand);
5008 if (cheaper_cost_pair (cp, best_cp))
5009 best_cp = cp;
5012 iv_ca_set_cp (data, ivs, use, best_cp);
5015 /* Get cost for assignment IVS. */
5017 static comp_cost
5018 iv_ca_cost (struct iv_ca *ivs)
5020 /* This was a conditional expression but it triggered a bug in
5021 Sun C 5.5. */
5022 if (ivs->bad_uses)
5023 return infinite_cost;
5024 else
5025 return ivs->cost;
5028 /* Returns true if all dependences of CP are among invariants in IVS. */
5030 static bool
5031 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5033 unsigned i;
5034 bitmap_iterator bi;
5036 if (!cp->depends_on)
5037 return true;
5039 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5041 if (ivs->n_invariant_uses[i] == 0)
5042 return false;
5045 return true;
5048 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5049 it before NEXT_CHANGE. */
5051 static struct iv_ca_delta *
5052 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5053 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5055 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5057 change->use = use;
5058 change->old_cp = old_cp;
5059 change->new_cp = new_cp;
5060 change->next_change = next_change;
5062 return change;
5065 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5066 are rewritten. */
5068 static struct iv_ca_delta *
5069 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5071 struct iv_ca_delta *last;
5073 if (!l2)
5074 return l1;
5076 if (!l1)
5077 return l2;
5079 for (last = l1; last->next_change; last = last->next_change)
5080 continue;
5081 last->next_change = l2;
5083 return l1;
5086 /* Reverse the list of changes DELTA, forming the inverse to it. */
5088 static struct iv_ca_delta *
5089 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5091 struct iv_ca_delta *act, *next, *prev = NULL;
5092 struct cost_pair *tmp;
5094 for (act = delta; act; act = next)
5096 next = act->next_change;
5097 act->next_change = prev;
5098 prev = act;
5100 tmp = act->old_cp;
5101 act->old_cp = act->new_cp;
5102 act->new_cp = tmp;
5105 return prev;
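/* For instance, reversing the list

     USE1: A -> B, USE2: C -> D

   yields

     USE2: D -> C, USE1: B -> A

   which, when committed, exactly undoes the original sequence of
   changes.  */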
5108 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5109 reverted instead. */
5111 static void
5112 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5113 struct iv_ca_delta *delta, bool forward)
5115 struct cost_pair *from, *to;
5116 struct iv_ca_delta *act;
5118 if (!forward)
5119 delta = iv_ca_delta_reverse (delta);
5121 for (act = delta; act; act = act->next_change)
5123 from = act->old_cp;
5124 to = act->new_cp;
5125 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5126 iv_ca_set_cp (data, ivs, act->use, to);
5129 if (!forward)
5130 iv_ca_delta_reverse (delta);
5133 /* Returns true if CAND is used in IVS. */
5135 static bool
5136 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5138 return ivs->n_cand_uses[cand->id] > 0;
5141 /* Returns number of induction variable candidates in the set IVS. */
5143 static unsigned
5144 iv_ca_n_cands (struct iv_ca *ivs)
5146 return ivs->n_cands;
5149 /* Free the list of changes DELTA. */
5151 static void
5152 iv_ca_delta_free (struct iv_ca_delta **delta)
5154 struct iv_ca_delta *act, *next;
5156 for (act = *delta; act; act = next)
5158 next = act->next_change;
5159 free (act);
5162 *delta = NULL;
5165 /* Allocates a new iv candidate assignment. */
5167 static struct iv_ca *
5168 iv_ca_new (struct ivopts_data *data)
5170 struct iv_ca *nw = XNEW (struct iv_ca);
5172 nw->upto = 0;
5173 nw->bad_uses = 0;
5174 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5175 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5176 nw->cands = BITMAP_ALLOC (NULL);
5177 nw->n_cands = 0;
5178 nw->n_regs = 0;
5179 nw->cand_use_cost = zero_cost;
5180 nw->cand_cost = 0;
5181 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5182 nw->cost = zero_cost;
5183 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5184 nw->num_used_inv_expr = 0;
5186 return nw;
5189 /* Free memory occupied by the set IVS. */
5191 static void
5192 iv_ca_free (struct iv_ca **ivs)
5194 free ((*ivs)->cand_for_use);
5195 free ((*ivs)->n_cand_uses);
5196 BITMAP_FREE ((*ivs)->cands);
5197 free ((*ivs)->n_invariant_uses);
5198 free ((*ivs)->used_inv_expr);
5199 free (*ivs);
5200 *ivs = NULL;
5203 /* Dumps IVS to FILE. */
5205 static void
5206 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5208 const char *pref = " invariants ";
5209 unsigned i;
5210 comp_cost cost = iv_ca_cost (ivs);
5212 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5213 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5214 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5215 bitmap_print (file, ivs->cands, " candidates: ","\n");
5217 for (i = 0; i < ivs->upto; i++)
5219 struct iv_use *use = iv_use (data, i);
5220 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5221 if (cp)
5222 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5223 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5224 else
5225 fprintf (file, " use:%d --> ??\n", use->id);
5228 for (i = 1; i <= data->max_inv_id; i++)
5229 if (ivs->n_invariant_uses[i])
5231 fprintf (file, "%s%d", pref, i);
5232 pref = ", ";
5234 fprintf (file, "\n\n");
5237 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5238 new set, and store differences in DELTA. Number of induction variables
5239 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5240 the function tries to find a solution with minimal iv candidates. */
5242 static comp_cost
5243 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5244 struct iv_cand *cand, struct iv_ca_delta **delta,
5245 unsigned *n_ivs, bool min_ncand)
5247 unsigned i;
5248 comp_cost cost;
5249 struct iv_use *use;
5250 struct cost_pair *old_cp, *new_cp;
5252 *delta = NULL;
5253 for (i = 0; i < ivs->upto; i++)
5255 use = iv_use (data, i);
5256 old_cp = iv_ca_cand_for_use (ivs, use);
5258 if (old_cp
5259 && old_cp->cand == cand)
5260 continue;
5262 new_cp = get_use_iv_cost (data, use, cand);
5263 if (!new_cp)
5264 continue;
5266 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5267 continue;
5269 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5270 continue;
5272 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5275 iv_ca_delta_commit (data, ivs, *delta, true);
5276 cost = iv_ca_cost (ivs);
5277 if (n_ivs)
5278 *n_ivs = iv_ca_n_cands (ivs);
5279 iv_ca_delta_commit (data, ivs, *delta, false);
5281 return cost;
5284 /* Try narrowing set IVS by removing CAND. Return the cost of
5285 the new set and store the differences in DELTA. */
5287 static comp_cost
5288 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5289 struct iv_cand *cand, struct iv_ca_delta **delta)
5291 unsigned i, ci;
5292 struct iv_use *use;
5293 struct cost_pair *old_cp, *new_cp, *cp;
5294 bitmap_iterator bi;
5295 struct iv_cand *cnd;
5296 comp_cost cost;
5298 *delta = NULL;
5299 for (i = 0; i < n_iv_uses (data); i++)
5301 use = iv_use (data, i);
5303 old_cp = iv_ca_cand_for_use (ivs, use);
5304 if (old_cp->cand != cand)
5305 continue;
5307 new_cp = NULL;
5309 if (data->consider_all_candidates)
5311 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5313 if (ci == cand->id)
5314 continue;
5316 cnd = iv_cand (data, ci);
5318 cp = get_use_iv_cost (data, use, cnd);
5319 if (!cp)
5320 continue;
5322 if (!iv_ca_has_deps (ivs, cp))
5323 continue;
5325 if (!cheaper_cost_pair (cp, new_cp))
5326 continue;
5328 new_cp = cp;
5331 else
5333 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5335 if (ci == cand->id)
5336 continue;
5338 cnd = iv_cand (data, ci);
5340 cp = get_use_iv_cost (data, use, cnd);
5341 if (!cp)
5342 continue;
5343 if (!iv_ca_has_deps (ivs, cp))
5344 continue;
5346 if (!cheaper_cost_pair (cp, new_cp))
5347 continue;
5349 new_cp = cp;
5353 if (!new_cp)
5355 iv_ca_delta_free (delta);
5356 return infinite_cost;
5359 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5362 iv_ca_delta_commit (data, ivs, *delta, true);
5363 cost = iv_ca_cost (ivs);
5364 iv_ca_delta_commit (data, ivs, *delta, false);
5366 return cost;
5369 /* Try optimizing the set of candidates IVS by removing candidates different
5370 from EXCEPT_CAND from it. Return cost of the new set, and store
5371 differences in DELTA. */
5373 static comp_cost
5374 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5375 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5377 bitmap_iterator bi;
5378 struct iv_ca_delta *act_delta, *best_delta;
5379 unsigned i;
5380 comp_cost best_cost, acost;
5381 struct iv_cand *cand;
5383 best_delta = NULL;
5384 best_cost = iv_ca_cost (ivs);
5386 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5388 cand = iv_cand (data, i);
5390 if (cand == except_cand)
5391 continue;
5393 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5395 if (compare_costs (acost, best_cost) < 0)
5397 best_cost = acost;
5398 iv_ca_delta_free (&best_delta);
5399 best_delta = act_delta;
5401 else
5402 iv_ca_delta_free (&act_delta);
5405 if (!best_delta)
5407 *delta = NULL;
5408 return best_cost;
5411 /* Recurse to possibly remove other unnecessary ivs. */
5412 iv_ca_delta_commit (data, ivs, best_delta, true);
5413 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5414 iv_ca_delta_commit (data, ivs, best_delta, false);
5415 *delta = iv_ca_delta_join (best_delta, *delta);
5416 return best_cost;
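/* Note the recursion above: after the most profitable candidate has been
   removed, pruning is retried on the reduced set, so a whole chain of
   candidates made redundant by a single removal is eliminated within one
   call to iv_ca_prune.  */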
5419 /* Tries to extend the set IVS in the best possible way in order
5420 to express the USE. If ORIGINALP is true, prefer candidates from
5421 the original set of IVs, otherwise favor important candidates not
5422 based on any memory object. */
5424 static bool
5425 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5426 struct iv_use *use, bool originalp)
5428 comp_cost best_cost, act_cost;
5429 unsigned i;
5430 bitmap_iterator bi;
5431 struct iv_cand *cand;
5432 struct iv_ca_delta *best_delta = NULL, *act_delta;
5433 struct cost_pair *cp;
5435 iv_ca_add_use (data, ivs, use, false);
5436 best_cost = iv_ca_cost (ivs);
5438 cp = iv_ca_cand_for_use (ivs, use);
5439 if (!cp)
5441 ivs->upto--;
5442 ivs->bad_uses--;
5443 iv_ca_add_use (data, ivs, use, true);
5444 best_cost = iv_ca_cost (ivs);
5445 cp = iv_ca_cand_for_use (ivs, use);
5447 if (cp)
5449 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5450 iv_ca_set_no_cp (data, ivs, use);
5453 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5454 first try important candidates not based on any memory object. Only if
5455 this fails, try the specific ones. Rationale -- in loops with many
5456 variables the best choice often is to use just one generic biv. If we
5457 added here many ivs specific to the uses, the optimization algorithm later
5458 would be likely to get stuck in a local minimum, thus causing us to create
5459 too many ivs. The approach from few ivs to more seems more likely to be
5460 successful -- starting from few ivs, replacing an expensive use by a
5461 specific iv should always be a win. */
5462 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5464 cand = iv_cand (data, i);
5466 if (originalp && cand->pos != IP_ORIGINAL)
5467 continue;
5469 if (!originalp && cand->iv->base_object != NULL_TREE)
5470 continue;
5472 if (iv_ca_cand_used_p (ivs, cand))
5473 continue;
5475 cp = get_use_iv_cost (data, use, cand);
5476 if (!cp)
5477 continue;
5479 iv_ca_set_cp (data, ivs, use, cp);
5480 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5481 true);
5482 iv_ca_set_no_cp (data, ivs, use);
5483 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5485 if (compare_costs (act_cost, best_cost) < 0)
5487 best_cost = act_cost;
5489 iv_ca_delta_free (&best_delta);
5490 best_delta = act_delta;
5492 else
5493 iv_ca_delta_free (&act_delta);
5496 if (infinite_cost_p (best_cost))
5498 for (i = 0; i < use->n_map_members; i++)
5500 cp = use->cost_map + i;
5501 cand = cp->cand;
5502 if (!cand)
5503 continue;
5505 /* Already tried this. */
5506 if (cand->important)
5508 if (originalp && cand->pos == IP_ORIGINAL)
5509 continue;
5510 if (!originalp && cand->iv->base_object == NULL_TREE)
5511 continue;
5514 if (iv_ca_cand_used_p (ivs, cand))
5515 continue;
5517 act_delta = NULL;
5518 iv_ca_set_cp (data, ivs, use, cp);
5519 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5520 iv_ca_set_no_cp (data, ivs, use);
5521 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5522 cp, act_delta);
5524 if (compare_costs (act_cost, best_cost) < 0)
5526 best_cost = act_cost;
5528 if (best_delta)
5529 iv_ca_delta_free (&best_delta);
5530 best_delta = act_delta;
5532 else
5533 iv_ca_delta_free (&act_delta);
5537 iv_ca_delta_commit (data, ivs, best_delta, true);
5538 iv_ca_delta_free (&best_delta);
5540 return !infinite_cost_p (best_cost);
5543 /* Finds an initial assignment of candidates to uses. */
5545 static struct iv_ca *
5546 get_initial_solution (struct ivopts_data *data, bool originalp)
5548 struct iv_ca *ivs = iv_ca_new (data);
5549 unsigned i;
5551 for (i = 0; i < n_iv_uses (data); i++)
5552 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5554 iv_ca_free (&ivs);
5555 return NULL;
5558 return ivs;
5561 /* Tries to improve set of induction variables IVS. */
5563 static bool
5564 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5566 unsigned i, n_ivs;
5567 comp_cost acost, best_cost = iv_ca_cost (ivs);
5568 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5569 struct iv_cand *cand;
5571 /* Try extending the set of induction variables by one. */
5572 for (i = 0; i < n_iv_cands (data); i++)
5574 cand = iv_cand (data, i);
5576 if (iv_ca_cand_used_p (ivs, cand))
5577 continue;
5579 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5580 if (!act_delta)
5581 continue;
5583 /* If we successfully added the candidate and the set is small enough,
5584 try optimizing it by removing other candidates. */
5585 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5587 iv_ca_delta_commit (data, ivs, act_delta, true);
5588 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5589 iv_ca_delta_commit (data, ivs, act_delta, false);
5590 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5593 if (compare_costs (acost, best_cost) < 0)
5595 best_cost = acost;
5596 iv_ca_delta_free (&best_delta);
5597 best_delta = act_delta;
5599 else
5600 iv_ca_delta_free (&act_delta);
5603 if (!best_delta)
5605 /* Try removing the candidates from the set instead. */
5606 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5608 /* Nothing more we can do. */
5609 if (!best_delta)
5610 return false;
5613 iv_ca_delta_commit (data, ivs, best_delta, true);
5614 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5615 iv_ca_delta_free (&best_delta);
5616 return true;
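/* Since a delta is committed only when it strictly lowers the cost
   (whether found by extension or, failing that, by pruning alone), every
   successful call to try_improve_iv_set decreases the cost of IVS, so the
   loop in find_optimal_iv_set_1 below terminates.  */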
5619 /* Attempts to find the optimal set of induction variables. We use a simple
5620 greedy heuristic -- we try to replace at most one candidate in the selected
5621 solution and remove the unused ivs while this improves the cost. */
5623 static struct iv_ca *
5624 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5626 struct iv_ca *set;
5628 /* Get the initial solution. */
5629 set = get_initial_solution (data, originalp);
5630 if (!set)
5632 if (dump_file && (dump_flags & TDF_DETAILS))
5633 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5634 return NULL;
5637 if (dump_file && (dump_flags & TDF_DETAILS))
5639 fprintf (dump_file, "Initial set of candidates:\n");
5640 iv_ca_dump (data, dump_file, set);
5643 while (try_improve_iv_set (data, set))
5645 if (dump_file && (dump_flags & TDF_DETAILS))
5647 fprintf (dump_file, "Improved to:\n");
5648 iv_ca_dump (data, dump_file, set);
5652 return set;
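/* Attempts to find the optimal set of induction variables, trying both the
   strategy that prefers the original ivs and the one that prefers generic
   candidates, and returns the cheaper of the two resulting sets.  */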
5655 static struct iv_ca *
5656 find_optimal_iv_set (struct ivopts_data *data)
5658 unsigned i;
5659 struct iv_ca *set, *origset;
5660 struct iv_use *use;
5661 comp_cost cost, origcost;
5663 /* Determine the cost based on a strategy that starts with original IVs,
5664 and try again using a strategy that prefers candidates not based
5665 on any IVs. */
5666 origset = find_optimal_iv_set_1 (data, true);
5667 set = find_optimal_iv_set_1 (data, false);
5669 if (!origset && !set)
5670 return NULL;
5672 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5673 cost = set ? iv_ca_cost (set) : infinite_cost;
5675 if (dump_file && (dump_flags & TDF_DETAILS))
5677 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5678 origcost.cost, origcost.complexity);
5679 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5680 cost.cost, cost.complexity);
5683 /* Choose the one with the best cost. */
5684 if (compare_costs (origcost, cost) <= 0)
5686 if (set)
5687 iv_ca_free (&set);
5688 set = origset;
5690 else if (origset)
5691 iv_ca_free (&origset);
5693 for (i = 0; i < n_iv_uses (data); i++)
5695 use = iv_use (data, i);
5696 use->selected = iv_ca_cand_for_use (set, use)->cand;
5699 return set;
5702 /* Creates a new induction variable corresponding to CAND. */
5704 static void
5705 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5707 gimple_stmt_iterator incr_pos;
5708 tree base;
5709 bool after = false;
5711 if (!cand->iv)
5712 return;
5714 switch (cand->pos)
5716 case IP_NORMAL:
5717 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5718 break;
5720 case IP_END:
5721 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5722 after = true;
5723 break;
5725 case IP_AFTER_USE:
5726 after = true;
5727 /* fall through */
5728 case IP_BEFORE_USE:
5729 incr_pos = gsi_for_stmt (cand->incremented_at);
5730 break;
5732 case IP_ORIGINAL:
5733 /* Mark that the iv is preserved. */
5734 name_info (data, cand->var_before)->preserve_biv = true;
5735 name_info (data, cand->var_after)->preserve_biv = true;
5737 /* Rewrite the increment so that it uses var_before directly. */
5738 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5739 return;
5742 gimple_add_tmp_var (cand->var_before);
5743 add_referenced_var (cand->var_before);
5745 base = unshare_expr (cand->iv->base);
5747 create_iv (base, unshare_expr (cand->iv->step),
5748 cand->var_before, data->current_loop,
5749 &incr_pos, after, &cand->var_before, &cand->var_after);
5752 /* Creates new induction variables described in SET. */
5754 static void
5755 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5757 unsigned i;
5758 struct iv_cand *cand;
5759 bitmap_iterator bi;
5761 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5763 cand = iv_cand (data, i);
5764 create_new_iv (data, cand);
5767 if (dump_file && (dump_flags & TDF_DETAILS))
5769 fprintf (dump_file, "\nSelected IV set: \n");
5770 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5772 cand = iv_cand (data, i);
5773 dump_cand (dump_file, cand);
5775 fprintf (dump_file, "\n");
5779 /* Rewrites USE (definition of iv used in a nonlinear expression)
5780 using candidate CAND. */
5782 static void
5783 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5784 struct iv_use *use, struct iv_cand *cand)
5786 tree comp;
5787 tree op, tgt;
5788 gimple ass;
5789 gimple_stmt_iterator bsi;
5791 /* An important special case -- if we are asked to express value of
5792 the original iv by itself, just exit; there is no need to
5793 introduce a new computation (that might also need casting the
5794 variable to unsigned and back). */
5795 if (cand->pos == IP_ORIGINAL
5796 && cand->incremented_at == use->stmt)
5798 tree step, ctype, utype;
5799 enum tree_code incr_code = PLUS_EXPR, old_code;
5801 gcc_assert (is_gimple_assign (use->stmt));
5802 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5804 step = cand->iv->step;
5805 ctype = TREE_TYPE (step);
5806 utype = TREE_TYPE (cand->var_after);
5807 if (TREE_CODE (step) == NEGATE_EXPR)
5809 incr_code = MINUS_EXPR;
5810 step = TREE_OPERAND (step, 0);
5813 /* Check whether we may leave the computation unchanged.
5814 This is the case only if it does not rely on other
5815 computations in the loop -- otherwise, the computation
5816 we rely upon may be removed in remove_unused_ivs,
5817 thus leading to ICE. */
5818 old_code = gimple_assign_rhs_code (use->stmt);
5819 if (old_code == PLUS_EXPR
5820 || old_code == MINUS_EXPR
5821 || old_code == POINTER_PLUS_EXPR)
5823 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5824 op = gimple_assign_rhs2 (use->stmt);
5825 else if (old_code != MINUS_EXPR
5826 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5827 op = gimple_assign_rhs1 (use->stmt);
5828 else
5829 op = NULL_TREE;
5831 else
5832 op = NULL_TREE;
5834 if (op
5835 && (TREE_CODE (op) == INTEGER_CST
5836 || operand_equal_p (op, step, 0)))
5837 return;
5839 /* Otherwise, add the necessary computations to express
5840 the iv. */
5841 op = fold_convert (ctype, cand->var_before);
5842 comp = fold_convert (utype,
5843 build2 (incr_code, ctype, op,
5844 unshare_expr (step)));
5846 else
5848 comp = get_computation (data->current_loop, use, cand);
5849 gcc_assert (comp != NULL_TREE);
5852 switch (gimple_code (use->stmt))
5854 case GIMPLE_PHI:
5855 tgt = PHI_RESULT (use->stmt);
5857 /* If we should keep the biv, do not replace it. */
5858 if (name_info (data, tgt)->preserve_biv)
5859 return;
5861 bsi = gsi_after_labels (gimple_bb (use->stmt));
5862 break;
5864 case GIMPLE_ASSIGN:
5865 tgt = gimple_assign_lhs (use->stmt);
5866 bsi = gsi_for_stmt (use->stmt);
5867 break;
5869 default:
5870 gcc_unreachable ();
5873 if (!valid_gimple_rhs_p (comp)
5874 || (gimple_code (use->stmt) != GIMPLE_PHI
5875 /* We can't allow re-allocating the stmt as it might still be
5876 pointed to. */
5877 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5878 >= gimple_num_ops (gsi_stmt (bsi)))))
5880 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
5881 true, GSI_SAME_STMT);
5882 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
5884 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
5885 /* As this isn't a plain copy we have to reset alignment
5886 information. */
5887 if (SSA_NAME_PTR_INFO (comp))
5889 SSA_NAME_PTR_INFO (comp)->align = 1;
5890 SSA_NAME_PTR_INFO (comp)->misalign = 0;
5895 if (gimple_code (use->stmt) == GIMPLE_PHI)
5897 ass = gimple_build_assign (tgt, comp);
5898 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5900 bsi = gsi_for_stmt (use->stmt);
5901 remove_phi_node (&bsi, false);
5903 else
5905 gimple_assign_set_rhs_from_tree (&bsi, comp);
5906 use->stmt = gsi_stmt (bsi);
5910 /* Copies the reference information from OLD_REF to NEW_REF. */
5912 static void
5913 copy_ref_info (tree new_ref, tree old_ref)
5915 tree new_ptr_base = NULL_TREE;
5917 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5918 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5920 new_ptr_base = TREE_OPERAND (new_ref, 0);
5922 /* We can transfer points-to information from an old pointer
5923 or decl base to the new one. */
5924 if (new_ptr_base
5925 && TREE_CODE (new_ptr_base) == SSA_NAME
5926 && !SSA_NAME_PTR_INFO (new_ptr_base))
5928 tree base = get_base_address (old_ref);
5929 if (!base)
5931 else if ((TREE_CODE (base) == MEM_REF
5932 || TREE_CODE (base) == TARGET_MEM_REF)
5933 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
5934 && SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)))
5936 struct ptr_info_def *new_pi;
5937 duplicate_ssa_name_ptr_info
5938 (new_ptr_base, SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5939 new_pi = SSA_NAME_PTR_INFO (new_ptr_base);
5940 /* We have to be careful about transferring alignment information. */
5941 if (TREE_CODE (old_ref) == MEM_REF
5942 && !(TREE_CODE (new_ref) == TARGET_MEM_REF
5943 && (TMR_INDEX2 (new_ref)
5944 || (TMR_STEP (new_ref)
5945 && (TREE_INT_CST_LOW (TMR_STEP (new_ref))
5946 < new_pi->align)))))
5948 new_pi->misalign += double_int_sub (mem_ref_offset (old_ref),
5949 mem_ref_offset (new_ref)).low;
5950 new_pi->misalign &= (new_pi->align - 1);
5952 else
5954 new_pi->align = 1;
5955 new_pi->misalign = 0;
5958 else if (TREE_CODE (base) == VAR_DECL
5959 || TREE_CODE (base) == PARM_DECL
5960 || TREE_CODE (base) == RESULT_DECL)
5962 struct ptr_info_def *pi = get_ptr_info (new_ptr_base);
5963 pt_solution_set_var (&pi->pt, base);
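/* In the alignment handling above, the misalignment inherited from the
   base pointer is adjusted by the difference between the old and the
   new constant offset, modulo the known alignment; if the new
   TARGET_MEM_REF gains a second index, or a step smaller than that
   alignment, nothing useful is known any more and the alignment is
   conservatively reset.  */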
5968 /* Performs a peephole optimization to reorder the iv update statement with
5969 a mem ref to enable instruction combining in later phases. The mem ref uses
5970 the iv value before the update, so the reordering transformation requires
5971 adjustment of the offset. CAND is the selected IV_CAND.
5973 Example:
5975 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
5976 iv2 = iv1 + 1;
5978 if (t < val) (1)
5979 goto L;
5980 goto Head;
5983 directly propagating t over to (1) will introduce an overlapping live range,
5984 thus increasing register pressure. This peephole transforms it into:
5987 iv2 = iv1 + 1;
5988 t = MEM_REF (base, iv2, 8, 8);
5989 if (t < val)
5990 goto L;
5991 goto Head;
5994 static void
5995 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
5997 tree var_after;
5998 gimple iv_update, stmt;
5999 basic_block bb;
6000 gimple_stmt_iterator gsi, gsi_iv;
6002 if (cand->pos != IP_NORMAL)
6003 return;
6005 var_after = cand->var_after;
6006 iv_update = SSA_NAME_DEF_STMT (var_after);
6008 bb = gimple_bb (iv_update);
6009 gsi = gsi_last_nondebug_bb (bb);
6010 stmt = gsi_stmt (gsi);
6012 /* Only handle conditional statements for now. */
6013 if (gimple_code (stmt) != GIMPLE_COND)
6014 return;
6016 gsi_prev_nondebug (&gsi);
6017 stmt = gsi_stmt (gsi);
6018 if (stmt != iv_update)
6019 return;
6021 gsi_prev_nondebug (&gsi);
6022 if (gsi_end_p (gsi))
6023 return;
6025 stmt = gsi_stmt (gsi);
6026 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6027 return;
6029 if (stmt != use->stmt)
6030 return;
6032 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6033 return;
6035 if (dump_file && (dump_flags & TDF_DETAILS))
6037 fprintf (dump_file, "Reordering \n");
6038 print_gimple_stmt (dump_file, iv_update, 0, 0);
6039 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6040 fprintf (dump_file, "\n");
6043 gsi = gsi_for_stmt (use->stmt);
6044 gsi_iv = gsi_for_stmt (iv_update);
6045 gsi_move_before (&gsi_iv, &gsi);
6047 cand->pos = IP_BEFORE_USE;
6048 cand->incremented_at = use->stmt;
6051 /* Rewrites USE (address that is an iv) using candidate CAND. */
6053 static void
6054 rewrite_use_address (struct ivopts_data *data,
6055 struct iv_use *use, struct iv_cand *cand)
6057 aff_tree aff;
6058 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6059 tree base_hint = NULL_TREE;
6060 tree ref, iv;
6061 bool ok;
6063 adjust_iv_update_pos (cand, use);
6064 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6065 gcc_assert (ok);
6066 unshare_aff_combination (&aff);
6068 /* To avoid undefined overflow problems, all IV candidates use unsigned
6069 integer types. The drawback is that this makes it impossible for
6070 create_mem_ref to distinguish an IV that is based on a memory object
6071 from one that represents simply an offset.
6073 To work around this problem, we pass a hint to create_mem_ref that
6074 indicates which variable (if any) in aff is an IV based on a memory
6075 object. Note that we only consider the candidate. If this is not
6076 based on an object, the base of the reference is in some subexpression
6077 of the use -- but these will use pointer types, so they are recognized
6078 by the create_mem_ref heuristics anyway. */
6079 if (cand->iv->base_object)
6080 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6082 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6083 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6084 reference_alias_ptr_type (*use->op_p),
6085 iv, base_hint, data->speed);
6086 copy_ref_info (ref, *use->op_p);
6087 *use->op_p = ref;
6090 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6091 candidate CAND. */
6093 static void
6094 rewrite_use_compare (struct ivopts_data *data,
6095 struct iv_use *use, struct iv_cand *cand)
6097 tree comp, *var_p, op, bound;
6098 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6099 enum tree_code compare;
6100 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6101 bool ok;
6103 bound = cp->value;
6104 if (bound)
6106 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6107 tree var_type = TREE_TYPE (var);
6108 gimple_seq stmts;
6110 if (dump_file && (dump_flags & TDF_DETAILS))
6112 fprintf (dump_file, "Replacing exit test: ");
6113 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6115 compare = iv_elimination_compare (data, use);
6116 bound = unshare_expr (fold_convert (var_type, bound));
6117 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6118 if (stmts)
6119 gsi_insert_seq_on_edge_immediate (
6120 loop_preheader_edge (data->current_loop),
6121 stmts);
6123 gimple_cond_set_lhs (use->stmt, var);
6124 gimple_cond_set_code (use->stmt, compare);
6125 gimple_cond_set_rhs (use->stmt, op);
6126 return;
6129 /* The induction variable elimination failed; just express the original
6130 giv. */
6131 comp = get_computation (data->current_loop, use, cand);
6132 gcc_assert (comp != NULL_TREE);
6134 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6135 gcc_assert (ok);
6137 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6138 true, GSI_SAME_STMT);
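/* So after a successful elimination, an exit test such as "if (i < n)"
   ends up as, say, "if (p != bound)", with the bound computed once on the
   preheader edge since it is loop-invariant.  */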
6141 /* Rewrites USE using candidate CAND. */
6143 static void
6144 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6146 switch (use->type)
6148 case USE_NONLINEAR_EXPR:
6149 rewrite_use_nonlinear_expr (data, use, cand);
6150 break;
6152 case USE_ADDRESS:
6153 rewrite_use_address (data, use, cand);
6154 break;
6156 case USE_COMPARE:
6157 rewrite_use_compare (data, use, cand);
6158 break;
6160 default:
6161 gcc_unreachable ();
6164 update_stmt (use->stmt);
6167 /* Rewrite the uses using the selected induction variables. */
6169 static void
6170 rewrite_uses (struct ivopts_data *data)
6172 unsigned i;
6173 struct iv_cand *cand;
6174 struct iv_use *use;
6176 for (i = 0; i < n_iv_uses (data); i++)
6178 use = iv_use (data, i);
6179 cand = use->selected;
6180 gcc_assert (cand);
6182 rewrite_use (data, use, cand);
6186 /* Removes the ivs that are not used after rewriting. */
6188 static void
6189 remove_unused_ivs (struct ivopts_data *data)
6191 unsigned j;
6192 bitmap_iterator bi;
6193 bitmap toremove = BITMAP_ALLOC (NULL);
6195 /* Figure out an order in which to release SSA DEFs so that we don't
6196 release something that we'd have to propagate into a debug stmt
6197 afterwards. */
6198 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6200 struct version_info *info;
6202 info = ver_info (data, j);
6203 if (info->iv
6204 && !integer_zerop (info->iv->step)
6205 && !info->inv_id
6206 && !info->iv->have_use_for
6207 && !info->preserve_biv)
6208 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6211 release_defs_bitset (toremove);
6213 BITMAP_FREE (toremove);
6216 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6217 for pointer_map_traverse. */
6219 static bool
6220 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6221 void *data ATTRIBUTE_UNUSED)
6223 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6225 free (niter);
6226 return true;
6229 /* Frees data allocated by the optimization of a single loop. */
6231 static void
6232 free_loop_data (struct ivopts_data *data)
6234 unsigned i, j;
6235 bitmap_iterator bi;
6236 tree obj;
6238 if (data->niters)
6240 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6241 pointer_map_destroy (data->niters);
6242 data->niters = NULL;
6245 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6247 struct version_info *info;
6249 info = ver_info (data, i);
6250 if (info->iv)
6251 free (info->iv);
6252 info->iv = NULL;
6253 info->has_nonlin_use = false;
6254 info->preserve_biv = false;
6255 info->inv_id = 0;
6257 bitmap_clear (data->relevant);
6258 bitmap_clear (data->important_candidates);
6260 for (i = 0; i < n_iv_uses (data); i++)
6262 struct iv_use *use = iv_use (data, i);
6264 free (use->iv);
6265 BITMAP_FREE (use->related_cands);
6266 for (j = 0; j < use->n_map_members; j++)
6267 if (use->cost_map[j].depends_on)
6268 BITMAP_FREE (use->cost_map[j].depends_on);
6269 free (use->cost_map);
6270 free (use);
6272 VEC_truncate (iv_use_p, data->iv_uses, 0);
6274 for (i = 0; i < n_iv_cands (data); i++)
6276 struct iv_cand *cand = iv_cand (data, i);
6278 if (cand->iv)
6279 free (cand->iv);
6280 if (cand->depends_on)
6281 BITMAP_FREE (cand->depends_on);
6282 free (cand);
6284 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
6286 if (data->version_info_size < num_ssa_names)
6288 data->version_info_size = 2 * num_ssa_names;
6289 free (data->version_info);
6290 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6293 data->max_inv_id = 0;
6295 FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
6296 SET_DECL_RTL (obj, NULL_RTX);
6298 VEC_truncate (tree, decl_rtl_to_reset, 0);
6300 htab_empty (data->inv_expr_tab);
6301 data->inv_expr_id = 0;
6304 /* Finalizes data structures used by the iv optimization pass. */
6307 static void
6308 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6310 free_loop_data (data);
6311 free (data->version_info);
6312 BITMAP_FREE (data->relevant);
6313 BITMAP_FREE (data->important_candidates);
6315 VEC_free (tree, heap, decl_rtl_to_reset);
6316 VEC_free (iv_use_p, heap, data->iv_uses);
6317 VEC_free (iv_cand_p, heap, data->iv_candidates);
6318 htab_delete (data->inv_expr_tab);
6321 /* Returns true if the loop body BODY includes any function calls. */
6323 static bool
6324 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6326 gimple_stmt_iterator gsi;
6327 unsigned i;
6329 for (i = 0; i < num_nodes; i++)
6330 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6332 gimple stmt = gsi_stmt (gsi);
6333 if (is_gimple_call (stmt)
6334 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6335 return true;
6337 return false;
6340 /* Optimizes the LOOP. Returns true if anything changed. */
6342 static bool
6343 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6345 bool changed = false;
6346 struct iv_ca *iv_ca;
6347 edge exit;
6348 basic_block *body;
6350 gcc_assert (!data->niters);
6351 data->current_loop = loop;
6352 data->speed = optimize_loop_for_speed_p (loop);
6354 if (dump_file && (dump_flags & TDF_DETAILS))
6356 fprintf (dump_file, "Processing loop %d\n", loop->num);
6358 exit = single_dom_exit (loop);
6359 if (exit)
6361 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6362 exit->src->index, exit->dest->index);
6363 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6364 fprintf (dump_file, "\n");
6367 fprintf (dump_file, "\n");
6370 body = get_loop_body (loop);
6371 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6372 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6373 free (body);
6375 /* For each ssa name determines whether it behaves as an induction variable
6376 in some loop. */
6377 if (!find_induction_variables (data))
6378 goto finish;
6380 /* Finds interesting uses (item 1). */
6381 find_interesting_uses (data);
6382 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6383 goto finish;
6385 /* Finds candidates for the induction variables (item 2). */
6386 find_iv_candidates (data);
6388 /* Calculates the costs (item 3, part 1). */
6389 determine_iv_costs (data);
6390 determine_use_iv_costs (data);
6391 determine_set_costs (data);
6393 /* Find the optimal set of induction variables (item 3, part 2). */
6394 iv_ca = find_optimal_iv_set (data);
6395 if (!iv_ca)
6396 goto finish;
6397 changed = true;
6399 /* Create the new induction variables (item 4, part 1). */
6400 create_new_ivs (data, iv_ca);
6401 iv_ca_free (&iv_ca);
6403 /* Rewrite the uses (item 4, part 2). */
6404 rewrite_uses (data);
6406 /* Remove the ivs that are unused after rewriting. */
6407 remove_unused_ivs (data);
6409 /* We have changed the structure of induction variables; it might happen
6410 that definitions in the scev database refer to some of them that were
6411 eliminated. */
6412 scev_reset ();
6414 finish:
6415 free_loop_data (data);
6417 return changed;
6420 /* Main entry point. Optimizes induction variables in loops. */
6422 void
6423 tree_ssa_iv_optimize (void)
6425 struct loop *loop;
6426 struct ivopts_data data;
6427 loop_iterator li;
6429 tree_ssa_iv_optimize_init (&data);
6431 /* Optimize the loops starting with the innermost ones. */
6432 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6434 if (dump_file && (dump_flags & TDF_DETAILS))
6435 flow_loop_dump (loop, dump_file, NULL, 1);
6437 tree_ssa_iv_optimize_loop (&data, loop);
6440 tree_ssa_iv_optimize_finalize (&data);