PR tree-optimization/28364
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob0e8fa94b9d62340cf470505f5d49393fb7c356a0
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Types of uses. */
125 enum use_type
127 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
128 USE_ADDRESS, /* Use in an address. */
129 USE_COMPARE /* Use is a compare. */
132 /* The candidate - cost pair. */
133 struct cost_pair
135 struct iv_cand *cand; /* The candidate. */
136 unsigned cost; /* The cost. */
137 bitmap depends_on; /* The list of invariants that have to be
138 preserved. */
139 tree value; /* For final value elimination, the expression for
140 the final value of the iv. For iv elimination,
141 the new bound to compare with. */
144 /* Use. */
145 struct iv_use
147 unsigned id; /* The id of the use. */
148 enum use_type type; /* Type of the use. */
149 struct iv *iv; /* The induction variable it is based on. */
150 tree stmt; /* Statement in that it occurs. */
151 tree *op_p; /* The place where it occurs. */
152 bitmap related_cands; /* The set of "related" iv candidates, plus the common
153 important ones. */
155 unsigned n_map_members; /* Number of candidates in the cost_map list. */
156 struct cost_pair *cost_map;
157 /* The costs wrto the iv candidates. */
159 struct iv_cand *selected;
160 /* The selected candidate. */
163 /* The position where the iv is computed. */
164 enum iv_position
166 IP_NORMAL, /* At the end, just before the exit condition. */
167 IP_END, /* At the end of the latch block. */
168 IP_ORIGINAL /* The original biv. */
171 /* The induction variable candidate. */
172 struct iv_cand
174 unsigned id; /* The number of the candidate. */
175 bool important; /* Whether this is an "important" candidate, i.e. such
176 that it should be considered by all uses. */
177 enum iv_position pos; /* Where it is computed. */
178 tree incremented_at; /* For original biv, the statement where it is
179 incremented. */
180 tree var_before; /* The variable used for it before increment. */
181 tree var_after; /* The variable used for it after increment. */
182 struct iv *iv; /* The value of the candidate. NULL for
183 "pseudocandidate" used to indicate the possibility
184 to replace the final value of an iv by direct
185 computation of the value. */
186 unsigned cost; /* Cost of the candidate. */
187 bitmap depends_on; /* The list of invariants that are used in step of the
188 biv. */
191 /* The data used by the induction variable optimizations. */
193 typedef struct iv_use *iv_use_p;
194 DEF_VEC_P(iv_use_p);
195 DEF_VEC_ALLOC_P(iv_use_p,heap);
197 typedef struct iv_cand *iv_cand_p;
198 DEF_VEC_P(iv_cand_p);
199 DEF_VEC_ALLOC_P(iv_cand_p,heap);
201 struct ivopts_data
203 /* The currently optimized loop. */
204 struct loop *current_loop;
206 /* Number of registers used in it. */
207 unsigned regs_used;
209 /* Numbers of iterations for all exits of the current loop. */
210 htab_t niters;
212 /* The size of version_info array allocated. */
213 unsigned version_info_size;
215 /* The array of information for the ssa names. */
216 struct version_info *version_info;
218 /* The bitmap of indices in version_info whose value was changed. */
219 bitmap relevant;
221 /* The maximum invariant id. */
222 unsigned max_inv_id;
224 /* The uses of induction variables. */
225 VEC(iv_use_p,heap) *iv_uses;
227 /* The candidates. */
228 VEC(iv_cand_p,heap) *iv_candidates;
230 /* A bitmap of important candidates. */
231 bitmap important_candidates;
233 /* Whether to consider just related and important candidates when replacing a
234 use. */
235 bool consider_all_candidates;
238 /* An assignment of iv candidates to uses. */
240 struct iv_ca
242 /* The number of uses covered by the assignment. */
243 unsigned upto;
245 /* Number of uses that cannot be expressed by the candidates in the set. */
246 unsigned bad_uses;
248 /* Candidate assigned to a use, together with the related costs. */
249 struct cost_pair **cand_for_use;
251 /* Number of times each candidate is used. */
252 unsigned *n_cand_uses;
254 /* The candidates used. */
255 bitmap cands;
257 /* The number of candidates in the set. */
258 unsigned n_cands;
260 /* Total number of registers needed. */
261 unsigned n_regs;
263 /* Total cost of expressing uses. */
264 unsigned cand_use_cost;
266 /* Total cost of candidates. */
267 unsigned cand_cost;
269 /* Number of times each invariant is used. */
270 unsigned *n_invariant_uses;
272 /* Total cost of the assignment. */
273 unsigned cost;
276 /* Difference of two iv candidate assignments. */
278 struct iv_ca_delta
280 /* Changed use. */
281 struct iv_use *use;
283 /* An old assignment (for rollback purposes). */
284 struct cost_pair *old_cp;
286 /* A new assignment. */
287 struct cost_pair *new_cp;
289 /* Next change in the list. */
290 struct iv_ca_delta *next_change;
293 /* Bound on number of candidates below that all candidates are considered. */
295 #define CONSIDER_ALL_CANDIDATES_BOUND \
296 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
298 /* If there are more iv occurrences, we just give up (it is quite unlikely that
299 optimizing such a loop would help, and it would take ages). */
301 #define MAX_CONSIDERED_USES \
302 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
304 /* If there are at most this number of ivs in the set, try removing unnecessary
305 ivs from the set always. */
307 #define ALWAYS_PRUNE_CAND_SET_BOUND \
308 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
310 /* The list of trees for that the decl_rtl field must be reset is stored
311 here. */
313 static VEC(tree,heap) *decl_rtl_to_reset;
315 /* Number of uses recorded in DATA. */
317 static inline unsigned
318 n_iv_uses (struct ivopts_data *data)
320 return VEC_length (iv_use_p, data->iv_uses);
323 /* Ith use recorded in DATA. */
325 static inline struct iv_use *
326 iv_use (struct ivopts_data *data, unsigned i)
328 return VEC_index (iv_use_p, data->iv_uses, i);
331 /* Number of candidates recorded in DATA. */
333 static inline unsigned
334 n_iv_cands (struct ivopts_data *data)
336 return VEC_length (iv_cand_p, data->iv_candidates);
339 /* Ith candidate recorded in DATA. */
341 static inline struct iv_cand *
342 iv_cand (struct ivopts_data *data, unsigned i)
344 return VEC_index (iv_cand_p, data->iv_candidates, i);
347 /* The single loop exit if it dominates the latch, NULL otherwise. */
349 edge
350 single_dom_exit (struct loop *loop)
352 edge exit = loop->single_exit;
354 if (!exit)
355 return NULL;
357 if (!just_once_each_iteration_p (loop, exit->src))
358 return NULL;
360 return exit;
363 /* Dumps information about the induction variable IV to FILE. */
365 extern void dump_iv (FILE *, struct iv *);
366 void
367 dump_iv (FILE *file, struct iv *iv)
369 if (iv->ssa_name)
371 fprintf (file, "ssa name ");
372 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
373 fprintf (file, "\n");
376 fprintf (file, " type ");
377 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
378 fprintf (file, "\n");
380 if (iv->step)
382 fprintf (file, " base ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
386 fprintf (file, " step ");
387 print_generic_expr (file, iv->step, TDF_SLIM);
388 fprintf (file, "\n");
390 else
392 fprintf (file, " invariant ");
393 print_generic_expr (file, iv->base, TDF_SLIM);
394 fprintf (file, "\n");
397 if (iv->base_object)
399 fprintf (file, " base object ");
400 print_generic_expr (file, iv->base_object, TDF_SLIM);
401 fprintf (file, "\n");
404 if (iv->biv_p)
405 fprintf (file, " is a biv\n");
408 /* Dumps information about the USE to FILE. */
410 extern void dump_use (FILE *, struct iv_use *);
411 void
412 dump_use (FILE *file, struct iv_use *use)
414 fprintf (file, "use %d\n", use->id);
416 switch (use->type)
418 case USE_NONLINEAR_EXPR:
419 fprintf (file, " generic\n");
420 break;
422 case USE_ADDRESS:
423 fprintf (file, " address\n");
424 break;
426 case USE_COMPARE:
427 fprintf (file, " compare\n");
428 break;
430 default:
431 gcc_unreachable ();
434 fprintf (file, " in statement ");
435 print_generic_expr (file, use->stmt, TDF_SLIM);
436 fprintf (file, "\n");
438 fprintf (file, " at position ");
439 if (use->op_p)
440 print_generic_expr (file, *use->op_p, TDF_SLIM);
441 fprintf (file, "\n");
443 dump_iv (file, use->iv);
445 if (use->related_cands)
447 fprintf (file, " related candidates ");
448 dump_bitmap (file, use->related_cands);
452 /* Dumps information about the uses to FILE. */
454 extern void dump_uses (FILE *, struct ivopts_data *);
455 void
456 dump_uses (FILE *file, struct ivopts_data *data)
458 unsigned i;
459 struct iv_use *use;
461 for (i = 0; i < n_iv_uses (data); i++)
463 use = iv_use (data, i);
465 dump_use (file, use);
466 fprintf (file, "\n");
470 /* Dumps information about induction variable candidate CAND to FILE. */
472 extern void dump_cand (FILE *, struct iv_cand *);
473 void
474 dump_cand (FILE *file, struct iv_cand *cand)
476 struct iv *iv = cand->iv;
478 fprintf (file, "candidate %d%s\n",
479 cand->id, cand->important ? " (important)" : "");
481 if (cand->depends_on)
483 fprintf (file, " depends on ");
484 dump_bitmap (file, cand->depends_on);
487 if (!iv)
489 fprintf (file, " final value replacement\n");
490 return;
493 switch (cand->pos)
495 case IP_NORMAL:
496 fprintf (file, " incremented before exit test\n");
497 break;
499 case IP_END:
500 fprintf (file, " incremented at end\n");
501 break;
503 case IP_ORIGINAL:
504 fprintf (file, " original biv\n");
505 break;
508 dump_iv (file, iv);
511 /* Returns the info for ssa version VER. */
513 static inline struct version_info *
514 ver_info (struct ivopts_data *data, unsigned ver)
516 return data->version_info + ver;
519 /* Returns the info for ssa name NAME. */
521 static inline struct version_info *
522 name_info (struct ivopts_data *data, tree name)
524 return ver_info (data, SSA_NAME_VERSION (name));
527 /* Checks whether there exists number X such that X * B = A, counting modulo
528 2^BITS. */
530 static bool
531 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
532 HOST_WIDE_INT *x)
534 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
535 unsigned HOST_WIDE_INT inv, ex, val;
536 unsigned i;
538 a &= mask;
539 b &= mask;
541 /* First divide the whole equation by 2 as long as possible. */
542 while (!(a & 1) && !(b & 1))
544 a >>= 1;
545 b >>= 1;
546 bits--;
547 mask >>= 1;
550 if (!(b & 1))
552 /* If b is still even, a is odd and there is no such x. */
553 return false;
556 /* Find the inverse of b. We compute it as
557 b^(2^(bits - 1) - 1) (mod 2^bits). */
558 inv = 1;
559 ex = b;
560 for (i = 0; i < bits - 1; i++)
562 inv = (inv * ex) & mask;
563 ex = (ex * ex) & mask;
566 val = (a * inv) & mask;
568 gcc_assert (((val * b) & mask) == a);
570 if ((val >> (bits - 1)) & 1)
571 val |= ~mask;
573 *x = val;
575 return true;
578 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
579 emitted in LOOP. */
581 static bool
582 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
584 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
586 gcc_assert (bb);
588 if (sbb == loop->latch)
589 return true;
591 if (sbb != bb)
592 return false;
594 return stmt == last_stmt (bb);
597 /* Returns true if STMT if after the place where the original induction
598 variable CAND is incremented. */
600 static bool
601 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
603 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
604 basic_block stmt_bb = bb_for_stmt (stmt);
605 block_stmt_iterator bsi;
607 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
608 return false;
610 if (stmt_bb != cand_bb)
611 return true;
613 /* Scan the block from the end, since the original ivs are usually
614 incremented at the end of the loop body. */
615 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
617 if (bsi_stmt (bsi) == cand->incremented_at)
618 return false;
619 if (bsi_stmt (bsi) == stmt)
620 return true;
624 /* Returns true if STMT if after the place where the induction variable
625 CAND is incremented in LOOP. */
627 static bool
628 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
630 switch (cand->pos)
632 case IP_END:
633 return false;
635 case IP_NORMAL:
636 return stmt_after_ip_normal_pos (loop, stmt);
638 case IP_ORIGINAL:
639 return stmt_after_ip_original_pos (cand, stmt);
641 default:
642 gcc_unreachable ();
646 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
648 static bool
649 abnormal_ssa_name_p (tree exp)
651 if (!exp)
652 return false;
654 if (TREE_CODE (exp) != SSA_NAME)
655 return false;
657 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
660 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
661 abnormal phi node. Callback for for_each_index. */
663 static bool
664 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
665 void *data ATTRIBUTE_UNUSED)
667 if (TREE_CODE (base) == ARRAY_REF)
669 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
670 return false;
671 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
672 return false;
675 return !abnormal_ssa_name_p (*index);
678 /* Returns true if EXPR contains a ssa name that occurs in an
679 abnormal phi node. */
681 bool
682 contains_abnormal_ssa_name_p (tree expr)
684 enum tree_code code;
685 enum tree_code_class class;
687 if (!expr)
688 return false;
690 code = TREE_CODE (expr);
691 class = TREE_CODE_CLASS (code);
693 if (code == SSA_NAME)
694 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
696 if (code == INTEGER_CST
697 || is_gimple_min_invariant (expr))
698 return false;
700 if (code == ADDR_EXPR)
701 return !for_each_index (&TREE_OPERAND (expr, 0),
702 idx_contains_abnormal_ssa_name_p,
703 NULL);
705 switch (class)
707 case tcc_binary:
708 case tcc_comparison:
709 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
710 return true;
712 /* Fallthru. */
713 case tcc_unary:
714 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
715 return true;
717 break;
719 default:
720 gcc_unreachable ();
723 return false;
726 /* Element of the table in that we cache the numbers of iterations obtained
727 from exits of the loop. */
729 struct nfe_cache_elt
731 /* The edge for that the number of iterations is cached. */
732 edge exit;
734 /* Number of iterations corresponding to this exit, or NULL if it cannot be
735 determined. */
736 tree niter;
739 /* Hash function for nfe_cache_elt E. */
741 static hashval_t
742 nfe_hash (const void *e)
744 const struct nfe_cache_elt *elt = e;
746 return htab_hash_pointer (elt->exit);
749 /* Equality function for nfe_cache_elt E1 and edge E2. */
751 static int
752 nfe_eq (const void *e1, const void *e2)
754 const struct nfe_cache_elt *elt1 = e1;
756 return elt1->exit == e2;
759 /* Returns tree describing number of iterations determined from
760 EXIT of DATA->current_loop, or NULL if something goes wrong. */
762 static tree
763 niter_for_exit (struct ivopts_data *data, edge exit)
765 struct nfe_cache_elt *nfe_desc;
766 struct tree_niter_desc desc;
767 PTR *slot;
769 slot = htab_find_slot_with_hash (data->niters, exit,
770 htab_hash_pointer (exit),
771 INSERT);
773 if (!*slot)
775 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
776 nfe_desc->exit = exit;
778 /* Try to determine number of iterations. We must know it
779 unconditionally (i.e., without possibility of # of iterations
780 being zero). Also, we cannot safely work with ssa names that
781 appear in phi nodes on abnormal edges, so that we do not create
782 overlapping life ranges for them (PR 27283). */
783 if (number_of_iterations_exit (data->current_loop,
784 exit, &desc, true)
785 && zero_p (desc.may_be_zero)
786 && !contains_abnormal_ssa_name_p (desc.niter))
787 nfe_desc->niter = desc.niter;
788 else
789 nfe_desc->niter = NULL_TREE;
791 else
792 nfe_desc = *slot;
794 return nfe_desc->niter;
797 /* Returns tree describing number of iterations determined from
798 single dominating exit of DATA->current_loop, or NULL if something
799 goes wrong. */
801 static tree
802 niter_for_single_dom_exit (struct ivopts_data *data)
804 edge exit = single_dom_exit (data->current_loop);
806 if (!exit)
807 return NULL;
809 return niter_for_exit (data, exit);
812 /* Initializes data structures used by the iv optimization pass, stored
813 in DATA. */
815 static void
816 tree_ssa_iv_optimize_init (struct ivopts_data *data)
818 data->version_info_size = 2 * num_ssa_names;
819 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
820 data->relevant = BITMAP_ALLOC (NULL);
821 data->important_candidates = BITMAP_ALLOC (NULL);
822 data->max_inv_id = 0;
823 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
824 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
825 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
826 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
829 /* Returns a memory object to that EXPR points. In case we are able to
830 determine that it does not point to any such object, NULL is returned. */
832 static tree
833 determine_base_object (tree expr)
835 enum tree_code code = TREE_CODE (expr);
836 tree base, obj, op0, op1;
838 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
839 return NULL_TREE;
841 switch (code)
843 case INTEGER_CST:
844 return NULL_TREE;
846 case ADDR_EXPR:
847 obj = TREE_OPERAND (expr, 0);
848 base = get_base_address (obj);
850 if (!base)
851 return expr;
853 if (TREE_CODE (base) == INDIRECT_REF)
854 return determine_base_object (TREE_OPERAND (base, 0));
856 return fold_convert (ptr_type_node,
857 build_fold_addr_expr (base));
859 case PLUS_EXPR:
860 case MINUS_EXPR:
861 op0 = determine_base_object (TREE_OPERAND (expr, 0));
862 op1 = determine_base_object (TREE_OPERAND (expr, 1));
864 if (!op1)
865 return op0;
867 if (!op0)
868 return (code == PLUS_EXPR
869 ? op1
870 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
872 return fold_build2 (code, ptr_type_node, op0, op1);
874 case NOP_EXPR:
875 case CONVERT_EXPR:
876 return determine_base_object (TREE_OPERAND (expr, 0));
878 default:
879 return fold_convert (ptr_type_node, expr);
883 /* Allocates an induction variable with given initial value BASE and step STEP
884 for loop LOOP. */
886 static struct iv *
887 alloc_iv (tree base, tree step)
889 struct iv *iv = XCNEW (struct iv);
891 if (step && integer_zerop (step))
892 step = NULL_TREE;
894 iv->base = base;
895 iv->base_object = determine_base_object (base);
896 iv->step = step;
897 iv->biv_p = false;
898 iv->have_use_for = false;
899 iv->use_id = 0;
900 iv->ssa_name = NULL_TREE;
902 return iv;
905 /* Sets STEP and BASE for induction variable IV. */
907 static void
908 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
910 struct version_info *info = name_info (data, iv);
912 gcc_assert (!info->iv);
914 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
915 info->iv = alloc_iv (base, step);
916 info->iv->ssa_name = iv;
919 /* Finds induction variable declaration for VAR. */
921 static struct iv *
922 get_iv (struct ivopts_data *data, tree var)
924 basic_block bb;
926 if (!name_info (data, var)->iv)
928 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
930 if (!bb
931 || !flow_bb_inside_loop_p (data->current_loop, bb))
932 set_iv (data, var, var, NULL_TREE);
935 return name_info (data, var)->iv;
938 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
939 not define a simple affine biv with nonzero step. */
941 static tree
942 determine_biv_step (tree phi)
944 struct loop *loop = bb_for_stmt (phi)->loop_father;
945 tree name = PHI_RESULT (phi);
946 affine_iv iv;
948 if (!is_gimple_reg (name))
949 return NULL_TREE;
951 if (!simple_iv (loop, phi, name, &iv, true))
952 return NULL_TREE;
954 return (zero_p (iv.step) ? NULL_TREE : iv.step);
957 /* Finds basic ivs. */
959 static bool
960 find_bivs (struct ivopts_data *data)
962 tree phi, step, type, base;
963 bool found = false;
964 struct loop *loop = data->current_loop;
966 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
968 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
969 continue;
971 step = determine_biv_step (phi);
972 if (!step)
973 continue;
975 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
976 base = expand_simple_operations (base);
977 if (contains_abnormal_ssa_name_p (base)
978 || contains_abnormal_ssa_name_p (step))
979 continue;
981 type = TREE_TYPE (PHI_RESULT (phi));
982 base = fold_convert (type, base);
983 if (step)
984 step = fold_convert (type, step);
986 set_iv (data, PHI_RESULT (phi), base, step);
987 found = true;
990 return found;
993 /* Marks basic ivs. */
995 static void
996 mark_bivs (struct ivopts_data *data)
998 tree phi, var;
999 struct iv *iv, *incr_iv;
1000 struct loop *loop = data->current_loop;
1001 basic_block incr_bb;
1003 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1005 iv = get_iv (data, PHI_RESULT (phi));
1006 if (!iv)
1007 continue;
1009 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010 incr_iv = get_iv (data, var);
1011 if (!incr_iv)
1012 continue;
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016 if (incr_bb->loop_father != data->current_loop
1017 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018 continue;
1020 iv->biv_p = true;
1021 incr_iv->biv_p = true;
1025 /* Checks whether STMT defines a linear induction variable and stores its
1026 parameters to IV. */
1028 static bool
1029 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1031 tree lhs;
1032 struct loop *loop = data->current_loop;
1034 iv->base = NULL_TREE;
1035 iv->step = NULL_TREE;
1037 if (TREE_CODE (stmt) != MODIFY_EXPR)
1038 return false;
1040 lhs = TREE_OPERAND (stmt, 0);
1041 if (TREE_CODE (lhs) != SSA_NAME)
1042 return false;
1044 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1045 return false;
1046 iv->base = expand_simple_operations (iv->base);
1048 if (contains_abnormal_ssa_name_p (iv->base)
1049 || contains_abnormal_ssa_name_p (iv->step))
1050 return false;
1052 return true;
1055 /* Finds general ivs in statement STMT. */
1057 static void
1058 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1060 affine_iv iv;
1062 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1063 return;
1065 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1068 /* Finds general ivs in basic block BB. */
1070 static void
1071 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1073 block_stmt_iterator bsi;
1075 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1076 find_givs_in_stmt (data, bsi_stmt (bsi));
1079 /* Finds general ivs. */
1081 static void
1082 find_givs (struct ivopts_data *data)
1084 struct loop *loop = data->current_loop;
1085 basic_block *body = get_loop_body_in_dom_order (loop);
1086 unsigned i;
1088 for (i = 0; i < loop->num_nodes; i++)
1089 find_givs_in_bb (data, body[i]);
1090 free (body);
1093 /* For each ssa name defined in LOOP determines whether it is an induction
1094 variable and if so, its initial value and step. */
1096 static bool
1097 find_induction_variables (struct ivopts_data *data)
1099 unsigned i;
1100 bitmap_iterator bi;
1102 if (!find_bivs (data))
1103 return false;
1105 find_givs (data);
1106 mark_bivs (data);
1108 if (dump_file && (dump_flags & TDF_DETAILS))
1110 tree niter = niter_for_single_dom_exit (data);
1112 if (niter)
1114 fprintf (dump_file, " number of iterations ");
1115 print_generic_expr (dump_file, niter, TDF_SLIM);
1116 fprintf (dump_file, "\n\n");
1119 fprintf (dump_file, "Induction variables:\n\n");
1121 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1123 if (ver_info (data, i)->iv)
1124 dump_iv (dump_file, ver_info (data, i)->iv);
1128 return true;
1131 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1133 static struct iv_use *
1134 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1135 tree stmt, enum use_type use_type)
1137 struct iv_use *use = XCNEW (struct iv_use);
1139 use->id = n_iv_uses (data);
1140 use->type = use_type;
1141 use->iv = iv;
1142 use->stmt = stmt;
1143 use->op_p = use_p;
1144 use->related_cands = BITMAP_ALLOC (NULL);
1146 /* To avoid showing ssa name in the dumps, if it was not reset by the
1147 caller. */
1148 iv->ssa_name = NULL_TREE;
1150 if (dump_file && (dump_flags & TDF_DETAILS))
1151 dump_use (dump_file, use);
1153 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1155 return use;
1158 /* Checks whether OP is a loop-level invariant and if so, records it.
1159 NONLINEAR_USE is true if the invariant is used in a way we do not
1160 handle specially. */
1162 static void
1163 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1165 basic_block bb;
1166 struct version_info *info;
1168 if (TREE_CODE (op) != SSA_NAME
1169 || !is_gimple_reg (op))
1170 return;
1172 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1173 if (bb
1174 && flow_bb_inside_loop_p (data->current_loop, bb))
1175 return;
1177 info = name_info (data, op);
1178 info->name = op;
1179 info->has_nonlin_use |= nonlinear_use;
1180 if (!info->inv_id)
1181 info->inv_id = ++data->max_inv_id;
1182 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1185 /* Checks whether the use OP is interesting and if so, records it. */
1187 static struct iv_use *
1188 find_interesting_uses_op (struct ivopts_data *data, tree op)
1190 struct iv *iv;
1191 struct iv *civ;
1192 tree stmt;
1193 struct iv_use *use;
1195 if (TREE_CODE (op) != SSA_NAME)
1196 return NULL;
1198 iv = get_iv (data, op);
1199 if (!iv)
1200 return NULL;
1202 if (iv->have_use_for)
1204 use = iv_use (data, iv->use_id);
1206 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1207 return use;
1210 if (zero_p (iv->step))
1212 record_invariant (data, op, true);
1213 return NULL;
1215 iv->have_use_for = true;
1217 civ = XNEW (struct iv);
1218 *civ = *iv;
1220 stmt = SSA_NAME_DEF_STMT (op);
1221 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1222 || TREE_CODE (stmt) == MODIFY_EXPR);
1224 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1225 iv->use_id = use->id;
1227 return use;
1230 /* Checks whether the condition *COND_P in STMT is interesting
1231 and if so, records it. */
1233 static void
1234 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1236 tree *op0_p;
1237 tree *op1_p;
1238 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1239 struct iv const_iv;
1240 tree zero = integer_zero_node;
1242 const_iv.step = NULL_TREE;
1244 if (TREE_CODE (*cond_p) != SSA_NAME
1245 && !COMPARISON_CLASS_P (*cond_p))
1246 return;
1248 if (TREE_CODE (*cond_p) == SSA_NAME)
1250 op0_p = cond_p;
1251 op1_p = &zero;
1253 else
1255 op0_p = &TREE_OPERAND (*cond_p, 0);
1256 op1_p = &TREE_OPERAND (*cond_p, 1);
1259 if (TREE_CODE (*op0_p) == SSA_NAME)
1260 iv0 = get_iv (data, *op0_p);
1261 else
1262 iv0 = &const_iv;
1264 if (TREE_CODE (*op1_p) == SSA_NAME)
1265 iv1 = get_iv (data, *op1_p);
1266 else
1267 iv1 = &const_iv;
1269 if (/* When comparing with non-invariant value, we may not do any senseful
1270 induction variable elimination. */
1271 (!iv0 || !iv1)
1272 /* Eliminating condition based on two ivs would be nontrivial.
1273 ??? TODO -- it is not really important to handle this case. */
1274 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1276 find_interesting_uses_op (data, *op0_p);
1277 find_interesting_uses_op (data, *op1_p);
1278 return;
1281 if (zero_p (iv0->step) && zero_p (iv1->step))
1283 /* If both are invariants, this is a work for unswitching. */
1284 return;
1287 civ = XNEW (struct iv);
1288 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1289 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1292 /* Returns true if expression EXPR is obviously invariant in LOOP,
1293 i.e. if all its operands are defined outside of the LOOP. */
1295 bool
1296 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1298 basic_block def_bb;
1299 unsigned i, len;
1301 if (is_gimple_min_invariant (expr))
1302 return true;
1304 if (TREE_CODE (expr) == SSA_NAME)
1306 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1307 if (def_bb
1308 && flow_bb_inside_loop_p (loop, def_bb))
1309 return false;
1311 return true;
1314 if (!EXPR_P (expr))
1315 return false;
1317 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1318 for (i = 0; i < len; i++)
1319 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1320 return false;
1322 return true;
1325 /* Cumulates the steps of indices into DATA and replaces their values with the
1326 initial ones. Returns false when the value of the index cannot be determined.
1327 Callback for for_each_index. */
1329 struct ifs_ivopts_data
1331 struct ivopts_data *ivopts_data;
1332 tree stmt;
1333 tree *step_p;
1336 static bool
1337 idx_find_step (tree base, tree *idx, void *data)
1339 struct ifs_ivopts_data *dta = data;
1340 struct iv *iv;
1341 tree step, iv_base, iv_step, lbound, off;
1342 struct loop *loop = dta->ivopts_data->current_loop;
1344 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1345 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1346 return false;
1348 /* If base is a component ref, require that the offset of the reference
1349 be invariant. */
1350 if (TREE_CODE (base) == COMPONENT_REF)
1352 off = component_ref_field_offset (base);
1353 return expr_invariant_in_loop_p (loop, off);
1356 /* If base is array, first check whether we will be able to move the
1357 reference out of the loop (in order to take its address in strength
1358 reduction). In order for this to work we need both lower bound
1359 and step to be loop invariants. */
1360 if (TREE_CODE (base) == ARRAY_REF)
1362 step = array_ref_element_size (base);
1363 lbound = array_ref_low_bound (base);
1365 if (!expr_invariant_in_loop_p (loop, step)
1366 || !expr_invariant_in_loop_p (loop, lbound))
1367 return false;
1370 if (TREE_CODE (*idx) != SSA_NAME)
1371 return true;
1373 iv = get_iv (dta->ivopts_data, *idx);
1374 if (!iv)
1375 return false;
1377 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1378 *&x[0], which is not folded and does not trigger the
1379 ARRAY_REF path below. */
1380 *idx = iv->base;
1382 if (!iv->step)
1383 return true;
1385 if (TREE_CODE (base) == ARRAY_REF)
1387 step = array_ref_element_size (base);
1389 /* We only handle addresses whose step is an integer constant. */
1390 if (TREE_CODE (step) != INTEGER_CST)
1391 return false;
1393 else
1394 /* The step for pointer arithmetics already is 1 byte. */
1395 step = build_int_cst (sizetype, 1);
1397 iv_base = iv->base;
1398 iv_step = iv->step;
1399 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1400 sizetype, &iv_base, &iv_step, dta->stmt,
1401 false))
1403 /* The index might wrap. */
1404 return false;
1407 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1409 if (!*dta->step_p)
1410 *dta->step_p = step;
1411 else
1412 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1414 return true;
1417 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1420 static bool
1421 idx_record_use (tree base, tree *idx,
1422 void *data)
1424 find_interesting_uses_op (data, *idx);
1425 if (TREE_CODE (base) == ARRAY_REF)
1427 find_interesting_uses_op (data, array_ref_element_size (base));
1428 find_interesting_uses_op (data, array_ref_low_bound (base));
1430 return true;
1433 /* Returns true if memory reference REF may be unaligned. */
1435 static bool
1436 may_be_unaligned_p (tree ref)
1438 tree base;
1439 tree base_type;
1440 HOST_WIDE_INT bitsize;
1441 HOST_WIDE_INT bitpos;
1442 tree toffset;
1443 enum machine_mode mode;
1444 int unsignedp, volatilep;
1445 unsigned base_align;
1447 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1448 thus they are not misaligned. */
1449 if (TREE_CODE (ref) == TARGET_MEM_REF)
1450 return false;
1452 /* The test below is basically copy of what expr.c:normal_inner_ref
1453 does to check whether the object must be loaded by parts when
1454 STRICT_ALIGNMENT is true. */
1455 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1456 &unsignedp, &volatilep, true);
1457 base_type = TREE_TYPE (base);
1458 base_align = TYPE_ALIGN (base_type);
1460 if (mode != BLKmode
1461 && (base_align < GET_MODE_ALIGNMENT (mode)
1462 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1463 || bitpos % BITS_PER_UNIT != 0))
1464 return true;
1466 return false;
1469 /* Return true if EXPR may be non-addressable. */
1471 static bool
1472 may_be_nonaddressable_p (tree expr)
1474 switch (TREE_CODE (expr))
1476 case COMPONENT_REF:
1477 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1478 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1480 case ARRAY_REF:
1481 case ARRAY_RANGE_REF:
1482 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1484 case VIEW_CONVERT_EXPR:
1485 /* This kind of view-conversions may wrap non-addressable objects
1486 and make them look addressable. After some processing the
1487 non-addressability may be uncovered again, causing ADDR_EXPRs
1488 of inappropriate objects to be built. */
1489 return AGGREGATE_TYPE_P (TREE_TYPE (expr))
1490 && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));
1492 default:
1493 break;
1496 return false;
1499 /* Finds addresses in *OP_P inside STMT. */
1501 static void
1502 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1504 tree base = *op_p, step = NULL;
1505 struct iv *civ;
1506 struct ifs_ivopts_data ifs_ivopts_data;
1508 /* Do not play with volatile memory references. A bit too conservative,
1509 perhaps, but safe. */
1510 if (stmt_ann (stmt)->has_volatile_ops)
1511 goto fail;
1513 /* Ignore bitfields for now. Not really something terribly complicated
1514 to handle. TODO. */
1515 if (TREE_CODE (base) == BIT_FIELD_REF)
1516 goto fail;
1518 if (may_be_nonaddressable_p (base))
1519 goto fail;
1521 if (STRICT_ALIGNMENT
1522 && may_be_unaligned_p (base))
1523 goto fail;
1525 base = unshare_expr (base);
1527 if (TREE_CODE (base) == TARGET_MEM_REF)
1529 tree type = build_pointer_type (TREE_TYPE (base));
1530 tree astep;
1532 if (TMR_BASE (base)
1533 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1535 civ = get_iv (data, TMR_BASE (base));
1536 if (!civ)
1537 goto fail;
1539 TMR_BASE (base) = civ->base;
1540 step = civ->step;
1542 if (TMR_INDEX (base)
1543 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1545 civ = get_iv (data, TMR_INDEX (base));
1546 if (!civ)
1547 goto fail;
1549 TMR_INDEX (base) = civ->base;
1550 astep = civ->step;
1552 if (astep)
1554 if (TMR_STEP (base))
1555 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1557 if (step)
1558 step = fold_build2 (PLUS_EXPR, type, step, astep);
1559 else
1560 step = astep;
1564 if (zero_p (step))
1565 goto fail;
1566 base = tree_mem_ref_addr (type, base);
1568 else
1570 ifs_ivopts_data.ivopts_data = data;
1571 ifs_ivopts_data.stmt = stmt;
1572 ifs_ivopts_data.step_p = &step;
1573 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1574 || zero_p (step))
1575 goto fail;
1577 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1578 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1580 base = build_fold_addr_expr (base);
1582 /* Substituting bases of IVs into the base expression might
1583 have caused folding opportunities. */
1584 if (TREE_CODE (base) == ADDR_EXPR)
1586 tree *ref = &TREE_OPERAND (base, 0);
1587 while (handled_component_p (*ref))
1588 ref = &TREE_OPERAND (*ref, 0);
1589 if (TREE_CODE (*ref) == INDIRECT_REF)
1590 *ref = fold_indirect_ref (*ref);
1594 civ = alloc_iv (base, step);
1595 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1596 return;
1598 fail:
1599 for_each_index (op_p, idx_record_use, data);
1602 /* Finds and records invariants used in STMT. */
1604 static void
1605 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1607 ssa_op_iter iter;
1608 use_operand_p use_p;
1609 tree op;
1611 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1613 op = USE_FROM_PTR (use_p);
1614 record_invariant (data, op, false);
1618 /* Finds interesting uses of induction variables in the statement STMT. */
1620 static void
1621 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1623 struct iv *iv;
1624 tree op, lhs, rhs;
1625 ssa_op_iter iter;
1626 use_operand_p use_p;
1628 find_invariants_stmt (data, stmt);
1630 if (TREE_CODE (stmt) == COND_EXPR)
1632 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1633 return;
1636 if (TREE_CODE (stmt) == MODIFY_EXPR)
1638 lhs = TREE_OPERAND (stmt, 0);
1639 rhs = TREE_OPERAND (stmt, 1);
1641 if (TREE_CODE (lhs) == SSA_NAME)
1643 /* If the statement defines an induction variable, the uses are not
1644 interesting by themselves. */
1646 iv = get_iv (data, lhs);
1648 if (iv && !zero_p (iv->step))
1649 return;
1652 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1654 case tcc_comparison:
1655 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1656 return;
1658 case tcc_reference:
1659 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1660 if (REFERENCE_CLASS_P (lhs))
1661 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1662 return;
1664 default: ;
1667 if (REFERENCE_CLASS_P (lhs)
1668 && is_gimple_val (rhs))
1670 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1671 find_interesting_uses_op (data, rhs);
1672 return;
1675 /* TODO -- we should also handle address uses of type
1677 memory = call (whatever);
1681 call (memory). */
1684 if (TREE_CODE (stmt) == PHI_NODE
1685 && bb_for_stmt (stmt) == data->current_loop->header)
1687 lhs = PHI_RESULT (stmt);
1688 iv = get_iv (data, lhs);
1690 if (iv && !zero_p (iv->step))
1691 return;
1694 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1696 op = USE_FROM_PTR (use_p);
1698 if (TREE_CODE (op) != SSA_NAME)
1699 continue;
1701 iv = get_iv (data, op);
1702 if (!iv)
1703 continue;
1705 find_interesting_uses_op (data, op);
1709 /* Finds interesting uses of induction variables outside of loops
1710 on loop exit edge EXIT. */
1712 static void
1713 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1715 tree phi, def;
1717 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1719 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1720 find_interesting_uses_op (data, def);
1724 /* Finds uses of the induction variables that are interesting. */
1726 static void
1727 find_interesting_uses (struct ivopts_data *data)
1729 basic_block bb;
1730 block_stmt_iterator bsi;
1731 tree phi;
1732 basic_block *body = get_loop_body (data->current_loop);
1733 unsigned i;
1734 struct version_info *info;
1735 edge e;
1737 if (dump_file && (dump_flags & TDF_DETAILS))
1738 fprintf (dump_file, "Uses:\n\n");
1740 for (i = 0; i < data->current_loop->num_nodes; i++)
1742 edge_iterator ei;
1743 bb = body[i];
1745 FOR_EACH_EDGE (e, ei, bb->succs)
1746 if (e->dest != EXIT_BLOCK_PTR
1747 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1748 find_interesting_uses_outside (data, e);
1750 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1751 find_interesting_uses_stmt (data, phi);
1752 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1753 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1756 if (dump_file && (dump_flags & TDF_DETAILS))
1758 bitmap_iterator bi;
1760 fprintf (dump_file, "\n");
1762 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1764 info = ver_info (data, i);
1765 if (info->inv_id)
1767 fprintf (dump_file, " ");
1768 print_generic_expr (dump_file, info->name, TDF_SLIM);
1769 fprintf (dump_file, " is invariant (%d)%s\n",
1770 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1774 fprintf (dump_file, "\n");
1777 free (body);
1780 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1781 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1782 we are at the top-level of the processed address. */
1784 static tree
1785 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1786 unsigned HOST_WIDE_INT *offset)
1788 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1789 enum tree_code code;
1790 tree type, orig_type = TREE_TYPE (expr);
1791 unsigned HOST_WIDE_INT off0, off1, st;
1792 tree orig_expr = expr;
1794 STRIP_NOPS (expr);
1796 type = TREE_TYPE (expr);
1797 code = TREE_CODE (expr);
1798 *offset = 0;
1800 switch (code)
1802 case INTEGER_CST:
1803 if (!cst_and_fits_in_hwi (expr)
1804 || zero_p (expr))
1805 return orig_expr;
1807 *offset = int_cst_value (expr);
1808 return build_int_cst (orig_type, 0);
1810 case PLUS_EXPR:
1811 case MINUS_EXPR:
1812 op0 = TREE_OPERAND (expr, 0);
1813 op1 = TREE_OPERAND (expr, 1);
1815 op0 = strip_offset_1 (op0, false, false, &off0);
1816 op1 = strip_offset_1 (op1, false, false, &off1);
1818 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1819 if (op0 == TREE_OPERAND (expr, 0)
1820 && op1 == TREE_OPERAND (expr, 1))
1821 return orig_expr;
1823 if (zero_p (op1))
1824 expr = op0;
1825 else if (zero_p (op0))
1827 if (code == PLUS_EXPR)
1828 expr = op1;
1829 else
1830 expr = fold_build1 (NEGATE_EXPR, type, op1);
1832 else
1833 expr = fold_build2 (code, type, op0, op1);
1835 return fold_convert (orig_type, expr);
1837 case ARRAY_REF:
1838 if (!inside_addr)
1839 return orig_expr;
1841 step = array_ref_element_size (expr);
1842 if (!cst_and_fits_in_hwi (step))
1843 break;
1845 st = int_cst_value (step);
1846 op1 = TREE_OPERAND (expr, 1);
1847 op1 = strip_offset_1 (op1, false, false, &off1);
1848 *offset = off1 * st;
1850 if (top_compref
1851 && zero_p (op1))
1853 /* Strip the component reference completely. */
1854 op0 = TREE_OPERAND (expr, 0);
1855 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1856 *offset += off0;
1857 return op0;
1859 break;
1861 case COMPONENT_REF:
1862 if (!inside_addr)
1863 return orig_expr;
1865 tmp = component_ref_field_offset (expr);
1866 if (top_compref
1867 && cst_and_fits_in_hwi (tmp))
1869 /* Strip the component reference completely. */
1870 op0 = TREE_OPERAND (expr, 0);
1871 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1872 *offset = off0 + int_cst_value (tmp);
1873 return op0;
1875 break;
1877 case ADDR_EXPR:
1878 op0 = TREE_OPERAND (expr, 0);
1879 op0 = strip_offset_1 (op0, true, true, &off0);
1880 *offset += off0;
1882 if (op0 == TREE_OPERAND (expr, 0))
1883 return orig_expr;
1885 expr = build_fold_addr_expr (op0);
1886 return fold_convert (orig_type, expr);
1888 case INDIRECT_REF:
1889 inside_addr = false;
1890 break;
1892 default:
1893 return orig_expr;
1896 /* Default handling of expressions for that we want to recurse into
1897 the first operand. */
1898 op0 = TREE_OPERAND (expr, 0);
1899 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1900 *offset += off0;
1902 if (op0 == TREE_OPERAND (expr, 0)
1903 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1904 return orig_expr;
1906 expr = copy_node (expr);
1907 TREE_OPERAND (expr, 0) = op0;
1908 if (op1)
1909 TREE_OPERAND (expr, 1) = op1;
1911 /* Inside address, we might strip the top level component references,
1912 thus changing type of the expression. Handling of ADDR_EXPR
1913 will fix that. */
1914 expr = fold_convert (orig_type, expr);
1916 return expr;
1919 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1921 static tree
1922 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1924 return strip_offset_1 (expr, false, false, offset);
1927 /* Returns variant of TYPE that can be used as base for different uses.
1928 For integer types, we return unsigned variant of the type, which
1929 avoids problems with overflows. For pointer types, we return void *. */
1931 static tree
1932 generic_type_for (tree type)
1934 if (POINTER_TYPE_P (type))
1935 return ptr_type_node;
1937 if (TYPE_UNSIGNED (type))
1938 return type;
1940 return unsigned_type_for (type);
1943 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1944 the bitmap to that we should store it. */
1946 static struct ivopts_data *fd_ivopts_data;
1947 static tree
1948 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1950 bitmap *depends_on = data;
1951 struct version_info *info;
1953 if (TREE_CODE (*expr_p) != SSA_NAME)
1954 return NULL_TREE;
1955 info = name_info (fd_ivopts_data, *expr_p);
1957 if (!info->inv_id || info->has_nonlin_use)
1958 return NULL_TREE;
1960 if (!*depends_on)
1961 *depends_on = BITMAP_ALLOC (NULL);
1962 bitmap_set_bit (*depends_on, info->inv_id);
1964 return NULL_TREE;
1967 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1968 position to POS. If USE is not NULL, the candidate is set as related to
1969 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1970 replacement of the final value of the iv by a direct computation. */
1972 static struct iv_cand *
1973 add_candidate_1 (struct ivopts_data *data,
1974 tree base, tree step, bool important, enum iv_position pos,
1975 struct iv_use *use, tree incremented_at)
1977 unsigned i;
1978 struct iv_cand *cand = NULL;
1979 tree type, orig_type;
1981 if (base)
1983 orig_type = TREE_TYPE (base);
1984 type = generic_type_for (orig_type);
1985 if (type != orig_type)
1987 base = fold_convert (type, base);
1988 if (step)
1989 step = fold_convert (type, step);
1993 for (i = 0; i < n_iv_cands (data); i++)
1995 cand = iv_cand (data, i);
1997 if (cand->pos != pos)
1998 continue;
2000 if (cand->incremented_at != incremented_at)
2001 continue;
2003 if (!cand->iv)
2005 if (!base && !step)
2006 break;
2008 continue;
2011 if (!base && !step)
2012 continue;
2014 if (!operand_equal_p (base, cand->iv->base, 0))
2015 continue;
2017 if (zero_p (cand->iv->step))
2019 if (zero_p (step))
2020 break;
2022 else
2024 if (step && operand_equal_p (step, cand->iv->step, 0))
2025 break;
2029 if (i == n_iv_cands (data))
2031 cand = XCNEW (struct iv_cand);
2032 cand->id = i;
2034 if (!base && !step)
2035 cand->iv = NULL;
2036 else
2037 cand->iv = alloc_iv (base, step);
2039 cand->pos = pos;
2040 if (pos != IP_ORIGINAL && cand->iv)
2042 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2043 cand->var_after = cand->var_before;
2045 cand->important = important;
2046 cand->incremented_at = incremented_at;
2047 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2049 if (step
2050 && TREE_CODE (step) != INTEGER_CST)
2052 fd_ivopts_data = data;
2053 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2056 if (dump_file && (dump_flags & TDF_DETAILS))
2057 dump_cand (dump_file, cand);
2060 if (important && !cand->important)
2062 cand->important = true;
2063 if (dump_file && (dump_flags & TDF_DETAILS))
2064 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2067 if (use)
2069 bitmap_set_bit (use->related_cands, i);
2070 if (dump_file && (dump_flags & TDF_DETAILS))
2071 fprintf (dump_file, "Candidate %d is related to use %d\n",
2072 cand->id, use->id);
2075 return cand;
2078 /* Returns true if incrementing the induction variable at the end of the LOOP
2079 is allowed.
2081 The purpose is to avoid splitting latch edge with a biv increment, thus
2082 creating a jump, possibly confusing other optimization passes and leaving
2083 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2084 is not available (so we do not have a better alternative), or if the latch
2085 edge is already nonempty. */
2087 static bool
2088 allow_ip_end_pos_p (struct loop *loop)
2090 if (!ip_normal_pos (loop))
2091 return true;
2093 if (!empty_block_p (ip_end_pos (loop)))
2094 return true;
2096 return false;
2099 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2100 position to POS. If USE is not NULL, the candidate is set as related to
2101 it. The candidate computation is scheduled on all available positions. */
2103 static void
2104 add_candidate (struct ivopts_data *data,
2105 tree base, tree step, bool important, struct iv_use *use)
2107 if (ip_normal_pos (data->current_loop))
2108 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2109 if (ip_end_pos (data->current_loop)
2110 && allow_ip_end_pos_p (data->current_loop))
2111 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2114 /* Add a standard "0 + 1 * iteration" iv candidate for a
2115 type with SIZE bits. */
2117 static void
2118 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2119 unsigned int size)
2121 tree type = lang_hooks.types.type_for_size (size, true);
2122 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2123 true, NULL);
2126 /* Adds standard iv candidates. */
2128 static void
2129 add_standard_iv_candidates (struct ivopts_data *data)
2131 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2133 /* The same for a double-integer type if it is still fast enough. */
2134 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2135 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2139 /* Adds candidates bases on the old induction variable IV. */
2141 static void
2142 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2144 tree phi, def;
2145 struct iv_cand *cand;
2147 add_candidate (data, iv->base, iv->step, true, NULL);
2149 /* The same, but with initial value zero. */
2150 add_candidate (data,
2151 build_int_cst (TREE_TYPE (iv->base), 0),
2152 iv->step, true, NULL);
2154 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2155 if (TREE_CODE (phi) == PHI_NODE)
2157 /* Additionally record the possibility of leaving the original iv
2158 untouched. */
2159 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2160 cand = add_candidate_1 (data,
2161 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2162 SSA_NAME_DEF_STMT (def));
2163 cand->var_before = iv->ssa_name;
2164 cand->var_after = def;
2168 /* Adds candidates based on the old induction variables. */
2170 static void
2171 add_old_ivs_candidates (struct ivopts_data *data)
2173 unsigned i;
2174 struct iv *iv;
2175 bitmap_iterator bi;
2177 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2179 iv = ver_info (data, i)->iv;
2180 if (iv && iv->biv_p && !zero_p (iv->step))
2181 add_old_iv_candidates (data, iv);
2185 /* Adds candidates based on the value of the induction variable IV and USE. */
2187 static void
2188 add_iv_value_candidates (struct ivopts_data *data,
2189 struct iv *iv, struct iv_use *use)
2191 unsigned HOST_WIDE_INT offset;
2192 tree base;
2194 add_candidate (data, iv->base, iv->step, false, use);
2196 /* The same, but with initial value zero. Make such variable important,
2197 since it is generic enough so that possibly many uses may be based
2198 on it. */
2199 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2200 iv->step, true, use);
2202 /* Third, try removing the constant offset. */
2203 base = strip_offset (iv->base, &offset);
2204 if (offset)
2205 add_candidate (data, base, iv->step, false, use);
2208 /* Adds candidates based on the uses. */
2210 static void
2211 add_derived_ivs_candidates (struct ivopts_data *data)
2213 unsigned i;
2215 for (i = 0; i < n_iv_uses (data); i++)
2217 struct iv_use *use = iv_use (data, i);
2219 if (!use)
2220 continue;
2222 switch (use->type)
2224 case USE_NONLINEAR_EXPR:
2225 case USE_COMPARE:
2226 case USE_ADDRESS:
2227 /* Just add the ivs based on the value of the iv used here. */
2228 add_iv_value_candidates (data, use->iv, use);
2229 break;
2231 default:
2232 gcc_unreachable ();
2237 /* Record important candidates and add them to related_cands bitmaps
2238 if needed. */
2240 static void
2241 record_important_candidates (struct ivopts_data *data)
2243 unsigned i;
2244 struct iv_use *use;
2246 for (i = 0; i < n_iv_cands (data); i++)
2248 struct iv_cand *cand = iv_cand (data, i);
2250 if (cand->important)
2251 bitmap_set_bit (data->important_candidates, i);
2254 data->consider_all_candidates = (n_iv_cands (data)
2255 <= CONSIDER_ALL_CANDIDATES_BOUND);
2257 if (data->consider_all_candidates)
2259 /* We will not need "related_cands" bitmaps in this case,
2260 so release them to decrease peak memory consumption. */
2261 for (i = 0; i < n_iv_uses (data); i++)
2263 use = iv_use (data, i);
2264 BITMAP_FREE (use->related_cands);
2267 else
2269 /* Add important candidates to the related_cands bitmaps. */
2270 for (i = 0; i < n_iv_uses (data); i++)
2271 bitmap_ior_into (iv_use (data, i)->related_cands,
2272 data->important_candidates);
2276 /* Finds the candidates for the induction variables. */
2278 static void
2279 find_iv_candidates (struct ivopts_data *data)
2281 /* Add commonly used ivs. */
2282 add_standard_iv_candidates (data);
2284 /* Add old induction variables. */
2285 add_old_ivs_candidates (data);
2287 /* Add induction variables derived from uses. */
2288 add_derived_ivs_candidates (data);
2290 /* Record the important candidates. */
2291 record_important_candidates (data);
2294 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2295 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2296 we allocate a simple list to every use. */
2298 static void
2299 alloc_use_cost_map (struct ivopts_data *data)
2301 unsigned i, size, s, j;
2303 for (i = 0; i < n_iv_uses (data); i++)
2305 struct iv_use *use = iv_use (data, i);
2306 bitmap_iterator bi;
2308 if (data->consider_all_candidates)
2309 size = n_iv_cands (data);
2310 else
2312 s = 0;
2313 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2315 s++;
2318 /* Round up to the power of two, so that moduling by it is fast. */
2319 for (size = 1; size < s; size <<= 1)
2320 continue;
2323 use->n_map_members = size;
2324 use->cost_map = XCNEWVEC (struct cost_pair, size);
2328 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2329 on invariants DEPENDS_ON and that the value used in expressing it
2330 is VALUE.*/
2332 static void
2333 set_use_iv_cost (struct ivopts_data *data,
2334 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2335 bitmap depends_on, tree value)
2337 unsigned i, s;
2339 if (cost == INFTY)
2341 BITMAP_FREE (depends_on);
2342 return;
2345 if (data->consider_all_candidates)
2347 use->cost_map[cand->id].cand = cand;
2348 use->cost_map[cand->id].cost = cost;
2349 use->cost_map[cand->id].depends_on = depends_on;
2350 use->cost_map[cand->id].value = value;
2351 return;
2354 /* n_map_members is a power of two, so this computes modulo. */
2355 s = cand->id & (use->n_map_members - 1);
2356 for (i = s; i < use->n_map_members; i++)
2357 if (!use->cost_map[i].cand)
2358 goto found;
2359 for (i = 0; i < s; i++)
2360 if (!use->cost_map[i].cand)
2361 goto found;
2363 gcc_unreachable ();
2365 found:
2366 use->cost_map[i].cand = cand;
2367 use->cost_map[i].cost = cost;
2368 use->cost_map[i].depends_on = depends_on;
2369 use->cost_map[i].value = value;
2372 /* Gets cost of (USE, CANDIDATE) pair. */
2374 static struct cost_pair *
2375 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2376 struct iv_cand *cand)
2378 unsigned i, s;
2379 struct cost_pair *ret;
2381 if (!cand)
2382 return NULL;
2384 if (data->consider_all_candidates)
2386 ret = use->cost_map + cand->id;
2387 if (!ret->cand)
2388 return NULL;
2390 return ret;
2393 /* n_map_members is a power of two, so this computes modulo. */
2394 s = cand->id & (use->n_map_members - 1);
2395 for (i = s; i < use->n_map_members; i++)
2396 if (use->cost_map[i].cand == cand)
2397 return use->cost_map + i;
2399 for (i = 0; i < s; i++)
2400 if (use->cost_map[i].cand == cand)
2401 return use->cost_map + i;
2403 return NULL;
2406 /* Returns estimate on cost of computing SEQ. */
2408 static unsigned
2409 seq_cost (rtx seq)
2411 unsigned cost = 0;
2412 rtx set;
2414 for (; seq; seq = NEXT_INSN (seq))
2416 set = single_set (seq);
2417 if (set)
2418 cost += rtx_cost (set, SET);
2419 else
2420 cost++;
2423 return cost;
2426 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2427 static rtx
2428 produce_memory_decl_rtl (tree obj, int *regno)
2430 rtx x;
2432 gcc_assert (obj);
2433 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2435 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2436 x = gen_rtx_SYMBOL_REF (Pmode, name);
2438 else
2439 x = gen_raw_REG (Pmode, (*regno)++);
2441 return gen_rtx_MEM (DECL_MODE (obj), x);
2444 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2445 walk_tree. DATA contains the actual fake register number. */
2447 static tree
2448 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2450 tree obj = NULL_TREE;
2451 rtx x = NULL_RTX;
2452 int *regno = data;
2454 switch (TREE_CODE (*expr_p))
2456 case ADDR_EXPR:
2457 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2458 handled_component_p (*expr_p);
2459 expr_p = &TREE_OPERAND (*expr_p, 0))
2460 continue;
2461 obj = *expr_p;
2462 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2463 x = produce_memory_decl_rtl (obj, regno);
2464 break;
2466 case SSA_NAME:
2467 *ws = 0;
2468 obj = SSA_NAME_VAR (*expr_p);
2469 if (!DECL_RTL_SET_P (obj))
2470 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2471 break;
2473 case VAR_DECL:
2474 case PARM_DECL:
2475 case RESULT_DECL:
2476 *ws = 0;
2477 obj = *expr_p;
2479 if (DECL_RTL_SET_P (obj))
2480 break;
2482 if (DECL_MODE (obj) == BLKmode)
2483 x = produce_memory_decl_rtl (obj, regno);
2484 else
2485 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2487 break;
2489 default:
2490 break;
2493 if (x)
2495 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2496 SET_DECL_RTL (obj, x);
2499 return NULL_TREE;
2502 /* Determines cost of the computation of EXPR. */
2504 static unsigned
2505 computation_cost (tree expr)
2507 rtx seq, rslt;
2508 tree type = TREE_TYPE (expr);
2509 unsigned cost;
2510 /* Avoid using hard regs in ways which may be unsupported. */
2511 int regno = LAST_VIRTUAL_REGISTER + 1;
2513 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2514 start_sequence ();
2515 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2516 seq = get_insns ();
2517 end_sequence ();
2519 cost = seq_cost (seq);
2520 if (MEM_P (rslt))
2521 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2523 return cost;
2526 /* Returns variable containing the value of candidate CAND at statement AT. */
2528 static tree
2529 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2531 if (stmt_after_increment (loop, cand, stmt))
2532 return cand->var_after;
2533 else
2534 return cand->var_before;
2537 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2538 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2541 tree_int_cst_sign_bit (tree t)
2543 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2544 unsigned HOST_WIDE_INT w;
2546 if (bitno < HOST_BITS_PER_WIDE_INT)
2547 w = TREE_INT_CST_LOW (t);
2548 else
2550 w = TREE_INT_CST_HIGH (t);
2551 bitno -= HOST_BITS_PER_WIDE_INT;
2554 return (w >> bitno) & 1;
2557 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2558 return cst. Otherwise return NULL_TREE. */
2560 static tree
2561 constant_multiple_of (tree type, tree top, tree bot)
2563 tree res, mby, p0, p1;
2564 enum tree_code code;
2565 bool negate;
2567 STRIP_NOPS (top);
2568 STRIP_NOPS (bot);
2570 if (operand_equal_p (top, bot, 0))
2571 return build_int_cst (type, 1);
2573 code = TREE_CODE (top);
2574 switch (code)
2576 case MULT_EXPR:
2577 mby = TREE_OPERAND (top, 1);
2578 if (TREE_CODE (mby) != INTEGER_CST)
2579 return NULL_TREE;
2581 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2582 if (!res)
2583 return NULL_TREE;
2585 return fold_binary_to_constant (MULT_EXPR, type, res,
2586 fold_convert (type, mby));
2588 case PLUS_EXPR:
2589 case MINUS_EXPR:
2590 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2591 if (!p0)
2592 return NULL_TREE;
2593 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2594 if (!p1)
2595 return NULL_TREE;
2597 return fold_binary_to_constant (code, type, p0, p1);
2599 case INTEGER_CST:
2600 if (TREE_CODE (bot) != INTEGER_CST)
2601 return NULL_TREE;
2603 bot = fold_convert (type, bot);
2604 top = fold_convert (type, top);
2606 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2607 the result afterwards. */
2608 if (tree_int_cst_sign_bit (bot))
2610 negate = true;
2611 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2613 else
2614 negate = false;
2616 /* Ditto for TOP. */
2617 if (tree_int_cst_sign_bit (top))
2619 negate = !negate;
2620 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2623 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2624 return NULL_TREE;
2626 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2627 if (negate)
2628 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2629 return res;
2631 default:
2632 return NULL_TREE;
2636 /* Sets COMB to CST. */
2638 static void
2639 aff_combination_const (struct affine_tree_combination *comb, tree type,
2640 unsigned HOST_WIDE_INT cst)
2642 unsigned prec = TYPE_PRECISION (type);
2644 comb->type = type;
2645 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2647 comb->n = 0;
2648 comb->rest = NULL_TREE;
2649 comb->offset = cst & comb->mask;
2652 /* Sets COMB to single element ELT. */
2654 static void
2655 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2657 unsigned prec = TYPE_PRECISION (type);
2659 comb->type = type;
2660 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2662 comb->n = 1;
2663 comb->elts[0] = elt;
2664 comb->coefs[0] = 1;
2665 comb->rest = NULL_TREE;
2666 comb->offset = 0;
2669 /* Scales COMB by SCALE. */
2671 static void
2672 aff_combination_scale (struct affine_tree_combination *comb,
2673 unsigned HOST_WIDE_INT scale)
2675 unsigned i, j;
2677 if (scale == 1)
2678 return;
2680 if (scale == 0)
2682 aff_combination_const (comb, comb->type, 0);
2683 return;
2686 comb->offset = (scale * comb->offset) & comb->mask;
2687 for (i = 0, j = 0; i < comb->n; i++)
2689 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2690 comb->elts[j] = comb->elts[i];
2691 if (comb->coefs[j] != 0)
2692 j++;
2694 comb->n = j;
2696 if (comb->rest)
2698 if (comb->n < MAX_AFF_ELTS)
2700 comb->coefs[comb->n] = scale;
2701 comb->elts[comb->n] = comb->rest;
2702 comb->rest = NULL_TREE;
2703 comb->n++;
2705 else
2706 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2707 build_int_cst_type (comb->type, scale));
2711 /* Adds ELT * SCALE to COMB. */
2713 static void
2714 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2715 unsigned HOST_WIDE_INT scale)
2717 unsigned i;
2719 if (scale == 0)
2720 return;
2722 for (i = 0; i < comb->n; i++)
2723 if (operand_equal_p (comb->elts[i], elt, 0))
2725 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2726 if (comb->coefs[i])
2727 return;
2729 comb->n--;
2730 comb->coefs[i] = comb->coefs[comb->n];
2731 comb->elts[i] = comb->elts[comb->n];
2733 if (comb->rest)
2735 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2736 comb->coefs[comb->n] = 1;
2737 comb->elts[comb->n] = comb->rest;
2738 comb->rest = NULL_TREE;
2739 comb->n++;
2741 return;
2743 if (comb->n < MAX_AFF_ELTS)
2745 comb->coefs[comb->n] = scale;
2746 comb->elts[comb->n] = elt;
2747 comb->n++;
2748 return;
2751 if (scale == 1)
2752 elt = fold_convert (comb->type, elt);
2753 else
2754 elt = fold_build2 (MULT_EXPR, comb->type,
2755 fold_convert (comb->type, elt),
2756 build_int_cst_type (comb->type, scale));
2758 if (comb->rest)
2759 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2760 else
2761 comb->rest = elt;
2764 /* Adds COMB2 to COMB1. */
2766 static void
2767 aff_combination_add (struct affine_tree_combination *comb1,
2768 struct affine_tree_combination *comb2)
2770 unsigned i;
2772 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2773 for (i = 0; i < comb2->n; i++)
2774 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2775 if (comb2->rest)
2776 aff_combination_add_elt (comb1, comb2->rest, 1);
2779 /* Splits EXPR into an affine combination of parts. */
2781 static void
2782 tree_to_aff_combination (tree expr, tree type,
2783 struct affine_tree_combination *comb)
2785 struct affine_tree_combination tmp;
2786 enum tree_code code;
2787 tree cst, core, toffset;
2788 HOST_WIDE_INT bitpos, bitsize;
2789 enum machine_mode mode;
2790 int unsignedp, volatilep;
2792 STRIP_NOPS (expr);
2794 code = TREE_CODE (expr);
2795 switch (code)
2797 case INTEGER_CST:
2798 aff_combination_const (comb, type, int_cst_value (expr));
2799 return;
2801 case PLUS_EXPR:
2802 case MINUS_EXPR:
2803 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2804 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2805 if (code == MINUS_EXPR)
2806 aff_combination_scale (&tmp, -1);
2807 aff_combination_add (comb, &tmp);
2808 return;
2810 case MULT_EXPR:
2811 cst = TREE_OPERAND (expr, 1);
2812 if (TREE_CODE (cst) != INTEGER_CST)
2813 break;
2814 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2815 aff_combination_scale (comb, int_cst_value (cst));
2816 return;
2818 case NEGATE_EXPR:
2819 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2820 aff_combination_scale (comb, -1);
2821 return;
2823 case ADDR_EXPR:
2824 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2825 &toffset, &mode, &unsignedp, &volatilep,
2826 false);
2827 if (bitpos % BITS_PER_UNIT != 0)
2828 break;
2829 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2830 core = build_fold_addr_expr (core);
2831 if (TREE_CODE (core) == ADDR_EXPR)
2832 aff_combination_add_elt (comb, core, 1);
2833 else
2835 tree_to_aff_combination (core, type, &tmp);
2836 aff_combination_add (comb, &tmp);
2838 if (toffset)
2840 tree_to_aff_combination (toffset, type, &tmp);
2841 aff_combination_add (comb, &tmp);
2843 return;
2845 default:
2846 break;
2849 aff_combination_elt (comb, type, expr);
2852 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2854 static tree
2855 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2856 unsigned HOST_WIDE_INT mask)
2858 enum tree_code code;
2860 scale &= mask;
2861 elt = fold_convert (type, elt);
2863 if (scale == 1)
2865 if (!expr)
2866 return elt;
2868 return fold_build2 (PLUS_EXPR, type, expr, elt);
2871 if (scale == mask)
2873 if (!expr)
2874 return fold_build1 (NEGATE_EXPR, type, elt);
2876 return fold_build2 (MINUS_EXPR, type, expr, elt);
2879 if (!expr)
2880 return fold_build2 (MULT_EXPR, type, elt,
2881 build_int_cst_type (type, scale));
2883 if ((scale | (mask >> 1)) == mask)
2885 /* Scale is negative. */
2886 code = MINUS_EXPR;
2887 scale = (-scale) & mask;
2889 else
2890 code = PLUS_EXPR;
2892 elt = fold_build2 (MULT_EXPR, type, elt,
2893 build_int_cst_type (type, scale));
2894 return fold_build2 (code, type, expr, elt);
2897 /* Copies the tree elements of COMB to ensure that they are not shared. */
2899 static void
2900 unshare_aff_combination (struct affine_tree_combination *comb)
2902 unsigned i;
2904 for (i = 0; i < comb->n; i++)
2905 comb->elts[i] = unshare_expr (comb->elts[i]);
2906 if (comb->rest)
2907 comb->rest = unshare_expr (comb->rest);
2910 /* Makes tree from the affine combination COMB. */
2912 static tree
2913 aff_combination_to_tree (struct affine_tree_combination *comb)
2915 tree type = comb->type;
2916 tree expr = comb->rest;
2917 unsigned i;
2918 unsigned HOST_WIDE_INT off, sgn;
2920 if (comb->n == 0 && comb->offset == 0)
2922 if (expr)
2924 /* Handle the special case produced by get_computation_aff when
2925 the type does not fit in HOST_WIDE_INT. */
2926 return fold_convert (type, expr);
2928 else
2929 return build_int_cst (type, 0);
2932 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2934 for (i = 0; i < comb->n; i++)
2935 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2936 comb->mask);
2938 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2940 /* Offset is negative. */
2941 off = (-comb->offset) & comb->mask;
2942 sgn = comb->mask;
2944 else
2946 off = comb->offset;
2947 sgn = 1;
2949 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2950 comb->mask);
2953 /* Folds EXPR using the affine expressions framework. */
2955 static tree
2956 fold_affine_expr (tree expr)
2958 tree type = TREE_TYPE (expr);
2959 struct affine_tree_combination comb;
2961 if (TYPE_PRECISION (type) > HOST_BITS_PER_WIDE_INT)
2962 return expr;
2964 tree_to_aff_combination (expr, type, &comb);
2965 return aff_combination_to_tree (&comb);
2968 /* Determines the expression by that USE is expressed from induction variable
2969 CAND at statement AT in LOOP. The expression is stored in a decomposed
2970 form into AFF. Returns false if USE cannot be expressed using CAND. */
2972 static bool
2973 get_computation_aff (struct loop *loop,
2974 struct iv_use *use, struct iv_cand *cand, tree at,
2975 struct affine_tree_combination *aff)
2977 tree ubase = use->iv->base;
2978 tree ustep = use->iv->step;
2979 tree cbase = cand->iv->base;
2980 tree cstep = cand->iv->step;
2981 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2982 tree uutype;
2983 tree expr, delta;
2984 tree ratio;
2985 unsigned HOST_WIDE_INT ustepi, cstepi;
2986 HOST_WIDE_INT ratioi;
2987 struct affine_tree_combination cbase_aff, expr_aff;
2988 tree cstep_orig = cstep, ustep_orig = ustep;
2990 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2992 /* We do not have a precision to express the values of use. */
2993 return false;
2996 expr = var_at_stmt (loop, cand, at);
2998 if (TREE_TYPE (expr) != ctype)
3000 /* This may happen with the original ivs. */
3001 expr = fold_convert (ctype, expr);
3004 if (TYPE_UNSIGNED (utype))
3005 uutype = utype;
3006 else
3008 uutype = unsigned_type_for (utype);
3009 ubase = fold_convert (uutype, ubase);
3010 ustep = fold_convert (uutype, ustep);
3013 if (uutype != ctype)
3015 expr = fold_convert (uutype, expr);
3016 cbase = fold_convert (uutype, cbase);
3017 cstep = fold_convert (uutype, cstep);
3019 /* If the conversion is not noop, we must take it into account when
3020 considering the value of the step. */
3021 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3022 cstep_orig = cstep;
3025 if (cst_and_fits_in_hwi (cstep_orig)
3026 && cst_and_fits_in_hwi (ustep_orig))
3028 ustepi = int_cst_value (ustep_orig);
3029 cstepi = int_cst_value (cstep_orig);
3031 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3033 /* TODO maybe consider case when ustep divides cstep and the ratio is
3034 a power of 2 (so that the division is fast to execute)? We would
3035 need to be much more careful with overflows etc. then. */
3036 return false;
3039 ratio = build_int_cst_type (uutype, ratioi);
3041 else
3043 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3044 if (!ratio)
3045 return false;
3047 /* Ratioi is only used to detect special cases when the multiplicative
3048 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3049 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3050 to integer_onep/integer_all_onesp, since the former ignores
3051 TREE_OVERFLOW. */
3052 if (cst_and_fits_in_hwi (ratio))
3053 ratioi = int_cst_value (ratio);
3054 else if (integer_onep (ratio))
3055 ratioi = 1;
3056 else if (integer_all_onesp (ratio))
3057 ratioi = -1;
3058 else
3059 ratioi = 0;
3062 /* We may need to shift the value if we are after the increment. */
3063 if (stmt_after_increment (loop, cand, at))
3064 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3066 /* use = ubase - ratio * cbase + ratio * var.
3068 In general case ubase + ratio * (var - cbase) could be better (one less
3069 multiplication), but often it is possible to eliminate redundant parts
3070 of computations from (ubase - ratio * cbase) term, and if it does not
3071 happen, fold is able to apply the distributive law to obtain this form
3072 anyway. */
3074 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3076 /* Let's compute in trees and just return the result in AFF. This case
3077 should not be very common, and fold itself is not that bad either,
3078 so making the aff. functions more complicated to handle this case
3079 is not that urgent. */
3080 if (ratioi == 1)
3082 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3083 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3085 else if (ratioi == -1)
3087 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3088 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3090 else
3092 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3093 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3094 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3095 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3098 aff->type = uutype;
3099 aff->n = 0;
3100 aff->offset = 0;
3101 aff->mask = 0;
3102 aff->rest = expr;
3103 return true;
3106 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3107 possible to compute ratioi. */
3108 gcc_assert (ratioi);
3110 tree_to_aff_combination (ubase, uutype, aff);
3111 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3112 tree_to_aff_combination (expr, uutype, &expr_aff);
3113 aff_combination_scale (&cbase_aff, -ratioi);
3114 aff_combination_scale (&expr_aff, ratioi);
3115 aff_combination_add (aff, &cbase_aff);
3116 aff_combination_add (aff, &expr_aff);
3118 return true;
3121 /* Determines the expression by that USE is expressed from induction variable
3122 CAND at statement AT in LOOP. The computation is unshared. */
3124 static tree
3125 get_computation_at (struct loop *loop,
3126 struct iv_use *use, struct iv_cand *cand, tree at)
3128 struct affine_tree_combination aff;
3129 tree type = TREE_TYPE (use->iv->base);
3131 if (!get_computation_aff (loop, use, cand, at, &aff))
3132 return NULL_TREE;
3133 unshare_aff_combination (&aff);
3134 return fold_convert (type, aff_combination_to_tree (&aff));
3137 /* Determines the expression by that USE is expressed from induction variable
3138 CAND in LOOP. The computation is unshared. */
3140 static tree
3141 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3143 return get_computation_at (loop, use, cand, use->stmt);
3146 /* Returns cost of addition in MODE. */
3148 static unsigned
3149 add_cost (enum machine_mode mode)
3151 static unsigned costs[NUM_MACHINE_MODES];
3152 rtx seq;
3153 unsigned cost;
3155 if (costs[mode])
3156 return costs[mode];
3158 start_sequence ();
3159 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3160 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3161 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3162 NULL_RTX);
3163 seq = get_insns ();
3164 end_sequence ();
3166 cost = seq_cost (seq);
3167 if (!cost)
3168 cost = 1;
3170 costs[mode] = cost;
3172 if (dump_file && (dump_flags & TDF_DETAILS))
3173 fprintf (dump_file, "Addition in %s costs %d\n",
3174 GET_MODE_NAME (mode), cost);
3175 return cost;
3178 /* Entry in a hashtable of already known costs for multiplication. */
3179 struct mbc_entry
3181 HOST_WIDE_INT cst; /* The constant to multiply by. */
3182 enum machine_mode mode; /* In mode. */
3183 unsigned cost; /* The cost. */
3186 /* Counts hash value for the ENTRY. */
3188 static hashval_t
3189 mbc_entry_hash (const void *entry)
3191 const struct mbc_entry *e = entry;
3193 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3196 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3198 static int
3199 mbc_entry_eq (const void *entry1, const void *entry2)
3201 const struct mbc_entry *e1 = entry1;
3202 const struct mbc_entry *e2 = entry2;
3204 return (e1->mode == e2->mode
3205 && e1->cst == e2->cst);
3208 /* Returns cost of multiplication by constant CST in MODE. */
3210 unsigned
3211 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3213 static htab_t costs;
3214 struct mbc_entry **cached, act;
3215 rtx seq;
3216 unsigned cost;
3218 if (!costs)
3219 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3221 act.mode = mode;
3222 act.cst = cst;
3223 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3224 if (*cached)
3225 return (*cached)->cost;
3227 *cached = XNEW (struct mbc_entry);
3228 (*cached)->mode = mode;
3229 (*cached)->cst = cst;
3231 start_sequence ();
3232 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3233 gen_int_mode (cst, mode), NULL_RTX, 0);
3234 seq = get_insns ();
3235 end_sequence ();
3237 cost = seq_cost (seq);
3239 if (dump_file && (dump_flags & TDF_DETAILS))
3240 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3241 (int) cst, GET_MODE_NAME (mode), cost);
3243 (*cached)->cost = cost;
3245 return cost;
3248 /* Returns true if multiplying by RATIO is allowed in address. */
3250 bool
3251 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3253 #define MAX_RATIO 128
3254 static sbitmap valid_mult;
3256 if (!valid_mult)
3258 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3259 rtx addr;
3260 HOST_WIDE_INT i;
3262 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3263 sbitmap_zero (valid_mult);
3264 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3265 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3267 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3268 if (memory_address_p (Pmode, addr))
3269 SET_BIT (valid_mult, i + MAX_RATIO);
3272 if (dump_file && (dump_flags & TDF_DETAILS))
3274 fprintf (dump_file, " allowed multipliers:");
3275 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3276 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3277 fprintf (dump_file, " %d", (int) i);
3278 fprintf (dump_file, "\n");
3279 fprintf (dump_file, "\n");
3283 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3284 return false;
3286 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3289 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3290 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3291 variable is omitted. The created memory accesses MODE.
3293 TODO -- there must be some better way. This all is quite crude. */
3295 static unsigned
3296 get_address_cost (bool symbol_present, bool var_present,
3297 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3299 static bool initialized = false;
3300 static HOST_WIDE_INT rat, off;
3301 static HOST_WIDE_INT min_offset, max_offset;
3302 static unsigned costs[2][2][2][2];
3303 unsigned cost, acost;
3304 rtx seq, addr, base;
3305 bool offset_p, ratio_p;
3306 rtx reg1;
3307 HOST_WIDE_INT s_offset;
3308 unsigned HOST_WIDE_INT mask;
3309 unsigned bits;
3311 if (!initialized)
3313 HOST_WIDE_INT i;
3314 initialized = true;
3316 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3318 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3319 for (i = 1; i <= 1 << 20; i <<= 1)
3321 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3322 if (!memory_address_p (Pmode, addr))
3323 break;
3325 max_offset = i >> 1;
3326 off = max_offset;
3328 for (i = 1; i <= 1 << 20; i <<= 1)
3330 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3331 if (!memory_address_p (Pmode, addr))
3332 break;
3334 min_offset = -(i >> 1);
3336 if (dump_file && (dump_flags & TDF_DETAILS))
3338 fprintf (dump_file, "get_address_cost:\n");
3339 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3340 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3343 rat = 1;
3344 for (i = 2; i <= MAX_RATIO; i++)
3345 if (multiplier_allowed_in_address_p (i))
3347 rat = i;
3348 break;
3352 bits = GET_MODE_BITSIZE (Pmode);
3353 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3354 offset &= mask;
3355 if ((offset >> (bits - 1) & 1))
3356 offset |= ~mask;
3357 s_offset = offset;
3359 cost = 0;
3360 offset_p = (s_offset != 0
3361 && min_offset <= s_offset && s_offset <= max_offset);
3362 ratio_p = (ratio != 1
3363 && multiplier_allowed_in_address_p (ratio));
3365 if (ratio != 1 && !ratio_p)
3366 cost += multiply_by_cost (ratio, Pmode);
3368 if (s_offset && !offset_p && !symbol_present)
3370 cost += add_cost (Pmode);
3371 var_present = true;
3374 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3375 if (!acost)
3377 int old_cse_not_expected;
3378 acost = 0;
3380 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3381 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3382 if (ratio_p)
3383 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3385 if (var_present)
3386 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3388 if (symbol_present)
3390 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3391 if (offset_p)
3392 base = gen_rtx_fmt_e (CONST, Pmode,
3393 gen_rtx_fmt_ee (PLUS, Pmode,
3394 base,
3395 gen_int_mode (off, Pmode)));
3397 else if (offset_p)
3398 base = gen_int_mode (off, Pmode);
3399 else
3400 base = NULL_RTX;
3402 if (base)
3403 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3405 start_sequence ();
3406 /* To avoid splitting addressing modes, pretend that no cse will
3407 follow. */
3408 old_cse_not_expected = cse_not_expected;
3409 cse_not_expected = true;
3410 addr = memory_address (Pmode, addr);
3411 cse_not_expected = old_cse_not_expected;
3412 seq = get_insns ();
3413 end_sequence ();
3415 acost = seq_cost (seq);
3416 acost += address_cost (addr, Pmode);
3418 if (!acost)
3419 acost = 1;
3420 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3423 return cost + acost;
3426 /* Estimates cost of forcing expression EXPR into a variable. */
3428 unsigned
3429 force_expr_to_var_cost (tree expr)
3431 static bool costs_initialized = false;
3432 static unsigned integer_cost;
3433 static unsigned symbol_cost;
3434 static unsigned address_cost;
3435 tree op0, op1;
3436 unsigned cost0, cost1, cost;
3437 enum machine_mode mode;
3439 if (!costs_initialized)
3441 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3442 rtx x = gen_rtx_MEM (DECL_MODE (var),
3443 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3444 tree addr;
3445 tree type = build_pointer_type (integer_type_node);
3447 integer_cost = computation_cost (build_int_cst (integer_type_node,
3448 2000));
3450 SET_DECL_RTL (var, x);
3451 TREE_STATIC (var) = 1;
3452 addr = build1 (ADDR_EXPR, type, var);
3453 symbol_cost = computation_cost (addr) + 1;
3455 address_cost
3456 = computation_cost (build2 (PLUS_EXPR, type,
3457 addr,
3458 build_int_cst (type, 2000))) + 1;
3459 if (dump_file && (dump_flags & TDF_DETAILS))
3461 fprintf (dump_file, "force_expr_to_var_cost:\n");
3462 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3463 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3464 fprintf (dump_file, " address %d\n", (int) address_cost);
3465 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3466 fprintf (dump_file, "\n");
3469 costs_initialized = true;
3472 STRIP_NOPS (expr);
3474 if (SSA_VAR_P (expr))
3475 return 0;
3477 if (TREE_INVARIANT (expr))
3479 if (TREE_CODE (expr) == INTEGER_CST)
3480 return integer_cost;
3482 if (TREE_CODE (expr) == ADDR_EXPR)
3484 tree obj = TREE_OPERAND (expr, 0);
3486 if (TREE_CODE (obj) == VAR_DECL
3487 || TREE_CODE (obj) == PARM_DECL
3488 || TREE_CODE (obj) == RESULT_DECL)
3489 return symbol_cost;
3492 return address_cost;
3495 switch (TREE_CODE (expr))
3497 case PLUS_EXPR:
3498 case MINUS_EXPR:
3499 case MULT_EXPR:
3500 op0 = TREE_OPERAND (expr, 0);
3501 op1 = TREE_OPERAND (expr, 1);
3502 STRIP_NOPS (op0);
3503 STRIP_NOPS (op1);
3505 if (is_gimple_val (op0))
3506 cost0 = 0;
3507 else
3508 cost0 = force_expr_to_var_cost (op0);
3510 if (is_gimple_val (op1))
3511 cost1 = 0;
3512 else
3513 cost1 = force_expr_to_var_cost (op1);
3515 break;
3517 default:
3518 /* Just an arbitrary value, FIXME. */
3519 return target_spill_cost;
3522 mode = TYPE_MODE (TREE_TYPE (expr));
3523 switch (TREE_CODE (expr))
3525 case PLUS_EXPR:
3526 case MINUS_EXPR:
3527 cost = add_cost (mode);
3528 break;
3530 case MULT_EXPR:
3531 if (cst_and_fits_in_hwi (op0))
3532 cost = multiply_by_cost (int_cst_value (op0), mode);
3533 else if (cst_and_fits_in_hwi (op1))
3534 cost = multiply_by_cost (int_cst_value (op1), mode);
3535 else
3536 return target_spill_cost;
3537 break;
3539 default:
3540 gcc_unreachable ();
3543 cost += cost0;
3544 cost += cost1;
3546 /* Bound the cost by target_spill_cost. The parts of complicated
3547 computations often are either loop invariant or at least can
3548 be shared between several iv uses, so letting this grow without
3549 limits would not give reasonable results. */
3550 return cost < target_spill_cost ? cost : target_spill_cost;
3553 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3554 invariants the computation depends on. */
3556 static unsigned
3557 force_var_cost (struct ivopts_data *data,
3558 tree expr, bitmap *depends_on)
3560 if (depends_on)
3562 fd_ivopts_data = data;
3563 walk_tree (&expr, find_depends, depends_on, NULL);
3566 return force_expr_to_var_cost (expr);
3569 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3570 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3571 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3572 invariants the computation depends on. */
3574 static unsigned
3575 split_address_cost (struct ivopts_data *data,
3576 tree addr, bool *symbol_present, bool *var_present,
3577 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3579 tree core;
3580 HOST_WIDE_INT bitsize;
3581 HOST_WIDE_INT bitpos;
3582 tree toffset;
3583 enum machine_mode mode;
3584 int unsignedp, volatilep;
3586 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3587 &unsignedp, &volatilep, false);
3589 if (toffset != 0
3590 || bitpos % BITS_PER_UNIT != 0
3591 || TREE_CODE (core) != VAR_DECL)
3593 *symbol_present = false;
3594 *var_present = true;
3595 fd_ivopts_data = data;
3596 walk_tree (&addr, find_depends, depends_on, NULL);
3597 return target_spill_cost;
3600 *offset += bitpos / BITS_PER_UNIT;
3601 if (TREE_STATIC (core)
3602 || DECL_EXTERNAL (core))
3604 *symbol_present = true;
3605 *var_present = false;
3606 return 0;
3609 *symbol_present = false;
3610 *var_present = true;
3611 return 0;
3614 /* Estimates cost of expressing difference of addresses E1 - E2 as
3615 var + symbol + offset. The value of offset is added to OFFSET,
3616 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3617 part is missing. DEPENDS_ON is a set of the invariants the computation
3618 depends on. */
3620 static unsigned
3621 ptr_difference_cost (struct ivopts_data *data,
3622 tree e1, tree e2, bool *symbol_present, bool *var_present,
3623 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3625 HOST_WIDE_INT diff = 0;
3626 unsigned cost;
3628 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3630 if (ptr_difference_const (e1, e2, &diff))
3632 *offset += diff;
3633 *symbol_present = false;
3634 *var_present = false;
3635 return 0;
3638 if (e2 == integer_zero_node)
3639 return split_address_cost (data, TREE_OPERAND (e1, 0),
3640 symbol_present, var_present, offset, depends_on);
3642 *symbol_present = false;
3643 *var_present = true;
3645 cost = force_var_cost (data, e1, depends_on);
3646 cost += force_var_cost (data, e2, depends_on);
3647 cost += add_cost (Pmode);
3649 return cost;
3652 /* Estimates cost of expressing difference E1 - E2 as
3653 var + symbol + offset. The value of offset is added to OFFSET,
3654 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3655 part is missing. DEPENDS_ON is a set of the invariants the computation
3656 depends on. */
3658 static unsigned
3659 difference_cost (struct ivopts_data *data,
3660 tree e1, tree e2, bool *symbol_present, bool *var_present,
3661 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3663 unsigned cost;
3664 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3665 unsigned HOST_WIDE_INT off1, off2;
3667 e1 = strip_offset (e1, &off1);
3668 e2 = strip_offset (e2, &off2);
3669 *offset += off1 - off2;
3671 STRIP_NOPS (e1);
3672 STRIP_NOPS (e2);
3674 if (TREE_CODE (e1) == ADDR_EXPR)
3675 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3676 depends_on);
3677 *symbol_present = false;
3679 if (operand_equal_p (e1, e2, 0))
3681 *var_present = false;
3682 return 0;
3684 *var_present = true;
3685 if (zero_p (e2))
3686 return force_var_cost (data, e1, depends_on);
3688 if (zero_p (e1))
3690 cost = force_var_cost (data, e2, depends_on);
3691 cost += multiply_by_cost (-1, mode);
3693 return cost;
3696 cost = force_var_cost (data, e1, depends_on);
3697 cost += force_var_cost (data, e2, depends_on);
3698 cost += add_cost (mode);
3700 return cost;
3703 /* Determines the cost of the computation by that USE is expressed
3704 from induction variable CAND. If ADDRESS_P is true, we just need
3705 to create an address from it, otherwise we want to get it into
3706 register. A set of invariants we depend on is stored in
3707 DEPENDS_ON. AT is the statement at that the value is computed. */
3709 static unsigned
3710 get_computation_cost_at (struct ivopts_data *data,
3711 struct iv_use *use, struct iv_cand *cand,
3712 bool address_p, bitmap *depends_on, tree at)
3714 tree ubase = use->iv->base, ustep = use->iv->step;
3715 tree cbase, cstep;
3716 tree utype = TREE_TYPE (ubase), ctype;
3717 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3718 HOST_WIDE_INT ratio, aratio;
3719 bool var_present, symbol_present;
3720 unsigned cost = 0, n_sums;
3722 *depends_on = NULL;
3724 /* Only consider real candidates. */
3725 if (!cand->iv)
3726 return INFTY;
3728 cbase = cand->iv->base;
3729 cstep = cand->iv->step;
3730 ctype = TREE_TYPE (cbase);
3732 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3734 /* We do not have a precision to express the values of use. */
3735 return INFTY;
3738 if (address_p)
3740 /* Do not try to express address of an object with computation based
3741 on address of a different object. This may cause problems in rtl
3742 level alias analysis (that does not expect this to be happening,
3743 as this is illegal in C), and would be unlikely to be useful
3744 anyway. */
3745 if (use->iv->base_object
3746 && cand->iv->base_object
3747 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3748 return INFTY;
3751 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3753 /* TODO -- add direct handling of this case. */
3754 goto fallback;
3757 /* CSTEPI is removed from the offset in case statement is after the
3758 increment. If the step is not constant, we use zero instead.
3759 This is a bit imprecise (there is the extra addition), but
3760 redundancy elimination is likely to transform the code so that
3761 it uses value of the variable before increment anyway,
3762 so it is not that much unrealistic. */
3763 if (cst_and_fits_in_hwi (cstep))
3764 cstepi = int_cst_value (cstep);
3765 else
3766 cstepi = 0;
3768 if (cst_and_fits_in_hwi (ustep)
3769 && cst_and_fits_in_hwi (cstep))
3771 ustepi = int_cst_value (ustep);
3773 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3774 return INFTY;
3776 else
3778 tree rat;
3780 rat = constant_multiple_of (utype, ustep, cstep);
3782 if (!rat)
3783 return INFTY;
3785 if (cst_and_fits_in_hwi (rat))
3786 ratio = int_cst_value (rat);
3787 else if (integer_onep (rat))
3788 ratio = 1;
3789 else if (integer_all_onesp (rat))
3790 ratio = -1;
3791 else
3792 return INFTY;
3795 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3796 or ratio == 1, it is better to handle this like
3798 ubase - ratio * cbase + ratio * var
3800 (also holds in the case ratio == -1, TODO. */
3802 if (cst_and_fits_in_hwi (cbase))
3804 offset = - ratio * int_cst_value (cbase);
3805 cost += difference_cost (data,
3806 ubase, integer_zero_node,
3807 &symbol_present, &var_present, &offset,
3808 depends_on);
3810 else if (ratio == 1)
3812 cost += difference_cost (data,
3813 ubase, cbase,
3814 &symbol_present, &var_present, &offset,
3815 depends_on);
3817 else
3819 cost += force_var_cost (data, cbase, depends_on);
3820 cost += add_cost (TYPE_MODE (ctype));
3821 cost += difference_cost (data,
3822 ubase, integer_zero_node,
3823 &symbol_present, &var_present, &offset,
3824 depends_on);
3827 /* If we are after the increment, the value of the candidate is higher by
3828 one iteration. */
3829 if (stmt_after_increment (data->current_loop, cand, at))
3830 offset -= ratio * cstepi;
3832 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3833 (symbol/var/const parts may be omitted). If we are looking for an address,
3834 find the cost of addressing this. */
3835 if (address_p)
3836 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3838 /* Otherwise estimate the costs for computing the expression. */
3839 aratio = ratio > 0 ? ratio : -ratio;
3840 if (!symbol_present && !var_present && !offset)
3842 if (ratio != 1)
3843 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3845 return cost;
3848 if (aratio != 1)
3849 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3851 n_sums = 1;
3852 if (var_present
3853 /* Symbol + offset should be compile-time computable. */
3854 && (symbol_present || offset))
3855 n_sums++;
3857 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3859 fallback:
3861 /* Just get the expression, expand it and measure the cost. */
3862 tree comp = get_computation_at (data->current_loop, use, cand, at);
3864 if (!comp)
3865 return INFTY;
3867 if (address_p)
3868 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3870 return computation_cost (comp);
3874 /* Determines the cost of the computation by that USE is expressed
3875 from induction variable CAND. If ADDRESS_P is true, we just need
3876 to create an address from it, otherwise we want to get it into
3877 register. A set of invariants we depend on is stored in
3878 DEPENDS_ON. */
3880 static unsigned
3881 get_computation_cost (struct ivopts_data *data,
3882 struct iv_use *use, struct iv_cand *cand,
3883 bool address_p, bitmap *depends_on)
3885 return get_computation_cost_at (data,
3886 use, cand, address_p, depends_on, use->stmt);
3889 /* Determines cost of basing replacement of USE on CAND in a generic
3890 expression. */
3892 static bool
3893 determine_use_iv_cost_generic (struct ivopts_data *data,
3894 struct iv_use *use, struct iv_cand *cand)
3896 bitmap depends_on;
3897 unsigned cost;
3899 /* The simple case first -- if we need to express value of the preserved
3900 original biv, the cost is 0. This also prevents us from counting the
3901 cost of increment twice -- once at this use and once in the cost of
3902 the candidate. */
3903 if (cand->pos == IP_ORIGINAL
3904 && cand->incremented_at == use->stmt)
3906 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3907 return true;
3910 cost = get_computation_cost (data, use, cand, false, &depends_on);
3911 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3913 return cost != INFTY;
3916 /* Determines cost of basing replacement of USE on CAND in an address. */
3918 static bool
3919 determine_use_iv_cost_address (struct ivopts_data *data,
3920 struct iv_use *use, struct iv_cand *cand)
3922 bitmap depends_on;
3923 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3925 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3927 return cost != INFTY;
3930 /* Computes value of induction variable IV in iteration NITER. */
3932 static tree
3933 iv_value (struct iv *iv, tree niter)
3935 tree val;
3936 tree type = TREE_TYPE (iv->base);
3938 niter = fold_convert (type, niter);
3939 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3941 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3944 /* Computes value of candidate CAND at position AT in iteration NITER. */
3946 static tree
3947 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3949 tree val = iv_value (cand->iv, niter);
3950 tree type = TREE_TYPE (cand->iv->base);
3952 if (stmt_after_increment (loop, cand, at))
3953 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3955 return val;
3958 /* Returns period of induction variable iv. */
3960 static tree
3961 iv_period (struct iv *iv)
3963 tree step = iv->step, period, type;
3964 tree pow2div;
3966 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3968 /* Period of the iv is gcd (step, type range). Since type range is power
3969 of two, it suffices to determine the maximum power of two that divides
3970 step. */
3971 pow2div = num_ending_zeros (step);
3972 type = unsigned_type_for (TREE_TYPE (step));
3974 period = build_low_bits_mask (type,
3975 (TYPE_PRECISION (type)
3976 - tree_low_cst (pow2div, 1)));
3978 return period;
3981 /* Returns the comparison operator used when eliminating the iv USE. */
3983 static enum tree_code
3984 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3986 struct loop *loop = data->current_loop;
3987 basic_block ex_bb;
3988 edge exit;
3990 ex_bb = bb_for_stmt (use->stmt);
3991 exit = EDGE_SUCC (ex_bb, 0);
3992 if (flow_bb_inside_loop_p (loop, exit->dest))
3993 exit = EDGE_SUCC (ex_bb, 1);
3995 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3998 /* Check whether it is possible to express the condition in USE by comparison
3999 of candidate CAND. If so, store the value compared with to BOUND. */
4001 static bool
4002 may_eliminate_iv (struct ivopts_data *data,
4003 struct iv_use *use, struct iv_cand *cand, tree *bound)
4005 basic_block ex_bb;
4006 edge exit;
4007 tree nit, nit_type;
4008 tree wider_type, period, per_type;
4009 struct loop *loop = data->current_loop;
4011 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4012 return false;
4014 /* For now works only for exits that dominate the loop latch. TODO -- extend
4015 for other conditions inside loop body. */
4016 ex_bb = bb_for_stmt (use->stmt);
4017 if (use->stmt != last_stmt (ex_bb)
4018 || TREE_CODE (use->stmt) != COND_EXPR)
4019 return false;
4020 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4021 return false;
4023 exit = EDGE_SUCC (ex_bb, 0);
4024 if (flow_bb_inside_loop_p (loop, exit->dest))
4025 exit = EDGE_SUCC (ex_bb, 1);
4026 if (flow_bb_inside_loop_p (loop, exit->dest))
4027 return false;
4029 nit = niter_for_exit (data, exit);
4030 if (!nit)
4031 return false;
4033 nit_type = TREE_TYPE (nit);
4035 /* Determine whether we may use the variable to test whether niter iterations
4036 elapsed. This is the case iff the period of the induction variable is
4037 greater than the number of iterations. */
4038 period = iv_period (cand->iv);
4039 if (!period)
4040 return false;
4041 per_type = TREE_TYPE (period);
4043 wider_type = TREE_TYPE (period);
4044 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4045 wider_type = per_type;
4046 else
4047 wider_type = nit_type;
4049 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4050 fold_convert (wider_type, period),
4051 fold_convert (wider_type, nit))))
4052 return false;
4054 *bound = fold_affine_expr (cand_value_at (loop, cand, use->stmt, nit));
4055 return true;
4058 /* Determines cost of basing replacement of USE on CAND in a condition. */
4060 static bool
4061 determine_use_iv_cost_condition (struct ivopts_data *data,
4062 struct iv_use *use, struct iv_cand *cand)
4064 tree bound = NULL_TREE, op, cond;
4065 bitmap depends_on = NULL;
4066 unsigned cost;
4068 /* Only consider real candidates. */
4069 if (!cand->iv)
4071 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4072 return false;
4075 if (may_eliminate_iv (data, use, cand, &bound))
4077 cost = force_var_cost (data, bound, &depends_on);
4079 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4080 return cost != INFTY;
4083 /* The induction variable elimination failed; just express the original
4084 giv. If it is compared with an invariant, note that we cannot get
4085 rid of it. */
4086 cost = get_computation_cost (data, use, cand, false, &depends_on);
4088 cond = *use->op_p;
4089 if (TREE_CODE (cond) != SSA_NAME)
4091 op = TREE_OPERAND (cond, 0);
4092 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4093 op = TREE_OPERAND (cond, 1);
4094 if (TREE_CODE (op) == SSA_NAME)
4096 op = get_iv (data, op)->base;
4097 fd_ivopts_data = data;
4098 walk_tree (&op, find_depends, &depends_on, NULL);
4102 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4103 return cost != INFTY;
4106 /* Determines cost of basing replacement of USE on CAND. Returns false
4107 if USE cannot be based on CAND. */
4109 static bool
4110 determine_use_iv_cost (struct ivopts_data *data,
4111 struct iv_use *use, struct iv_cand *cand)
4113 switch (use->type)
4115 case USE_NONLINEAR_EXPR:
4116 return determine_use_iv_cost_generic (data, use, cand);
4118 case USE_ADDRESS:
4119 return determine_use_iv_cost_address (data, use, cand);
4121 case USE_COMPARE:
4122 return determine_use_iv_cost_condition (data, use, cand);
4124 default:
4125 gcc_unreachable ();
4129 /* Determines costs of basing the use of the iv on an iv candidate. */
4131 static void
4132 determine_use_iv_costs (struct ivopts_data *data)
4134 unsigned i, j;
4135 struct iv_use *use;
4136 struct iv_cand *cand;
4137 bitmap to_clear = BITMAP_ALLOC (NULL);
4139 alloc_use_cost_map (data);
4141 for (i = 0; i < n_iv_uses (data); i++)
4143 use = iv_use (data, i);
4145 if (data->consider_all_candidates)
4147 for (j = 0; j < n_iv_cands (data); j++)
4149 cand = iv_cand (data, j);
4150 determine_use_iv_cost (data, use, cand);
4153 else
4155 bitmap_iterator bi;
4157 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4159 cand = iv_cand (data, j);
4160 if (!determine_use_iv_cost (data, use, cand))
4161 bitmap_set_bit (to_clear, j);
4164 /* Remove the candidates for that the cost is infinite from
4165 the list of related candidates. */
4166 bitmap_and_compl_into (use->related_cands, to_clear);
4167 bitmap_clear (to_clear);
4171 BITMAP_FREE (to_clear);
4173 if (dump_file && (dump_flags & TDF_DETAILS))
4175 fprintf (dump_file, "Use-candidate costs:\n");
4177 for (i = 0; i < n_iv_uses (data); i++)
4179 use = iv_use (data, i);
4181 fprintf (dump_file, "Use %d:\n", i);
4182 fprintf (dump_file, " cand\tcost\tdepends on\n");
4183 for (j = 0; j < use->n_map_members; j++)
4185 if (!use->cost_map[j].cand
4186 || use->cost_map[j].cost == INFTY)
4187 continue;
4189 fprintf (dump_file, " %d\t%d\t",
4190 use->cost_map[j].cand->id,
4191 use->cost_map[j].cost);
4192 if (use->cost_map[j].depends_on)
4193 bitmap_print (dump_file,
4194 use->cost_map[j].depends_on, "","");
4195 fprintf (dump_file, "\n");
4198 fprintf (dump_file, "\n");
4200 fprintf (dump_file, "\n");
4204 /* Determines cost of the candidate CAND. */
4206 static void
4207 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4209 unsigned cost_base, cost_step;
4210 tree base;
4212 if (!cand->iv)
4214 cand->cost = 0;
4215 return;
4218 /* There are two costs associated with the candidate -- its increment
4219 and its initialization. The second is almost negligible for any loop
4220 that rolls enough, so we take it just very little into account. */
4222 base = cand->iv->base;
4223 cost_base = force_var_cost (data, base, NULL);
4224 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4226 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4228 /* Prefer the original iv unless we may gain something by replacing it;
4229 this is not really relevant for artificial ivs created by other
4230 passes. */
4231 if (cand->pos == IP_ORIGINAL
4232 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4233 cand->cost--;
4235 /* Prefer not to insert statements into latch unless there are some
4236 already (so that we do not create unnecessary jumps). */
4237 if (cand->pos == IP_END
4238 && empty_block_p (ip_end_pos (data->current_loop)))
4239 cand->cost++;
4242 /* Determines costs of computation of the candidates. */
4244 static void
4245 determine_iv_costs (struct ivopts_data *data)
4247 unsigned i;
4249 if (dump_file && (dump_flags & TDF_DETAILS))
4251 fprintf (dump_file, "Candidate costs:\n");
4252 fprintf (dump_file, " cand\tcost\n");
4255 for (i = 0; i < n_iv_cands (data); i++)
4257 struct iv_cand *cand = iv_cand (data, i);
4259 determine_iv_cost (data, cand);
4261 if (dump_file && (dump_flags & TDF_DETAILS))
4262 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4265 if (dump_file && (dump_flags & TDF_DETAILS))
4266 fprintf (dump_file, "\n");
4269 /* Calculates cost for having SIZE induction variables. */
4271 static unsigned
4272 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4274 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4277 /* For each size of the induction variable set determine the penalty. */
4279 static void
4280 determine_set_costs (struct ivopts_data *data)
4282 unsigned j, n;
4283 tree phi, op;
4284 struct loop *loop = data->current_loop;
4285 bitmap_iterator bi;
4287 /* We use the following model (definitely improvable, especially the
4288 cost function -- TODO):
4290 We estimate the number of registers available (using MD data), name it A.
4292 We estimate the number of registers used by the loop, name it U. This
4293 number is obtained as the number of loop phi nodes (not counting virtual
4294 registers and bivs) + the number of variables from outside of the loop.
4296 We set a reserve R (free regs that are used for temporary computations,
4297 etc.). For now the reserve is a constant 3.
4299 Let I be the number of induction variables.
4301 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4302 make a lot of ivs without a reason).
4303 -- if A - R < U + I <= A, the cost is I * PRES_COST
4304 -- if U + I > A, the cost is I * PRES_COST and
4305 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4307 if (dump_file && (dump_flags & TDF_DETAILS))
4309 fprintf (dump_file, "Global costs:\n");
4310 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4311 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4312 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4313 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4316 n = 0;
4317 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4319 op = PHI_RESULT (phi);
4321 if (!is_gimple_reg (op))
4322 continue;
4324 if (get_iv (data, op))
4325 continue;
4327 n++;
4330 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4332 struct version_info *info = ver_info (data, j);
4334 if (info->inv_id && info->has_nonlin_use)
4335 n++;
4338 data->regs_used = n;
4339 if (dump_file && (dump_flags & TDF_DETAILS))
4340 fprintf (dump_file, " regs_used %d\n", n);
4342 if (dump_file && (dump_flags & TDF_DETAILS))
4344 fprintf (dump_file, " cost for size:\n");
4345 fprintf (dump_file, " ivs\tcost\n");
4346 for (j = 0; j <= 2 * target_avail_regs; j++)
4347 fprintf (dump_file, " %d\t%d\n", j,
4348 ivopts_global_cost_for_size (data, j));
4349 fprintf (dump_file, "\n");
4353 /* Returns true if A is a cheaper cost pair than B. */
4355 static bool
4356 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4358 if (!a)
4359 return false;
4361 if (!b)
4362 return true;
4364 if (a->cost < b->cost)
4365 return true;
4367 if (a->cost > b->cost)
4368 return false;
4370 /* In case the costs are the same, prefer the cheaper candidate. */
4371 if (a->cand->cost < b->cand->cost)
4372 return true;
4374 return false;
4377 /* Computes the cost field of IVS structure. */
4379 static void
4380 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4382 unsigned cost = 0;
4384 cost += ivs->cand_use_cost;
4385 cost += ivs->cand_cost;
4386 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4388 ivs->cost = cost;
4391 /* Remove invariants in set INVS to set IVS. */
4393 static void
4394 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4396 bitmap_iterator bi;
4397 unsigned iid;
4399 if (!invs)
4400 return;
4402 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4404 ivs->n_invariant_uses[iid]--;
4405 if (ivs->n_invariant_uses[iid] == 0)
4406 ivs->n_regs--;
4410 /* Set USE not to be expressed by any candidate in IVS. */
4412 static void
4413 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4414 struct iv_use *use)
4416 unsigned uid = use->id, cid;
4417 struct cost_pair *cp;
4419 cp = ivs->cand_for_use[uid];
4420 if (!cp)
4421 return;
4422 cid = cp->cand->id;
4424 ivs->bad_uses++;
4425 ivs->cand_for_use[uid] = NULL;
4426 ivs->n_cand_uses[cid]--;
4428 if (ivs->n_cand_uses[cid] == 0)
4430 bitmap_clear_bit (ivs->cands, cid);
4431 /* Do not count the pseudocandidates. */
4432 if (cp->cand->iv)
4433 ivs->n_regs--;
4434 ivs->n_cands--;
4435 ivs->cand_cost -= cp->cand->cost;
4437 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4440 ivs->cand_use_cost -= cp->cost;
4442 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4443 iv_ca_recount_cost (data, ivs);
4446 /* Add invariants in set INVS to set IVS. */
4448 static void
4449 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4451 bitmap_iterator bi;
4452 unsigned iid;
4454 if (!invs)
4455 return;
4457 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4459 ivs->n_invariant_uses[iid]++;
4460 if (ivs->n_invariant_uses[iid] == 1)
4461 ivs->n_regs++;
4465 /* Set cost pair for USE in set IVS to CP. */
4467 static void
4468 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4469 struct iv_use *use, struct cost_pair *cp)
4471 unsigned uid = use->id, cid;
4473 if (ivs->cand_for_use[uid] == cp)
4474 return;
4476 if (ivs->cand_for_use[uid])
4477 iv_ca_set_no_cp (data, ivs, use);
4479 if (cp)
4481 cid = cp->cand->id;
4483 ivs->bad_uses--;
4484 ivs->cand_for_use[uid] = cp;
4485 ivs->n_cand_uses[cid]++;
4486 if (ivs->n_cand_uses[cid] == 1)
4488 bitmap_set_bit (ivs->cands, cid);
4489 /* Do not count the pseudocandidates. */
4490 if (cp->cand->iv)
4491 ivs->n_regs++;
4492 ivs->n_cands++;
4493 ivs->cand_cost += cp->cand->cost;
4495 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4498 ivs->cand_use_cost += cp->cost;
4499 iv_ca_set_add_invariants (ivs, cp->depends_on);
4500 iv_ca_recount_cost (data, ivs);
4504 /* Extend set IVS by expressing USE by some of the candidates in it
4505 if possible. */
4507 static void
4508 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4509 struct iv_use *use)
4511 struct cost_pair *best_cp = NULL, *cp;
4512 bitmap_iterator bi;
4513 unsigned i;
4515 gcc_assert (ivs->upto >= use->id);
4517 if (ivs->upto == use->id)
4519 ivs->upto++;
4520 ivs->bad_uses++;
4523 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4525 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4527 if (cheaper_cost_pair (cp, best_cp))
4528 best_cp = cp;
4531 iv_ca_set_cp (data, ivs, use, best_cp);
4534 /* Get cost for assignment IVS. */
4536 static unsigned
4537 iv_ca_cost (struct iv_ca *ivs)
4539 return (ivs->bad_uses ? INFTY : ivs->cost);
4542 /* Returns true if all dependences of CP are among invariants in IVS. */
4544 static bool
4545 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4547 unsigned i;
4548 bitmap_iterator bi;
4550 if (!cp->depends_on)
4551 return true;
4553 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4555 if (ivs->n_invariant_uses[i] == 0)
4556 return false;
4559 return true;
4562 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4563 it before NEXT_CHANGE. */
4565 static struct iv_ca_delta *
4566 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4567 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4569 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4571 change->use = use;
4572 change->old_cp = old_cp;
4573 change->new_cp = new_cp;
4574 change->next_change = next_change;
4576 return change;
4579 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4580 are rewritten. */
4582 static struct iv_ca_delta *
4583 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4585 struct iv_ca_delta *last;
4587 if (!l2)
4588 return l1;
4590 if (!l1)
4591 return l2;
4593 for (last = l1; last->next_change; last = last->next_change)
4594 continue;
4595 last->next_change = l2;
4597 return l1;
4600 /* Returns candidate by that USE is expressed in IVS. */
4602 static struct cost_pair *
4603 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4605 return ivs->cand_for_use[use->id];
4608 /* Reverse the list of changes DELTA, forming the inverse to it. */
4610 static struct iv_ca_delta *
4611 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4613 struct iv_ca_delta *act, *next, *prev = NULL;
4614 struct cost_pair *tmp;
4616 for (act = delta; act; act = next)
4618 next = act->next_change;
4619 act->next_change = prev;
4620 prev = act;
4622 tmp = act->old_cp;
4623 act->old_cp = act->new_cp;
4624 act->new_cp = tmp;
4627 return prev;
4630 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4631 reverted instead. */
4633 static void
4634 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4635 struct iv_ca_delta *delta, bool forward)
4637 struct cost_pair *from, *to;
4638 struct iv_ca_delta *act;
4640 if (!forward)
4641 delta = iv_ca_delta_reverse (delta);
4643 for (act = delta; act; act = act->next_change)
4645 from = act->old_cp;
4646 to = act->new_cp;
4647 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4648 iv_ca_set_cp (data, ivs, act->use, to);
4651 if (!forward)
4652 iv_ca_delta_reverse (delta);
4655 /* Returns true if CAND is used in IVS. */
4657 static bool
4658 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4660 return ivs->n_cand_uses[cand->id] > 0;
4663 /* Returns number of induction variable candidates in the set IVS. */
4665 static unsigned
4666 iv_ca_n_cands (struct iv_ca *ivs)
4668 return ivs->n_cands;
4671 /* Free the list of changes DELTA. */
4673 static void
4674 iv_ca_delta_free (struct iv_ca_delta **delta)
4676 struct iv_ca_delta *act, *next;
4678 for (act = *delta; act; act = next)
4680 next = act->next_change;
4681 free (act);
4684 *delta = NULL;
4687 /* Allocates new iv candidates assignment. */
4689 static struct iv_ca *
4690 iv_ca_new (struct ivopts_data *data)
4692 struct iv_ca *nw = XNEW (struct iv_ca);
4694 nw->upto = 0;
4695 nw->bad_uses = 0;
4696 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4697 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4698 nw->cands = BITMAP_ALLOC (NULL);
4699 nw->n_cands = 0;
4700 nw->n_regs = 0;
4701 nw->cand_use_cost = 0;
4702 nw->cand_cost = 0;
4703 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4704 nw->cost = 0;
4706 return nw;
4709 /* Free memory occupied by the set IVS. */
4711 static void
4712 iv_ca_free (struct iv_ca **ivs)
4714 free ((*ivs)->cand_for_use);
4715 free ((*ivs)->n_cand_uses);
4716 BITMAP_FREE ((*ivs)->cands);
4717 free ((*ivs)->n_invariant_uses);
4718 free (*ivs);
4719 *ivs = NULL;
4722 /* Dumps IVS to FILE. */
4724 static void
4725 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4727 const char *pref = " invariants ";
4728 unsigned i;
4730 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4731 bitmap_print (file, ivs->cands, " candidates ","\n");
4733 for (i = 1; i <= data->max_inv_id; i++)
4734 if (ivs->n_invariant_uses[i])
4736 fprintf (file, "%s%d", pref, i);
4737 pref = ", ";
4739 fprintf (file, "\n");
4742 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4743 new set, and store differences in DELTA. Number of induction variables
4744 in the new set is stored to N_IVS. */
4746 static unsigned
4747 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4748 struct iv_cand *cand, struct iv_ca_delta **delta,
4749 unsigned *n_ivs)
4751 unsigned i, cost;
4752 struct iv_use *use;
4753 struct cost_pair *old_cp, *new_cp;
4755 *delta = NULL;
4756 for (i = 0; i < ivs->upto; i++)
4758 use = iv_use (data, i);
4759 old_cp = iv_ca_cand_for_use (ivs, use);
4761 if (old_cp
4762 && old_cp->cand == cand)
4763 continue;
4765 new_cp = get_use_iv_cost (data, use, cand);
4766 if (!new_cp)
4767 continue;
4769 if (!iv_ca_has_deps (ivs, new_cp))
4770 continue;
4772 if (!cheaper_cost_pair (new_cp, old_cp))
4773 continue;
4775 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4778 iv_ca_delta_commit (data, ivs, *delta, true);
4779 cost = iv_ca_cost (ivs);
4780 if (n_ivs)
4781 *n_ivs = iv_ca_n_cands (ivs);
4782 iv_ca_delta_commit (data, ivs, *delta, false);
4784 return cost;
4787 /* Try narrowing set IVS by removing CAND. Return the cost of
4788 the new set and store the differences in DELTA. */
4790 static unsigned
4791 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4792 struct iv_cand *cand, struct iv_ca_delta **delta)
4794 unsigned i, ci;
4795 struct iv_use *use;
4796 struct cost_pair *old_cp, *new_cp, *cp;
4797 bitmap_iterator bi;
4798 struct iv_cand *cnd;
4799 unsigned cost;
4801 *delta = NULL;
4802 for (i = 0; i < n_iv_uses (data); i++)
4804 use = iv_use (data, i);
4806 old_cp = iv_ca_cand_for_use (ivs, use);
4807 if (old_cp->cand != cand)
4808 continue;
4810 new_cp = NULL;
4812 if (data->consider_all_candidates)
4814 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4816 if (ci == cand->id)
4817 continue;
4819 cnd = iv_cand (data, ci);
4821 cp = get_use_iv_cost (data, use, cnd);
4822 if (!cp)
4823 continue;
4824 if (!iv_ca_has_deps (ivs, cp))
4825 continue;
4827 if (!cheaper_cost_pair (cp, new_cp))
4828 continue;
4830 new_cp = cp;
4833 else
4835 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4837 if (ci == cand->id)
4838 continue;
4840 cnd = iv_cand (data, ci);
4842 cp = get_use_iv_cost (data, use, cnd);
4843 if (!cp)
4844 continue;
4845 if (!iv_ca_has_deps (ivs, cp))
4846 continue;
4848 if (!cheaper_cost_pair (cp, new_cp))
4849 continue;
4851 new_cp = cp;
4855 if (!new_cp)
4857 iv_ca_delta_free (delta);
4858 return INFTY;
4861 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4864 iv_ca_delta_commit (data, ivs, *delta, true);
4865 cost = iv_ca_cost (ivs);
4866 iv_ca_delta_commit (data, ivs, *delta, false);
4868 return cost;
4871 /* Try optimizing the set of candidates IVS by removing candidates different
4872 from to EXCEPT_CAND from it. Return cost of the new set, and store
4873 differences in DELTA. */
4875 static unsigned
4876 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4877 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4879 bitmap_iterator bi;
4880 struct iv_ca_delta *act_delta, *best_delta;
4881 unsigned i, best_cost, acost;
4882 struct iv_cand *cand;
4884 best_delta = NULL;
4885 best_cost = iv_ca_cost (ivs);
4887 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4889 cand = iv_cand (data, i);
4891 if (cand == except_cand)
4892 continue;
4894 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4896 if (acost < best_cost)
4898 best_cost = acost;
4899 iv_ca_delta_free (&best_delta);
4900 best_delta = act_delta;
4902 else
4903 iv_ca_delta_free (&act_delta);
4906 if (!best_delta)
4908 *delta = NULL;
4909 return best_cost;
4912 /* Recurse to possibly remove other unnecessary ivs. */
4913 iv_ca_delta_commit (data, ivs, best_delta, true);
4914 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4915 iv_ca_delta_commit (data, ivs, best_delta, false);
4916 *delta = iv_ca_delta_join (best_delta, *delta);
4917 return best_cost;
4920 /* Tries to extend the sets IVS in the best possible way in order
4921 to express the USE. */
4923 static bool
4924 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4925 struct iv_use *use)
4927 unsigned best_cost, act_cost;
4928 unsigned i;
4929 bitmap_iterator bi;
4930 struct iv_cand *cand;
4931 struct iv_ca_delta *best_delta = NULL, *act_delta;
4932 struct cost_pair *cp;
4934 iv_ca_add_use (data, ivs, use);
4935 best_cost = iv_ca_cost (ivs);
4937 cp = iv_ca_cand_for_use (ivs, use);
4938 if (cp)
4940 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4941 iv_ca_set_no_cp (data, ivs, use);
4944 /* First try important candidates. Only if it fails, try the specific ones.
4945 Rationale -- in loops with many variables the best choice often is to use
4946 just one generic biv. If we added here many ivs specific to the uses,
4947 the optimization algorithm later would be likely to get stuck in a local
4948 minimum, thus causing us to create too many ivs. The approach from
4949 few ivs to more seems more likely to be successful -- starting from few
4950 ivs, replacing an expensive use by a specific iv should always be a
4951 win. */
4952 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4954 cand = iv_cand (data, i);
4956 if (iv_ca_cand_used_p (ivs, cand))
4957 continue;
4959 cp = get_use_iv_cost (data, use, cand);
4960 if (!cp)
4961 continue;
4963 iv_ca_set_cp (data, ivs, use, cp);
4964 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4965 iv_ca_set_no_cp (data, ivs, use);
4966 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4968 if (act_cost < best_cost)
4970 best_cost = act_cost;
4972 iv_ca_delta_free (&best_delta);
4973 best_delta = act_delta;
4975 else
4976 iv_ca_delta_free (&act_delta);
4979 if (best_cost == INFTY)
4981 for (i = 0; i < use->n_map_members; i++)
4983 cp = use->cost_map + i;
4984 cand = cp->cand;
4985 if (!cand)
4986 continue;
4988 /* Already tried this. */
4989 if (cand->important)
4990 continue;
4992 if (iv_ca_cand_used_p (ivs, cand))
4993 continue;
4995 act_delta = NULL;
4996 iv_ca_set_cp (data, ivs, use, cp);
4997 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4998 iv_ca_set_no_cp (data, ivs, use);
4999 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5000 cp, act_delta);
5002 if (act_cost < best_cost)
5004 best_cost = act_cost;
5006 if (best_delta)
5007 iv_ca_delta_free (&best_delta);
5008 best_delta = act_delta;
5010 else
5011 iv_ca_delta_free (&act_delta);
5015 iv_ca_delta_commit (data, ivs, best_delta, true);
5016 iv_ca_delta_free (&best_delta);
5018 return (best_cost != INFTY);
5021 /* Finds an initial assignment of candidates to uses. */
5023 static struct iv_ca *
5024 get_initial_solution (struct ivopts_data *data)
5026 struct iv_ca *ivs = iv_ca_new (data);
5027 unsigned i;
5029 for (i = 0; i < n_iv_uses (data); i++)
5030 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5032 iv_ca_free (&ivs);
5033 return NULL;
5036 return ivs;
5039 /* Tries to improve set of induction variables IVS. */
5041 static bool
5042 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5044 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5045 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5046 struct iv_cand *cand;
5048 /* Try extending the set of induction variables by one. */
5049 for (i = 0; i < n_iv_cands (data); i++)
5051 cand = iv_cand (data, i);
5053 if (iv_ca_cand_used_p (ivs, cand))
5054 continue;
5056 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5057 if (!act_delta)
5058 continue;
5060 /* If we successfully added the candidate and the set is small enough,
5061 try optimizing it by removing other candidates. */
5062 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5064 iv_ca_delta_commit (data, ivs, act_delta, true);
5065 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5066 iv_ca_delta_commit (data, ivs, act_delta, false);
5067 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5070 if (acost < best_cost)
5072 best_cost = acost;
5073 iv_ca_delta_free (&best_delta);
5074 best_delta = act_delta;
5076 else
5077 iv_ca_delta_free (&act_delta);
5080 if (!best_delta)
5082 /* Try removing the candidates from the set instead. */
5083 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5085 /* Nothing more we can do. */
5086 if (!best_delta)
5087 return false;
5090 iv_ca_delta_commit (data, ivs, best_delta, true);
5091 gcc_assert (best_cost == iv_ca_cost (ivs));
5092 iv_ca_delta_free (&best_delta);
5093 return true;
5096 /* Attempts to find the optimal set of induction variables. We do simple
5097 greedy heuristic -- we try to replace at most one candidate in the selected
5098 solution and remove the unused ivs while this improves the cost. */
5100 static struct iv_ca *
5101 find_optimal_iv_set (struct ivopts_data *data)
5103 unsigned i;
5104 struct iv_ca *set;
5105 struct iv_use *use;
5107 /* Get the initial solution. */
5108 set = get_initial_solution (data);
5109 if (!set)
5111 if (dump_file && (dump_flags & TDF_DETAILS))
5112 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5113 return NULL;
5116 if (dump_file && (dump_flags & TDF_DETAILS))
5118 fprintf (dump_file, "Initial set of candidates:\n");
5119 iv_ca_dump (data, dump_file, set);
5122 while (try_improve_iv_set (data, set))
5124 if (dump_file && (dump_flags & TDF_DETAILS))
5126 fprintf (dump_file, "Improved to:\n");
5127 iv_ca_dump (data, dump_file, set);
5131 if (dump_file && (dump_flags & TDF_DETAILS))
5132 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5134 for (i = 0; i < n_iv_uses (data); i++)
5136 use = iv_use (data, i);
5137 use->selected = iv_ca_cand_for_use (set, use)->cand;
5140 return set;
5143 /* Creates a new induction variable corresponding to CAND. */
5145 static void
5146 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5148 block_stmt_iterator incr_pos;
5149 tree base;
5150 bool after = false;
5152 if (!cand->iv)
5153 return;
5155 switch (cand->pos)
5157 case IP_NORMAL:
5158 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5159 break;
5161 case IP_END:
5162 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5163 after = true;
5164 break;
5166 case IP_ORIGINAL:
5167 /* Mark that the iv is preserved. */
5168 name_info (data, cand->var_before)->preserve_biv = true;
5169 name_info (data, cand->var_after)->preserve_biv = true;
5171 /* Rewrite the increment so that it uses var_before directly. */
5172 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5174 return;
5177 gimple_add_tmp_var (cand->var_before);
5178 add_referenced_var (cand->var_before);
5180 base = unshare_expr (cand->iv->base);
5182 create_iv (base, unshare_expr (cand->iv->step),
5183 cand->var_before, data->current_loop,
5184 &incr_pos, after, &cand->var_before, &cand->var_after);
5187 /* Creates new induction variables described in SET. */
5189 static void
5190 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5192 unsigned i;
5193 struct iv_cand *cand;
5194 bitmap_iterator bi;
5196 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5198 cand = iv_cand (data, i);
5199 create_new_iv (data, cand);
5203 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5204 is true, remove also the ssa name defined by the statement. */
5206 static void
5207 remove_statement (tree stmt, bool including_defined_name)
5209 if (TREE_CODE (stmt) == PHI_NODE)
5211 if (!including_defined_name)
5213 /* Prevent the ssa name defined by the statement from being removed. */
5214 SET_PHI_RESULT (stmt, NULL);
5216 remove_phi_node (stmt, NULL_TREE);
5218 else
5220 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5222 bsi_remove (&bsi, true);
5226 /* Rewrites USE (definition of iv used in a nonlinear expression)
5227 using candidate CAND. */
5229 static void
5230 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5231 struct iv_use *use, struct iv_cand *cand)
5233 tree comp;
5234 tree op, stmts, tgt, ass;
5235 block_stmt_iterator bsi, pbsi;
5237 /* An important special case -- if we are asked to express value of
5238 the original iv by itself, just exit; there is no need to
5239 introduce a new computation (that might also need casting the
5240 variable to unsigned and back). */
5241 if (cand->pos == IP_ORIGINAL
5242 && cand->incremented_at == use->stmt)
5244 tree step, ctype, utype;
5245 enum tree_code incr_code = PLUS_EXPR;
5247 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5248 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5250 step = cand->iv->step;
5251 ctype = TREE_TYPE (step);
5252 utype = TREE_TYPE (cand->var_after);
5253 if (TREE_CODE (step) == NEGATE_EXPR)
5255 incr_code = MINUS_EXPR;
5256 step = TREE_OPERAND (step, 0);
5259 /* Check whether we may leave the computation unchanged.
5260 This is the case only if it does not rely on other
5261 computations in the loop -- otherwise, the computation
5262 we rely upon may be removed in remove_unused_ivs,
5263 thus leading to ICE. */
5264 op = TREE_OPERAND (use->stmt, 1);
5265 if (TREE_CODE (op) == PLUS_EXPR
5266 || TREE_CODE (op) == MINUS_EXPR)
5268 if (TREE_OPERAND (op, 0) == cand->var_before)
5269 op = TREE_OPERAND (op, 1);
5270 else if (TREE_CODE (op) == PLUS_EXPR
5271 && TREE_OPERAND (op, 1) == cand->var_before)
5272 op = TREE_OPERAND (op, 0);
5273 else
5274 op = NULL_TREE;
5276 else
5277 op = NULL_TREE;
5279 if (op
5280 && (TREE_CODE (op) == INTEGER_CST
5281 || operand_equal_p (op, step, 0)))
5282 return;
5284 /* Otherwise, add the necessary computations to express
5285 the iv. */
5286 op = fold_convert (ctype, cand->var_before);
5287 comp = fold_convert (utype,
5288 build2 (incr_code, ctype, op,
5289 unshare_expr (step)));
5291 else
5292 comp = get_computation (data->current_loop, use, cand);
5294 switch (TREE_CODE (use->stmt))
5296 case PHI_NODE:
5297 tgt = PHI_RESULT (use->stmt);
5299 /* If we should keep the biv, do not replace it. */
5300 if (name_info (data, tgt)->preserve_biv)
5301 return;
5303 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5304 while (!bsi_end_p (pbsi)
5305 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5307 bsi = pbsi;
5308 bsi_next (&pbsi);
5310 break;
5312 case MODIFY_EXPR:
5313 tgt = TREE_OPERAND (use->stmt, 0);
5314 bsi = bsi_for_stmt (use->stmt);
5315 break;
5317 default:
5318 gcc_unreachable ();
5321 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5323 if (TREE_CODE (use->stmt) == PHI_NODE)
5325 if (stmts)
5326 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5327 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5328 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5329 remove_statement (use->stmt, false);
5330 SSA_NAME_DEF_STMT (tgt) = ass;
5332 else
5334 if (stmts)
5335 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5336 TREE_OPERAND (use->stmt, 1) = op;
5340 /* Replaces ssa name in index IDX by its basic variable. Callback for
5341 for_each_index. */
5343 static bool
5344 idx_remove_ssa_names (tree base, tree *idx,
5345 void *data ATTRIBUTE_UNUSED)
5347 tree *op;
5349 if (TREE_CODE (*idx) == SSA_NAME)
5350 *idx = SSA_NAME_VAR (*idx);
5352 if (TREE_CODE (base) == ARRAY_REF)
5354 op = &TREE_OPERAND (base, 2);
5355 if (*op
5356 && TREE_CODE (*op) == SSA_NAME)
5357 *op = SSA_NAME_VAR (*op);
5358 op = &TREE_OPERAND (base, 3);
5359 if (*op
5360 && TREE_CODE (*op) == SSA_NAME)
5361 *op = SSA_NAME_VAR (*op);
5364 return true;
5367 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5369 static tree
5370 unshare_and_remove_ssa_names (tree ref)
5372 ref = unshare_expr (ref);
5373 for_each_index (&ref, idx_remove_ssa_names, NULL);
5375 return ref;
5378 /* Extract the alias analysis info for the memory reference REF. There are
5379 several ways how this information may be stored and what precisely is
5380 its semantics depending on the type of the reference, but there always is
5381 somewhere hidden one _DECL node that is used to determine the set of
5382 virtual operands for the reference. The code below deciphers this jungle
5383 and extracts this single useful piece of information. */
5385 static tree
5386 get_ref_tag (tree ref, tree orig)
5388 tree var = get_base_address (ref);
5389 tree aref = NULL_TREE, tag, sv;
5390 HOST_WIDE_INT offset, size, maxsize;
5392 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5394 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5395 if (ref)
5396 break;
5399 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5400 return unshare_expr (sv);
5402 if (!var)
5403 return NULL_TREE;
5405 if (TREE_CODE (var) == INDIRECT_REF)
5407 /* If the base is a dereference of a pointer, first check its name memory
5408 tag. If it does not have one, use its symbol memory tag. */
5409 var = TREE_OPERAND (var, 0);
5410 if (TREE_CODE (var) != SSA_NAME)
5411 return NULL_TREE;
5413 if (SSA_NAME_PTR_INFO (var))
5415 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5416 if (tag)
5417 return tag;
5420 var = SSA_NAME_VAR (var);
5421 tag = var_ann (var)->symbol_mem_tag;
5422 gcc_assert (tag != NULL_TREE);
5423 return tag;
5425 else
5427 if (!DECL_P (var))
5428 return NULL_TREE;
5430 tag = var_ann (var)->symbol_mem_tag;
5431 if (tag)
5432 return tag;
5434 return var;
5438 /* Copies the reference information from OLD_REF to NEW_REF. */
5440 static void
5441 copy_ref_info (tree new_ref, tree old_ref)
5443 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5444 copy_mem_ref_info (new_ref, old_ref);
5445 else
5447 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5448 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5452 /* Rewrites USE (address that is an iv) using candidate CAND. */
5454 static void
5455 rewrite_use_address (struct ivopts_data *data,
5456 struct iv_use *use, struct iv_cand *cand)
5458 struct affine_tree_combination aff;
5459 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5460 tree ref;
5462 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5463 unshare_aff_combination (&aff);
5465 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5466 copy_ref_info (ref, *use->op_p);
5467 *use->op_p = ref;
5470 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5471 candidate CAND. */
5473 static void
5474 rewrite_use_compare (struct ivopts_data *data,
5475 struct iv_use *use, struct iv_cand *cand)
5477 tree comp;
5478 tree *op_p, cond, op, stmts, bound;
5479 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5480 enum tree_code compare;
5481 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5483 bound = cp->value;
5484 if (bound)
5486 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5487 tree var_type = TREE_TYPE (var);
5489 compare = iv_elimination_compare (data, use);
5490 bound = fold_convert (var_type, bound);
5491 op = force_gimple_operand (unshare_expr (bound), &stmts,
5492 true, NULL_TREE);
5494 if (stmts)
5495 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5497 *use->op_p = build2 (compare, boolean_type_node, var, op);
5498 update_stmt (use->stmt);
5499 return;
5502 /* The induction variable elimination failed; just express the original
5503 giv. */
5504 comp = get_computation (data->current_loop, use, cand);
5506 cond = *use->op_p;
5507 op_p = &TREE_OPERAND (cond, 0);
5508 if (TREE_CODE (*op_p) != SSA_NAME
5509 || zero_p (get_iv (data, *op_p)->step))
5510 op_p = &TREE_OPERAND (cond, 1);
5512 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5513 if (stmts)
5514 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5516 *op_p = op;
5519 /* Rewrites USE using candidate CAND. */
5521 static void
5522 rewrite_use (struct ivopts_data *data,
5523 struct iv_use *use, struct iv_cand *cand)
5525 switch (use->type)
5527 case USE_NONLINEAR_EXPR:
5528 rewrite_use_nonlinear_expr (data, use, cand);
5529 break;
5531 case USE_ADDRESS:
5532 rewrite_use_address (data, use, cand);
5533 break;
5535 case USE_COMPARE:
5536 rewrite_use_compare (data, use, cand);
5537 break;
5539 default:
5540 gcc_unreachable ();
5542 mark_new_vars_to_rename (use->stmt);
5545 /* Rewrite the uses using the selected induction variables. */
5547 static void
5548 rewrite_uses (struct ivopts_data *data)
5550 unsigned i;
5551 struct iv_cand *cand;
5552 struct iv_use *use;
5554 for (i = 0; i < n_iv_uses (data); i++)
5556 use = iv_use (data, i);
5557 cand = use->selected;
5558 gcc_assert (cand);
5560 rewrite_use (data, use, cand);
5564 /* Removes the ivs that are not used after rewriting. */
5566 static void
5567 remove_unused_ivs (struct ivopts_data *data)
5569 unsigned j;
5570 bitmap_iterator bi;
5572 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5574 struct version_info *info;
5576 info = ver_info (data, j);
5577 if (info->iv
5578 && !zero_p (info->iv->step)
5579 && !info->inv_id
5580 && !info->iv->have_use_for
5581 && !info->preserve_biv)
5582 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5586 /* Frees data allocated by the optimization of a single loop. */
5588 static void
5589 free_loop_data (struct ivopts_data *data)
5591 unsigned i, j;
5592 bitmap_iterator bi;
5593 tree obj;
5595 htab_empty (data->niters);
5597 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5599 struct version_info *info;
5601 info = ver_info (data, i);
5602 if (info->iv)
5603 free (info->iv);
5604 info->iv = NULL;
5605 info->has_nonlin_use = false;
5606 info->preserve_biv = false;
5607 info->inv_id = 0;
5609 bitmap_clear (data->relevant);
5610 bitmap_clear (data->important_candidates);
5612 for (i = 0; i < n_iv_uses (data); i++)
5614 struct iv_use *use = iv_use (data, i);
5616 free (use->iv);
5617 BITMAP_FREE (use->related_cands);
5618 for (j = 0; j < use->n_map_members; j++)
5619 if (use->cost_map[j].depends_on)
5620 BITMAP_FREE (use->cost_map[j].depends_on);
5621 free (use->cost_map);
5622 free (use);
5624 VEC_truncate (iv_use_p, data->iv_uses, 0);
5626 for (i = 0; i < n_iv_cands (data); i++)
5628 struct iv_cand *cand = iv_cand (data, i);
5630 if (cand->iv)
5631 free (cand->iv);
5632 if (cand->depends_on)
5633 BITMAP_FREE (cand->depends_on);
5634 free (cand);
5636 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5638 if (data->version_info_size < num_ssa_names)
5640 data->version_info_size = 2 * num_ssa_names;
5641 free (data->version_info);
5642 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5645 data->max_inv_id = 0;
5647 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5648 SET_DECL_RTL (obj, NULL_RTX);
5650 VEC_truncate (tree, decl_rtl_to_reset, 0);
5653 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5654 loop tree. */
5656 static void
5657 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5659 free_loop_data (data);
5660 free (data->version_info);
5661 BITMAP_FREE (data->relevant);
5662 BITMAP_FREE (data->important_candidates);
5663 htab_delete (data->niters);
5665 VEC_free (tree, heap, decl_rtl_to_reset);
5666 VEC_free (iv_use_p, heap, data->iv_uses);
5667 VEC_free (iv_cand_p, heap, data->iv_candidates);
5670 /* Optimizes the LOOP. Returns true if anything changed. */
5672 static bool
5673 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5675 bool changed = false;
5676 struct iv_ca *iv_ca;
5677 edge exit;
5679 data->current_loop = loop;
5681 if (dump_file && (dump_flags & TDF_DETAILS))
5683 fprintf (dump_file, "Processing loop %d\n", loop->num);
5685 exit = single_dom_exit (loop);
5686 if (exit)
5688 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5689 exit->src->index, exit->dest->index);
5690 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5691 fprintf (dump_file, "\n");
5694 fprintf (dump_file, "\n");
5697 /* For each ssa name determines whether it behaves as an induction variable
5698 in some loop. */
5699 if (!find_induction_variables (data))
5700 goto finish;
5702 /* Finds interesting uses (item 1). */
5703 find_interesting_uses (data);
5704 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5705 goto finish;
5707 /* Finds candidates for the induction variables (item 2). */
5708 find_iv_candidates (data);
5710 /* Calculates the costs (item 3, part 1). */
5711 determine_use_iv_costs (data);
5712 determine_iv_costs (data);
5713 determine_set_costs (data);
5715 /* Find the optimal set of induction variables (item 3, part 2). */
5716 iv_ca = find_optimal_iv_set (data);
5717 if (!iv_ca)
5718 goto finish;
5719 changed = true;
5721 /* Create the new induction variables (item 4, part 1). */
5722 create_new_ivs (data, iv_ca);
5723 iv_ca_free (&iv_ca);
5725 /* Rewrite the uses (item 4, part 2). */
5726 rewrite_uses (data);
5728 /* Remove the ivs that are unused after rewriting. */
5729 remove_unused_ivs (data);
5731 /* We have changed the structure of induction variables; it might happen
5732 that definitions in the scev database refer to some of them that were
5733 eliminated. */
5734 scev_reset ();
5736 finish:
5737 free_loop_data (data);
5739 return changed;
5742 /* Main entry point. Optimizes induction variables in LOOPS. */
5744 void
5745 tree_ssa_iv_optimize (struct loops *loops)
5747 struct loop *loop;
5748 struct ivopts_data data;
5750 tree_ssa_iv_optimize_init (&data);
5752 /* Optimize the loops starting with the innermost ones. */
5753 loop = loops->tree_root;
5754 while (loop->inner)
5755 loop = loop->inner;
5757 /* Scan the loops, inner ones first. */
5758 while (loop != loops->tree_root)
5760 if (dump_file && (dump_flags & TDF_DETAILS))
5761 flow_loop_dump (loop, dump_file, NULL, 1);
5763 tree_ssa_iv_optimize_loop (&data, loop);
5765 if (loop->next)
5767 loop = loop->next;
5768 while (loop->inner)
5769 loop = loop->inner;
5771 else
5772 loop = loop->outer;
5775 tree_ssa_iv_optimize_finalize (&data);