/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */

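/* An illustration (not part of the pass itself): in a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address &a[i] is an interesting address use, and the candidates
   include the original biv i as well as a pointer iv p with base &a[0]
   and step equal to the element size.  If the pointer candidate is
   selected, the loop is in effect strength-reduced to

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   and i can be eliminated entirely when it has no other uses.  */
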
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

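/* The three bounds above should correspond to the --param options
   iv-consider-all-candidates-bound, iv-max-considered-uses and
   iv-always-prune-cand-set-bound (see params.def), so they can be tuned
   from the command line without recompiling.  */
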
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists a number X such that X * B = A, counting modulo
   2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
        HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
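  /* A worked example of this identity: for bits == 4 and b == 3, the group
     of odd residues mod 16 has order 2^3 = 8, so b^(2^3) == 1 (mod 16) and
     b^(2^3 - 1) = 3^7 = 2187 == 11 (mod 16) is the inverse; indeed
     3 * 11 = 33 == 1 (mod 16).  The loop below accumulates
     inv = b^(2^0) * b^(2^1) * ... * b^(2^(bits - 2)) = b^(2^(bits - 1) - 1)
     by repeated squaring.  */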
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* Number of iterations corresponding to this exit, or NULL if it cannot be
     determined.  */
  tree niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  struct tree_niter_desc desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
                                   htab_hash_pointer (exit),
                                   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;

      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && zero_p (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        nfe_desc->niter = desc.niter;
      else
        nfe_desc->niter = NULL_TREE;

      /* Store the result into the slot created by the INSERT lookup above,
         so that it is not recomputed (and the element not leaked) on the
         next query for this exit.  */
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  return nfe_desc->niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

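/* For example (illustrative only): for EXPR of the form &a[0] + 4 * i,
   the PLUS_EXPR case above takes the base object determined from &a[0];
   the offset 4 * i has a non-pointer type and therefore contributes NULL,
   so the result is roughly (void *) &a.  */
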
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

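/* Note that get_iv lazily creates a degenerate iv (base equal to the name
   itself, NULL step) for ssa names defined outside of the current loop, so
   callers can treat loop invariants and real induction variables
   uniformly.  */
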
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we may not do any sensible
         induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
         ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1: *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF
      || (TREE_CODE (base) == COMPONENT_REF
          && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              if (step)
                step = fold_build2 (PLUS_EXPR, type, step, astep);
              else
                step = astep;
            }
        }

      if (zero_p (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || zero_p (step))
        goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !zero_p (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || zero_p (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (zero_p (op1))
        expr = op0;
      else if (zero_p (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && zero_p (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}

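/* An illustrative example: applied to the address &a[i + 4] for an array of
   8-byte elements, strip_offset returns (roughly) &a[i] and stores 32 in
   *OFFSET, allowing a single candidate based on &a[i] to serve several uses
   that differ only in a constant offset.  */
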
/* Returns variant of TYPE that can be used as base for different uses.
   For integer types, we return unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation
   of the value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          if (step)
            step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
        continue;

      if (zero_p (cand->iv->step))
        {
          if (zero_p (step))
            break;
        }
      else
        {
          if (step && operand_equal_p (step, cand->iv->step, 0))
            break;
        }
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  The candidate computation is scheduled on all available
   positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}

/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
                 build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
         untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
                              iv->base, iv->step, true, IP_ORIGINAL, NULL,
                              SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
        add_old_iv_candidates (data, iv);
    }
}

/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
                         struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
                 iv->step, true, use);

  /* Third, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}

/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
        continue;

      switch (use->type)
        {
        case USE_NONLINEAR_EXPR:
        case USE_COMPARE:
        case USE_ADDRESS:
          /* Just add the ivs based on the value of the iv used here.  */
          add_iv_value_candidates (data, use->iv, use);
          break;

        default:
          gcc_unreachable ();
        }
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
        bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
                                   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
         so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
        {
          use = iv_use (data, i);
          BITMAP_FREE (use->related_cands);
        }
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
        bitmap_ior_into (iv_use (data, i)->related_cands,
                         data->important_candidates);
    }
}

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}

2263 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2264 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2265 we allocate a simple list to every use. */
2267 static void
2268 alloc_use_cost_map (struct ivopts_data *data)
2270 unsigned i, size, s, j;
2272 for (i = 0; i < n_iv_uses (data); i++)
2274 struct iv_use *use = iv_use (data, i);
2275 bitmap_iterator bi;
2277 if (data->consider_all_candidates)
2278 size = n_iv_cands (data);
2279 else
2281 s = 0;
2282 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2284 s++;
2287 /* Round up to a power of two, so that taking the modulus is fast. */
2288 for (size = 1; size < s; size <<= 1)
2289 continue;
2292 use->n_map_members = size;
2293 use->cost_map = XCNEWVEC (struct cost_pair, size);
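/* A worked instance of the sizing above: a use with 5 related candidates
   gets a map of size 8, so that a slot can later be found with a cheap
   bitwise AND, since id % 8 == (id & 7) whenever the size is a power
   of two.  */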
2297 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2298 depends on the invariants DEPENDS_ON and that the value used in
2299 expressing it is VALUE. */
2301 static void
2302 set_use_iv_cost (struct ivopts_data *data,
2303 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2304 bitmap depends_on, tree value)
2306 unsigned i, s;
2308 if (cost == INFTY)
2310 BITMAP_FREE (depends_on);
2311 return;
2314 if (data->consider_all_candidates)
2316 use->cost_map[cand->id].cand = cand;
2317 use->cost_map[cand->id].cost = cost;
2318 use->cost_map[cand->id].depends_on = depends_on;
2319 use->cost_map[cand->id].value = value;
2320 return;
2323 /* n_map_members is a power of two, so this computes modulo. */
2324 s = cand->id & (use->n_map_members - 1);
2325 for (i = s; i < use->n_map_members; i++)
2326 if (!use->cost_map[i].cand)
2327 goto found;
2328 for (i = 0; i < s; i++)
2329 if (!use->cost_map[i].cand)
2330 goto found;
2332 gcc_unreachable ();
2334 found:
2335 use->cost_map[i].cand = cand;
2336 use->cost_map[i].cost = cost;
2337 use->cost_map[i].depends_on = depends_on;
2338 use->cost_map[i].value = value;
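/* The cost map is used as a small open-addressed hash table.  For
   illustration, with n_map_members == 8 and cand->id == 13, the code above
   starts at slot 13 & 7 == 5, probes slots 5, 6, 7, then wraps around to
   0 .. 4 until a free slot is found; get_use_iv_cost below walks the same
   probe sequence when looking the pair up.  */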
2341 /* Gets cost of (USE, CANDIDATE) pair. */
2343 static struct cost_pair *
2344 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2345 struct iv_cand *cand)
2347 unsigned i, s;
2348 struct cost_pair *ret;
2350 if (!cand)
2351 return NULL;
2353 if (data->consider_all_candidates)
2355 ret = use->cost_map + cand->id;
2356 if (!ret->cand)
2357 return NULL;
2359 return ret;
2362 /* n_map_members is a power of two, so this computes modulo. */
2363 s = cand->id & (use->n_map_members - 1);
2364 for (i = s; i < use->n_map_members; i++)
2365 if (use->cost_map[i].cand == cand)
2366 return use->cost_map + i;
2368 for (i = 0; i < s; i++)
2369 if (use->cost_map[i].cand == cand)
2370 return use->cost_map + i;
2372 return NULL;
2375 /* Returns an estimate of the cost of computing SEQ. */
2377 static unsigned
2378 seq_cost (rtx seq)
2380 unsigned cost = 0;
2381 rtx set;
2383 for (; seq; seq = NEXT_INSN (seq))
2385 set = single_set (seq);
2386 if (set)
2387 cost += rtx_cost (set, SET);
2388 else
2389 cost++;
2392 return cost;
2395 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2396 static rtx
2397 produce_memory_decl_rtl (tree obj, int *regno)
2399 rtx x;
2401 gcc_assert (obj);
2402 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2404 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2405 x = gen_rtx_SYMBOL_REF (Pmode, name);
2407 else
2408 x = gen_raw_REG (Pmode, (*regno)++);
2410 return gen_rtx_MEM (DECL_MODE (obj), x);
2413 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2414 walk_tree. DATA points to the next fake register number to be used. */
2416 static tree
2417 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2419 tree obj = NULL_TREE;
2420 rtx x = NULL_RTX;
2421 int *regno = data;
2423 switch (TREE_CODE (*expr_p))
2425 case ADDR_EXPR:
2426 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2427 handled_component_p (*expr_p);
2428 expr_p = &TREE_OPERAND (*expr_p, 0))
2429 continue;
2430 obj = *expr_p;
2431 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2432 x = produce_memory_decl_rtl (obj, regno);
2433 break;
2435 case SSA_NAME:
2436 *ws = 0;
2437 obj = SSA_NAME_VAR (*expr_p);
2438 if (!DECL_RTL_SET_P (obj))
2439 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2440 break;
2442 case VAR_DECL:
2443 case PARM_DECL:
2444 case RESULT_DECL:
2445 *ws = 0;
2446 obj = *expr_p;
2448 if (DECL_RTL_SET_P (obj))
2449 break;
2451 if (DECL_MODE (obj) == BLKmode)
2452 x = produce_memory_decl_rtl (obj, regno);
2453 else
2454 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2456 break;
2458 default:
2459 break;
2462 if (x)
2464 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2465 SET_DECL_RTL (obj, x);
2468 return NULL_TREE;
2471 /* Determines cost of the computation of EXPR. */
2473 static unsigned
2474 computation_cost (tree expr)
2476 rtx seq, rslt;
2477 tree type = TREE_TYPE (expr);
2478 unsigned cost;
2479 /* Avoid using hard regs in ways which may be unsupported. */
2480 int regno = LAST_VIRTUAL_REGISTER + 1;
2482 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2483 start_sequence ();
2484 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2485 seq = get_insns ();
2486 end_sequence ();
2488 cost = seq_cost (seq);
2489 if (MEM_P (rslt))
2490 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2492 return cost;
2495 /* Returns variable containing the value of candidate CAND at statement AT. */
2497 static tree
2498 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2500 if (stmt_after_increment (loop, cand, stmt))
2501 return cand->var_after;
2502 else
2503 return cand->var_before;
2506 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2507 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2509 int
2510 tree_int_cst_sign_bit (tree t)
2512 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2513 unsigned HOST_WIDE_INT w;
2515 if (bitno < HOST_BITS_PER_WIDE_INT)
2516 w = TREE_INT_CST_LOW (t);
2517 else
2519 w = TREE_INT_CST_HIGH (t);
2520 bitno -= HOST_BITS_PER_WIDE_INT;
2523 return (w >> bitno) & 1;
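/* Worked example: for a constant of an 8-bit type, bitno == 7; if the low
   HOST_WIDE_INT word of T is 0x80, then (0x80 >> 7) & 1 == 1, i.e. the
   constant has its sign bit set.  */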
2526 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2527 return cst. Otherwise return NULL_TREE. */
2529 static tree
2530 constant_multiple_of (tree type, tree top, tree bot)
2532 tree res, mby, p0, p1;
2533 enum tree_code code;
2534 bool negate;
2536 STRIP_NOPS (top);
2537 STRIP_NOPS (bot);
2539 if (operand_equal_p (top, bot, 0))
2540 return build_int_cst (type, 1);
2542 code = TREE_CODE (top);
2543 switch (code)
2545 case MULT_EXPR:
2546 mby = TREE_OPERAND (top, 1);
2547 if (TREE_CODE (mby) != INTEGER_CST)
2548 return NULL_TREE;
2550 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2551 if (!res)
2552 return NULL_TREE;
2554 return fold_binary_to_constant (MULT_EXPR, type, res,
2555 fold_convert (type, mby));
2557 case PLUS_EXPR:
2558 case MINUS_EXPR:
2559 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2560 if (!p0)
2561 return NULL_TREE;
2562 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2563 if (!p1)
2564 return NULL_TREE;
2566 return fold_binary_to_constant (code, type, p0, p1);
2568 case INTEGER_CST:
2569 if (TREE_CODE (bot) != INTEGER_CST)
2570 return NULL_TREE;
2572 bot = fold_convert (type, bot);
2573 top = fold_convert (type, top);
2575 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2576 the result afterwards. */
2577 if (tree_int_cst_sign_bit (bot))
2579 negate = true;
2580 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2582 else
2583 negate = false;
2585 /* Ditto for TOP. */
2586 if (tree_int_cst_sign_bit (top))
2588 negate = !negate;
2589 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2592 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2593 return NULL_TREE;
2595 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2596 if (negate)
2597 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2598 return res;
2600 default:
2601 return NULL_TREE;
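/* Worked examples of the recursion above: with TOP == i * 12 and BOT == i,
   the MULT_EXPR case yields 12.  With TOP == -12 and BOT == 4, both values
   are negated before the division, giving 3, and the result is negated back
   to -3.  With TOP == 13 and BOT == 4 the TRUNC_MOD_EXPR check is nonzero
   and NULL_TREE is returned.  */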
2605 /* Sets COMB to CST. */
2607 static void
2608 aff_combination_const (struct affine_tree_combination *comb, tree type,
2609 unsigned HOST_WIDE_INT cst)
2611 unsigned prec = TYPE_PRECISION (type);
2613 comb->type = type;
2614 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2616 comb->n = 0;
2617 comb->rest = NULL_TREE;
2618 comb->offset = cst & comb->mask;
2621 /* Sets COMB to single element ELT. */
2623 static void
2624 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2626 unsigned prec = TYPE_PRECISION (type);
2628 comb->type = type;
2629 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2631 comb->n = 1;
2632 comb->elts[0] = elt;
2633 comb->coefs[0] = 1;
2634 comb->rest = NULL_TREE;
2635 comb->offset = 0;
2638 /* Scales COMB by SCALE. */
2640 static void
2641 aff_combination_scale (struct affine_tree_combination *comb,
2642 unsigned HOST_WIDE_INT scale)
2644 unsigned i, j;
2646 if (scale == 1)
2647 return;
2649 if (scale == 0)
2651 aff_combination_const (comb, comb->type, 0);
2652 return;
2655 comb->offset = (scale * comb->offset) & comb->mask;
2656 for (i = 0, j = 0; i < comb->n; i++)
2658 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2659 comb->elts[j] = comb->elts[i];
2660 if (comb->coefs[j] != 0)
2661 j++;
2663 comb->n = j;
2665 if (comb->rest)
2667 if (comb->n < MAX_AFF_ELTS)
2669 comb->coefs[comb->n] = scale;
2670 comb->elts[comb->n] = comb->rest;
2671 comb->rest = NULL_TREE;
2672 comb->n++;
2674 else
2675 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2676 build_int_cst_type (comb->type, scale));
2680 /* Adds ELT * SCALE to COMB. */
2682 static void
2683 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2684 unsigned HOST_WIDE_INT scale)
2686 unsigned i;
2688 if (scale == 0)
2689 return;
2691 for (i = 0; i < comb->n; i++)
2692 if (operand_equal_p (comb->elts[i], elt, 0))
2694 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2695 if (comb->coefs[i])
2696 return;
2698 comb->n--;
2699 comb->coefs[i] = comb->coefs[comb->n];
2700 comb->elts[i] = comb->elts[comb->n];
2702 if (comb->rest)
2704 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2705 comb->coefs[comb->n] = 1;
2706 comb->elts[comb->n] = comb->rest;
2707 comb->rest = NULL_TREE;
2708 comb->n++;
2710 return;
2712 if (comb->n < MAX_AFF_ELTS)
2714 comb->coefs[comb->n] = scale;
2715 comb->elts[comb->n] = elt;
2716 comb->n++;
2717 return;
2720 if (scale == 1)
2721 elt = fold_convert (comb->type, elt);
2722 else
2723 elt = fold_build2 (MULT_EXPR, comb->type,
2724 fold_convert (comb->type, elt),
2725 build_int_cst_type (comb->type, scale));
2727 if (comb->rest)
2728 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2729 else
2730 comb->rest = elt;
2733 /* Adds COMB2 to COMB1. */
2735 static void
2736 aff_combination_add (struct affine_tree_combination *comb1,
2737 struct affine_tree_combination *comb2)
2739 unsigned i;
2741 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2742 for (i = 0; i < comb2->n; i++)
2743 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2744 if (comb2->rest)
2745 aff_combination_add_elt (comb1, comb2->rest, 1);
2748 /* Splits EXPR into an affine combination of parts. */
2750 static void
2751 tree_to_aff_combination (tree expr, tree type,
2752 struct affine_tree_combination *comb)
2754 struct affine_tree_combination tmp;
2755 enum tree_code code;
2756 tree cst, core, toffset;
2757 HOST_WIDE_INT bitpos, bitsize;
2758 enum machine_mode mode;
2759 int unsignedp, volatilep;
2761 STRIP_NOPS (expr);
2763 code = TREE_CODE (expr);
2764 switch (code)
2766 case INTEGER_CST:
2767 aff_combination_const (comb, type, int_cst_value (expr));
2768 return;
2770 case PLUS_EXPR:
2771 case MINUS_EXPR:
2772 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2773 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2774 if (code == MINUS_EXPR)
2775 aff_combination_scale (&tmp, -1);
2776 aff_combination_add (comb, &tmp);
2777 return;
2779 case MULT_EXPR:
2780 cst = TREE_OPERAND (expr, 1);
2781 if (TREE_CODE (cst) != INTEGER_CST)
2782 break;
2783 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2784 aff_combination_scale (comb, int_cst_value (cst));
2785 return;
2787 case NEGATE_EXPR:
2788 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2789 aff_combination_scale (comb, -1);
2790 return;
2792 case ADDR_EXPR:
2793 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2794 &toffset, &mode, &unsignedp, &volatilep,
2795 false);
2796 if (bitpos % BITS_PER_UNIT != 0)
2797 break;
2798 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2799 core = build_fold_addr_expr (core);
2800 if (TREE_CODE (core) == ADDR_EXPR)
2801 aff_combination_add_elt (comb, core, 1);
2802 else
2804 tree_to_aff_combination (core, type, &tmp);
2805 aff_combination_add (comb, &tmp);
2807 if (toffset)
2809 tree_to_aff_combination (toffset, type, &tmp);
2810 aff_combination_add (comb, &tmp);
2812 return;
2814 default:
2815 break;
2818 aff_combination_elt (comb, type, expr);
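/* For illustration, a hypothetical expression a + 4 * b - 7 decomposes into

     offset = -7 (reduced modulo the mask of the type),
     elts/coefs = { a: 1, b: 4 },  rest = NULL_TREE;

   elements beyond MAX_AFF_ELTS would be accumulated into REST instead.  */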
2821 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for the width of TYPE. */
2823 static tree
2824 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2825 unsigned HOST_WIDE_INT mask)
2827 enum tree_code code;
2829 scale &= mask;
2830 elt = fold_convert (type, elt);
2832 if (scale == 1)
2834 if (!expr)
2835 return elt;
2837 return fold_build2 (PLUS_EXPR, type, expr, elt);
2840 if (scale == mask)
2842 if (!expr)
2843 return fold_build1 (NEGATE_EXPR, type, elt);
2845 return fold_build2 (MINUS_EXPR, type, expr, elt);
2848 if (!expr)
2849 return fold_build2 (MULT_EXPR, type, elt,
2850 build_int_cst_type (type, scale));
2852 if ((scale | (mask >> 1)) == mask)
2854 /* Scale is negative. */
2855 code = MINUS_EXPR;
2856 scale = (-scale) & mask;
2858 else
2859 code = PLUS_EXPR;
2861 elt = fold_build2 (MULT_EXPR, type, elt,
2862 build_int_cst_type (type, scale));
2863 return fold_build2 (code, type, expr, elt);
2866 /* Copies the tree elements of COMB to ensure that they are not shared. */
2868 static void
2869 unshare_aff_combination (struct affine_tree_combination *comb)
2871 unsigned i;
2873 for (i = 0; i < comb->n; i++)
2874 comb->elts[i] = unshare_expr (comb->elts[i]);
2875 if (comb->rest)
2876 comb->rest = unshare_expr (comb->rest);
2879 /* Makes tree from the affine combination COMB. */
2881 static tree
2882 aff_combination_to_tree (struct affine_tree_combination *comb)
2884 tree type = comb->type;
2885 tree expr = comb->rest;
2886 unsigned i;
2887 unsigned HOST_WIDE_INT off, sgn;
2889 /* Handle the special case produced by get_computation_aff when
2890 the type does not fit in HOST_WIDE_INT. */
2891 if (comb->n == 0 && comb->offset == 0)
2892 return fold_convert (type, expr);
2894 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2896 for (i = 0; i < comb->n; i++)
2897 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2898 comb->mask);
2900 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2902 /* Offset is negative. */
2903 off = (-comb->offset) & comb->mask;
2904 sgn = comb->mask;
2906 else
2908 off = comb->offset;
2909 sgn = 1;
2911 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2912 comb->mask);
2915 /* Determines the expression by which USE is expressed from induction variable
2916 CAND at statement AT in LOOP. The expression is stored in decomposed
2917 form in AFF. Returns false if USE cannot be expressed using CAND. */
2919 static bool
2920 get_computation_aff (struct loop *loop,
2921 struct iv_use *use, struct iv_cand *cand, tree at,
2922 struct affine_tree_combination *aff)
2924 tree ubase = use->iv->base;
2925 tree ustep = use->iv->step;
2926 tree cbase = cand->iv->base;
2927 tree cstep = cand->iv->step;
2928 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2929 tree uutype;
2930 tree expr, delta;
2931 tree ratio;
2932 unsigned HOST_WIDE_INT ustepi, cstepi;
2933 HOST_WIDE_INT ratioi;
2934 struct affine_tree_combination cbase_aff, expr_aff;
2935 tree cstep_orig = cstep, ustep_orig = ustep;
2937 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2939 /* We do not have enough precision to express the values of the use. */
2940 return false;
2943 expr = var_at_stmt (loop, cand, at);
2945 if (TREE_TYPE (expr) != ctype)
2947 /* This may happen with the original ivs. */
2948 expr = fold_convert (ctype, expr);
2951 if (TYPE_UNSIGNED (utype))
2952 uutype = utype;
2953 else
2955 uutype = unsigned_type_for (utype);
2956 ubase = fold_convert (uutype, ubase);
2957 ustep = fold_convert (uutype, ustep);
2960 if (uutype != ctype)
2962 expr = fold_convert (uutype, expr);
2963 cbase = fold_convert (uutype, cbase);
2964 cstep = fold_convert (uutype, cstep);
2966 /* If the conversion is not a noop, we must take it into account when
2967 considering the value of the step. */
2968 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2969 cstep_orig = cstep;
2972 if (cst_and_fits_in_hwi (cstep_orig)
2973 && cst_and_fits_in_hwi (ustep_orig))
2975 ustepi = int_cst_value (ustep_orig);
2976 cstepi = int_cst_value (cstep_orig);
2978 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2980 /* TODO maybe consider case when ustep divides cstep and the ratio is
2981 a power of 2 (so that the division is fast to execute)? We would
2982 need to be much more careful with overflows etc. then. */
2983 return false;
2986 ratio = build_int_cst_type (uutype, ratioi);
2988 else
2990 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2991 if (!ratio)
2992 return false;
2994 /* Ratioi is only used to detect special cases when the multiplicative
2995 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2996 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2997 to integer_onep/integer_all_onesp, since the former ignores
2998 TREE_OVERFLOW. */
2999 if (cst_and_fits_in_hwi (ratio))
3000 ratioi = int_cst_value (ratio);
3001 else if (integer_onep (ratio))
3002 ratioi = 1;
3003 else if (integer_all_onesp (ratio))
3004 ratioi = -1;
3005 else
3006 ratioi = 0;
3009 /* We may need to shift the value if we are after the increment. */
3010 if (stmt_after_increment (loop, cand, at))
3011 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3013 /* use = ubase - ratio * cbase + ratio * var.
3015 In the general case ubase + ratio * (var - cbase) could be better (one less
3016 multiplication), but it is often possible to eliminate redundant parts
3017 of the computation from the (ubase - ratio * cbase) term, and if that does
3018 not happen, fold is able to apply the distributive law to obtain this form
3019 anyway. */
3021 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3023 /* Let's compute in trees and just return the result in AFF. This case
3024 should not be very common, and fold itself is not that bad either,
3025 so making the aff. functions more complicated to handle this case
3026 is not that urgent. */
3027 if (ratioi == 1)
3029 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3030 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3032 else if (ratioi == -1)
3034 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3035 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3037 else
3039 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3040 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3041 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3042 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3045 aff->type = uutype;
3046 aff->n = 0;
3047 aff->offset = 0;
3048 aff->mask = 0;
3049 aff->rest = expr;
3050 return true;
3053 /* If we got here, the type fits in HOST_WIDE_INT, so it must be
3054 possible to compute ratioi. */
3055 gcc_assert (ratioi);
3057 tree_to_aff_combination (ubase, uutype, aff);
3058 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3059 tree_to_aff_combination (expr, uutype, &expr_aff);
3060 aff_combination_scale (&cbase_aff, -ratioi);
3061 aff_combination_scale (&expr_aff, ratioi);
3062 aff_combination_add (aff, &cbase_aff);
3063 aff_combination_add (aff, &expr_aff);
3065 return true;
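/* A hypothetical instance of the rewrite above: if the use has
   {base = b, step = 4} and the candidate has {base = 0, step = 1}, then
   ratio == 4 and the use is expressed as

     use = b - 4 * 0 + 4 * var  ==  b + 4 * var,

   computed in the affine representation so that redundant parts can be
   eliminated.  */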
3068 /* Determines the expression by which USE is expressed from induction variable
3069 CAND at statement AT in LOOP. The computation is unshared. */
3071 static tree
3072 get_computation_at (struct loop *loop,
3073 struct iv_use *use, struct iv_cand *cand, tree at)
3075 struct affine_tree_combination aff;
3076 tree type = TREE_TYPE (use->iv->base);
3078 if (!get_computation_aff (loop, use, cand, at, &aff))
3079 return NULL_TREE;
3080 unshare_aff_combination (&aff);
3081 return fold_convert (type, aff_combination_to_tree (&aff));
3084 /* Determines the expression by which USE is expressed from induction variable
3085 CAND in LOOP. The computation is unshared. */
3087 static tree
3088 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3090 return get_computation_at (loop, use, cand, use->stmt);
3093 /* Returns cost of addition in MODE. */
3095 static unsigned
3096 add_cost (enum machine_mode mode)
3098 static unsigned costs[NUM_MACHINE_MODES];
3099 rtx seq;
3100 unsigned cost;
3102 if (costs[mode])
3103 return costs[mode];
3105 start_sequence ();
3106 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3107 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3108 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3109 NULL_RTX);
3110 seq = get_insns ();
3111 end_sequence ();
3113 cost = seq_cost (seq);
3114 if (!cost)
3115 cost = 1;
3117 costs[mode] = cost;
3119 if (dump_file && (dump_flags & TDF_DETAILS))
3120 fprintf (dump_file, "Addition in %s costs %d\n",
3121 GET_MODE_NAME (mode), cost);
3122 return cost;
3125 /* Entry in a hashtable of already known costs for multiplication. */
3126 struct mbc_entry
3128 HOST_WIDE_INT cst; /* The constant to multiply by. */
3129 enum machine_mode mode; /* In mode. */
3130 unsigned cost; /* The cost. */
3133 /* Computes the hash value for ENTRY. */
3135 static hashval_t
3136 mbc_entry_hash (const void *entry)
3138 const struct mbc_entry *e = entry;
3140 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3143 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3145 static int
3146 mbc_entry_eq (const void *entry1, const void *entry2)
3148 const struct mbc_entry *e1 = entry1;
3149 const struct mbc_entry *e2 = entry2;
3151 return (e1->mode == e2->mode
3152 && e1->cst == e2->cst);
3155 /* Returns cost of multiplication by constant CST in MODE. */
3157 unsigned
3158 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3160 static htab_t costs;
3161 struct mbc_entry **cached, act;
3162 rtx seq;
3163 unsigned cost;
3165 if (!costs)
3166 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3168 act.mode = mode;
3169 act.cst = cst;
3170 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3171 if (*cached)
3172 return (*cached)->cost;
3174 *cached = XNEW (struct mbc_entry);
3175 (*cached)->mode = mode;
3176 (*cached)->cst = cst;
3178 start_sequence ();
3179 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3180 gen_int_mode (cst, mode), NULL_RTX, 0);
3181 seq = get_insns ();
3182 end_sequence ();
3184 cost = seq_cost (seq);
3186 if (dump_file && (dump_flags & TDF_DETAILS))
3187 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3188 (int) cst, GET_MODE_NAME (mode), cost);
3190 (*cached)->cost = cost;
3192 return cost;
3195 /* Returns true if multiplying by RATIO is allowed in an address. */
3197 bool
3198 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3200 #define MAX_RATIO 128
3201 static sbitmap valid_mult;
3203 if (!valid_mult)
3205 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3206 rtx addr;
3207 HOST_WIDE_INT i;
3209 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3210 sbitmap_zero (valid_mult);
3211 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3212 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3215 if (memory_address_p (Pmode, addr))
3216 SET_BIT (valid_mult, i + MAX_RATIO);
3219 if (dump_file && (dump_flags & TDF_DETAILS))
3221 fprintf (dump_file, " allowed multipliers:");
3222 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3223 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3224 fprintf (dump_file, " %d", (int) i);
3225 fprintf (dump_file, "\n");
3226 fprintf (dump_file, "\n");
3230 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3231 return false;
3233 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
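/* For instance, on a target with x86-like addressing modes the probing
   above would typically leave only the multipliers 1, 2, 4 and 8 set in
   VALID_MULT (the concrete set is target-dependent and merely illustrative
   here); each ratio is thus tested against the target only once.  */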
3236 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3237 If SYMBOL_PRESENT is false, the symbol part is omitted. If VAR_PRESENT is
3238 false, the variable part is omitted.
3240 TODO -- there must be some better way. This all is quite crude. */
3242 static unsigned
3243 get_address_cost (bool symbol_present, bool var_present,
3244 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3246 static bool initialized = false;
3247 static HOST_WIDE_INT rat, off;
3248 static HOST_WIDE_INT min_offset, max_offset;
3249 static unsigned costs[2][2][2][2];
3250 unsigned cost, acost;
3251 rtx seq, addr, base;
3252 bool offset_p, ratio_p;
3253 rtx reg1;
3254 HOST_WIDE_INT s_offset;
3255 unsigned HOST_WIDE_INT mask;
3256 unsigned bits;
3258 if (!initialized)
3260 HOST_WIDE_INT i;
3261 initialized = true;
3263 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3265 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3266 for (i = 1; i <= 1 << 20; i <<= 1)
3268 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3269 if (!memory_address_p (Pmode, addr))
3270 break;
3272 max_offset = i >> 1;
3273 off = max_offset;
3275 for (i = 1; i <= 1 << 20; i <<= 1)
3277 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3278 if (!memory_address_p (Pmode, addr))
3279 break;
3281 min_offset = -(i >> 1);
3283 if (dump_file && (dump_flags & TDF_DETAILS))
3285 fprintf (dump_file, "get_address_cost:\n");
3286 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3287 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3290 rat = 1;
3291 for (i = 2; i <= MAX_RATIO; i++)
3292 if (multiplier_allowed_in_address_p (i))
3294 rat = i;
3295 break;
3299 bits = GET_MODE_BITSIZE (Pmode);
3300 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3301 offset &= mask;
3302 if ((offset >> (bits - 1) & 1))
3303 offset |= ~mask;
3304 s_offset = offset;
3306 cost = 0;
3307 offset_p = (s_offset != 0
3308 && min_offset <= s_offset && s_offset <= max_offset);
3309 ratio_p = (ratio != 1
3310 && multiplier_allowed_in_address_p (ratio));
3312 if (ratio != 1 && !ratio_p)
3313 cost += multiply_by_cost (ratio, Pmode);
3315 if (s_offset && !offset_p && !symbol_present)
3317 cost += add_cost (Pmode);
3318 var_present = true;
3321 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3322 if (!acost)
3324 int old_cse_not_expected;
3325 acost = 0;
3327 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3328 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3329 if (ratio_p)
3330 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3332 if (var_present)
3333 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3335 if (symbol_present)
3337 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3338 if (offset_p)
3339 base = gen_rtx_fmt_e (CONST, Pmode,
3340 gen_rtx_fmt_ee (PLUS, Pmode,
3341 base,
3342 gen_int_mode (off, Pmode)));
3344 else if (offset_p)
3345 base = gen_int_mode (off, Pmode);
3346 else
3347 base = NULL_RTX;
3349 if (base)
3350 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3352 start_sequence ();
3353 /* To avoid splitting addressing modes, pretend that no cse will
3354 follow. */
3355 old_cse_not_expected = cse_not_expected;
3356 cse_not_expected = true;
3357 addr = memory_address (Pmode, addr);
3358 cse_not_expected = old_cse_not_expected;
3359 seq = get_insns ();
3360 end_sequence ();
3362 acost = seq_cost (seq);
3363 acost += address_cost (addr, Pmode);
3365 if (!acost)
3366 acost = 1;
3367 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3370 return cost + acost;
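/* A worked example of the offset canonicalization above, assuming a 32-bit
   Pmode: offset == 0xfffffffc is masked and then sign-extended by hand, so
   s_offset == -4 and small negative offsets compare correctly against
   [min_offset, max_offset].  */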
3373 /* Estimates cost of forcing expression EXPR into a variable. */
3375 unsigned
3376 force_expr_to_var_cost (tree expr)
3378 static bool costs_initialized = false;
3379 static unsigned integer_cost;
3380 static unsigned symbol_cost;
3381 static unsigned address_cost;
3382 tree op0, op1;
3383 unsigned cost0, cost1, cost;
3384 enum machine_mode mode;
3386 if (!costs_initialized)
3388 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3389 rtx x = gen_rtx_MEM (DECL_MODE (var),
3390 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3391 tree addr;
3392 tree type = build_pointer_type (integer_type_node);
3394 integer_cost = computation_cost (build_int_cst (integer_type_node,
3395 2000));
3397 SET_DECL_RTL (var, x);
3398 TREE_STATIC (var) = 1;
3399 addr = build1 (ADDR_EXPR, type, var);
3400 symbol_cost = computation_cost (addr) + 1;
3402 address_cost
3403 = computation_cost (build2 (PLUS_EXPR, type,
3404 addr,
3405 build_int_cst (type, 2000))) + 1;
3406 if (dump_file && (dump_flags & TDF_DETAILS))
3408 fprintf (dump_file, "force_expr_to_var_cost:\n");
3409 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3410 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3411 fprintf (dump_file, " address %d\n", (int) address_cost);
3412 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3413 fprintf (dump_file, "\n");
3416 costs_initialized = true;
3419 STRIP_NOPS (expr);
3421 if (SSA_VAR_P (expr))
3422 return 0;
3424 if (TREE_INVARIANT (expr))
3426 if (TREE_CODE (expr) == INTEGER_CST)
3427 return integer_cost;
3429 if (TREE_CODE (expr) == ADDR_EXPR)
3431 tree obj = TREE_OPERAND (expr, 0);
3433 if (TREE_CODE (obj) == VAR_DECL
3434 || TREE_CODE (obj) == PARM_DECL
3435 || TREE_CODE (obj) == RESULT_DECL)
3436 return symbol_cost;
3439 return address_cost;
3442 switch (TREE_CODE (expr))
3444 case PLUS_EXPR:
3445 case MINUS_EXPR:
3446 case MULT_EXPR:
3447 op0 = TREE_OPERAND (expr, 0);
3448 op1 = TREE_OPERAND (expr, 1);
3449 STRIP_NOPS (op0);
3450 STRIP_NOPS (op1);
3452 if (is_gimple_val (op0))
3453 cost0 = 0;
3454 else
3455 cost0 = force_expr_to_var_cost (op0);
3457 if (is_gimple_val (op1))
3458 cost1 = 0;
3459 else
3460 cost1 = force_expr_to_var_cost (op1);
3462 break;
3464 default:
3465 /* Just an arbitrary value, FIXME. */
3466 return target_spill_cost;
3469 mode = TYPE_MODE (TREE_TYPE (expr));
3470 switch (TREE_CODE (expr))
3472 case PLUS_EXPR:
3473 case MINUS_EXPR:
3474 cost = add_cost (mode);
3475 break;
3477 case MULT_EXPR:
3478 if (cst_and_fits_in_hwi (op0))
3479 cost = multiply_by_cost (int_cst_value (op0), mode);
3480 else if (cst_and_fits_in_hwi (op1))
3481 cost = multiply_by_cost (int_cst_value (op1), mode);
3482 else
3483 return target_spill_cost;
3484 break;
3486 default:
3487 gcc_unreachable ();
3490 cost += cost0;
3491 cost += cost1;
3493 /* Bound the cost by target_spill_cost. The parts of complicated
3494 computations are often either loop invariant or at least can
3495 be shared between several iv uses, so letting this grow without
3496 limit would not give reasonable results. */
3497 return cost < target_spill_cost ? cost : target_spill_cost;
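/* A worked instance of the recursion above: for (a + b) * 5, where a and b
   are gimple values, the cost is multiply_by_cost (5, mode) for the outer
   MULT_EXPR plus add_cost (mode) for the inner sum, capped at
   target_spill_cost.  */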
3500 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3501 invariants the computation depends on. */
3503 static unsigned
3504 force_var_cost (struct ivopts_data *data,
3505 tree expr, bitmap *depends_on)
3507 if (depends_on)
3509 fd_ivopts_data = data;
3510 walk_tree (&expr, find_depends, depends_on, NULL);
3513 return force_expr_to_var_cost (expr);
3516 /* Estimates the cost of expressing the address ADDR as var + symbol + offset.
3517 The value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT
3518 are set to false if the corresponding part is missing. DEPENDS_ON is a set
3519 of the invariants the computation depends on. */
3521 static unsigned
3522 split_address_cost (struct ivopts_data *data,
3523 tree addr, bool *symbol_present, bool *var_present,
3524 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3526 tree core;
3527 HOST_WIDE_INT bitsize;
3528 HOST_WIDE_INT bitpos;
3529 tree toffset;
3530 enum machine_mode mode;
3531 int unsignedp, volatilep;
3533 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3534 &unsignedp, &volatilep, false);
3536 if (toffset != 0
3537 || bitpos % BITS_PER_UNIT != 0
3538 || TREE_CODE (core) != VAR_DECL)
3540 *symbol_present = false;
3541 *var_present = true;
3542 fd_ivopts_data = data;
3543 walk_tree (&addr, find_depends, depends_on, NULL);
3544 return target_spill_cost;
3547 *offset += bitpos / BITS_PER_UNIT;
3548 if (TREE_STATIC (core)
3549 || DECL_EXTERNAL (core))
3551 *symbol_present = true;
3552 *var_present = false;
3553 return 0;
3556 *symbol_present = false;
3557 *var_present = true;
3558 return 0;
3561 /* Estimates the cost of expressing the difference of addresses E1 - E2 as
3562 var + symbol + offset. The value of the offset is added to OFFSET;
3563 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3564 part is missing. DEPENDS_ON is a set of the invariants the computation
3565 depends on. */
3567 static unsigned
3568 ptr_difference_cost (struct ivopts_data *data,
3569 tree e1, tree e2, bool *symbol_present, bool *var_present,
3570 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3572 HOST_WIDE_INT diff = 0;
3573 unsigned cost;
3575 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3577 if (ptr_difference_const (e1, e2, &diff))
3579 *offset += diff;
3580 *symbol_present = false;
3581 *var_present = false;
3582 return 0;
3585 if (e2 == integer_zero_node)
3586 return split_address_cost (data, TREE_OPERAND (e1, 0),
3587 symbol_present, var_present, offset, depends_on);
3589 *symbol_present = false;
3590 *var_present = true;
3592 cost = force_var_cost (data, e1, depends_on);
3593 cost += force_var_cost (data, e2, depends_on);
3594 cost += add_cost (Pmode);
3596 return cost;
3599 /* Estimates the cost of expressing the difference E1 - E2 as
3600 var + symbol + offset. The value of the offset is added to OFFSET;
3601 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3602 part is missing. DEPENDS_ON is a set of the invariants the computation
3603 depends on. */
3605 static unsigned
3606 difference_cost (struct ivopts_data *data,
3607 tree e1, tree e2, bool *symbol_present, bool *var_present,
3608 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3610 unsigned cost;
3611 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3612 unsigned HOST_WIDE_INT off1, off2;
3614 e1 = strip_offset (e1, &off1);
3615 e2 = strip_offset (e2, &off2);
3616 *offset += off1 - off2;
3618 STRIP_NOPS (e1);
3619 STRIP_NOPS (e2);
3621 if (TREE_CODE (e1) == ADDR_EXPR)
3622 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3623 depends_on);
3624 *symbol_present = false;
3626 if (operand_equal_p (e1, e2, 0))
3628 *var_present = false;
3629 return 0;
3631 *var_present = true;
3632 if (zero_p (e2))
3633 return force_var_cost (data, e1, depends_on);
3635 if (zero_p (e1))
3637 cost = force_var_cost (data, e2, depends_on);
3638 cost += multiply_by_cost (-1, mode);
3640 return cost;
3643 cost = force_var_cost (data, e1, depends_on);
3644 cost += force_var_cost (data, e2, depends_on);
3645 cost += add_cost (mode);
3647 return cost;
3650 /* Determines the cost of the computation by which USE is expressed
3651 from induction variable CAND. If ADDRESS_P is true, we just need
3652 to create an address from it, otherwise we want to get it into a
3653 register. A set of invariants we depend on is stored in
3654 DEPENDS_ON. AT is the statement at which the value is computed. */
3656 static unsigned
3657 get_computation_cost_at (struct ivopts_data *data,
3658 struct iv_use *use, struct iv_cand *cand,
3659 bool address_p, bitmap *depends_on, tree at)
3661 tree ubase = use->iv->base, ustep = use->iv->step;
3662 tree cbase, cstep;
3663 tree utype = TREE_TYPE (ubase), ctype;
3664 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3665 HOST_WIDE_INT ratio, aratio;
3666 bool var_present, symbol_present;
3667 unsigned cost = 0, n_sums;
3669 *depends_on = NULL;
3671 /* Only consider real candidates. */
3672 if (!cand->iv)
3673 return INFTY;
3675 cbase = cand->iv->base;
3676 cstep = cand->iv->step;
3677 ctype = TREE_TYPE (cbase);
3679 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3681 /* We do not have enough precision to express the values of the use. */
3682 return INFTY;
3685 if (address_p)
3687 /* Do not try to express the address of an object with a computation
3688 based on the address of a different object. This may cause problems
3689 in RTL-level alias analysis (which does not expect this to happen,
3690 as it is illegal in C), and would be unlikely to be useful
3691 anyway. */
3692 if (use->iv->base_object
3693 && cand->iv->base_object
3694 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3695 return INFTY;
3698 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3700 /* TODO -- add direct handling of this case. */
3701 goto fallback;
3704 /* CSTEPI is removed from the offset in case the statement is after the
3705 increment. If the step is not constant, we use zero instead.
3706 This is a bit imprecise (there is the extra addition), but
3707 redundancy elimination is likely to transform the code so that
3708 it uses the value of the variable before the increment anyway,
3709 so it is not that unrealistic. */
3710 if (cst_and_fits_in_hwi (cstep))
3711 cstepi = int_cst_value (cstep);
3712 else
3713 cstepi = 0;
3715 if (cst_and_fits_in_hwi (ustep)
3716 && cst_and_fits_in_hwi (cstep))
3718 ustepi = int_cst_value (ustep);
3720 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3721 return INFTY;
3723 else
3725 tree rat;
3727 rat = constant_multiple_of (utype, ustep, cstep);
3729 if (!rat)
3730 return INFTY;
3732 if (cst_and_fits_in_hwi (rat))
3733 ratio = int_cst_value (rat);
3734 else if (integer_onep (rat))
3735 ratio = 1;
3736 else if (integer_all_onesp (rat))
3737 ratio = -1;
3738 else
3739 return INFTY;
3742 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3743 or ratio == 1, it is better to handle this like
3745 ubase - ratio * cbase + ratio * var
3747 (this also holds in the case ratio == -1; TODO). */
3749 if (cst_and_fits_in_hwi (cbase))
3751 offset = - ratio * int_cst_value (cbase);
3752 cost += difference_cost (data,
3753 ubase, integer_zero_node,
3754 &symbol_present, &var_present, &offset,
3755 depends_on);
3757 else if (ratio == 1)
3759 cost += difference_cost (data,
3760 ubase, cbase,
3761 &symbol_present, &var_present, &offset,
3762 depends_on);
3764 else
3766 cost += force_var_cost (data, cbase, depends_on);
3767 cost += add_cost (TYPE_MODE (ctype));
3768 cost += difference_cost (data,
3769 ubase, integer_zero_node,
3770 &symbol_present, &var_present, &offset,
3771 depends_on);
3774 /* If we are after the increment, the value of the candidate is higher by
3775 one iteration. */
3776 if (stmt_after_increment (data->current_loop, cand, at))
3777 offset -= ratio * cstepi;
3779 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3780 (symbol/var/const parts may be omitted). If we are looking for an address,
3781 find the cost of addressing this. */
3782 if (address_p)
3783 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3785 /* Otherwise estimate the costs for computing the expression. */
3786 aratio = ratio > 0 ? ratio : -ratio;
3787 if (!symbol_present && !var_present && !offset)
3789 if (ratio != 1)
3790 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3792 return cost;
3795 if (aratio != 1)
3796 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3798 n_sums = 1;
3799 if (var_present
3800 /* Symbol + offset should be compile-time computable. */
3801 && (symbol_present || offset))
3802 n_sums++;
3804 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3806 fallback:
3808 /* Just get the expression, expand it and measure the cost. */
3809 tree comp = get_computation_at (data->current_loop, use, cand, at);
3811 if (!comp)
3812 return INFTY;
3814 if (address_p)
3815 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3817 return computation_cost (comp);
3821 /* Determines the cost of the computation by which USE is expressed
3822 from induction variable CAND. If ADDRESS_P is true, we just need
3823 to create an address from it, otherwise we want to get it into a
3824 register. A set of invariants we depend on is stored in
3825 DEPENDS_ON. */
3827 static unsigned
3828 get_computation_cost (struct ivopts_data *data,
3829 struct iv_use *use, struct iv_cand *cand,
3830 bool address_p, bitmap *depends_on)
3832 return get_computation_cost_at (data,
3833 use, cand, address_p, depends_on, use->stmt);
3836 /* Determines cost of basing replacement of USE on CAND in a generic
3837 expression. */
3839 static bool
3840 determine_use_iv_cost_generic (struct ivopts_data *data,
3841 struct iv_use *use, struct iv_cand *cand)
3843 bitmap depends_on;
3844 unsigned cost;
3846 /* The simple case first -- if we need to express the value of the preserved
3847 original biv, the cost is 0. This also prevents us from counting the
3848 cost of the increment twice -- once at this use and once in the cost of
3849 the candidate. */
3850 if (cand->pos == IP_ORIGINAL
3851 && cand->incremented_at == use->stmt)
3853 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3854 return true;
3857 cost = get_computation_cost (data, use, cand, false, &depends_on);
3858 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3860 return cost != INFTY;
3863 /* Determines cost of basing replacement of USE on CAND in an address. */
3865 static bool
3866 determine_use_iv_cost_address (struct ivopts_data *data,
3867 struct iv_use *use, struct iv_cand *cand)
3869 bitmap depends_on;
3870 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3872 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3874 return cost != INFTY;
3877 /* Computes value of induction variable IV in iteration NITER. */
3879 static tree
3880 iv_value (struct iv *iv, tree niter)
3882 tree val;
3883 tree type = TREE_TYPE (iv->base);
3885 niter = fold_convert (type, niter);
3886 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3888 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3891 /* Computes value of candidate CAND at position AT in iteration NITER. */
3893 static tree
3894 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3896 tree val = iv_value (cand->iv, niter);
3897 tree type = TREE_TYPE (cand->iv->base);
3899 if (stmt_after_increment (loop, cand, at))
3900 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3902 return val;
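/* For illustration: a candidate with {base = b, step = s} has the value
   b + s * niter in iteration NITER, and b + s * (niter + 1) when AT is
   after the increment -- the extra step added by the PLUS_EXPR above.  */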
3907 /* Returns the period of induction variable IV. */
3907 static tree
3908 iv_period (struct iv *iv)
3910 tree step = iv->step, period, type;
3911 tree pow2div;
3913 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3915 /* The period of the iv is the type range divided by gcd (step, type range).
3916 Since the type range is a power of two, it suffices to determine the
3917 maximum power of two that divides the step. */
3918 pow2div = num_ending_zeros (step);
3919 type = unsigned_type_for (TREE_TYPE (step));
3921 period = build_low_bits_mask (type,
3922 (TYPE_PRECISION (type)
3923 - tree_low_cst (pow2div, 1)));
3925 return period;
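/* Worked example: for an 8-bit unsigned iv with step 12, num_ending_zeros
   gives 2 (12 == 4 * 3), so the mask built above is 2^(8 - 2) - 1 == 63;
   the iv takes 64 distinct values before wrapping, and the value returned
   is that period minus one, matching the GE test in may_eliminate_iv.  */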
3928 /* Returns the comparison operator used when eliminating the iv USE. */
3930 static enum tree_code
3931 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3933 struct loop *loop = data->current_loop;
3934 basic_block ex_bb;
3935 edge exit;
3937 ex_bb = bb_for_stmt (use->stmt);
3938 exit = EDGE_SUCC (ex_bb, 0);
3939 if (flow_bb_inside_loop_p (loop, exit->dest))
3940 exit = EDGE_SUCC (ex_bb, 1);
3942 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3945 /* Checks whether it is possible to express the condition in USE by a comparison
3946 of candidate CAND. If so, stores the value to compare against in *BOUND. */
3948 static bool
3949 may_eliminate_iv (struct ivopts_data *data,
3950 struct iv_use *use, struct iv_cand *cand, tree *bound)
3952 basic_block ex_bb;
3953 edge exit;
3954 tree nit, nit_type;
3955 tree wider_type, period, per_type;
3956 struct loop *loop = data->current_loop;
3958 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3959 return false;
3961 /* For now this works only for exits that dominate the loop latch. TODO --
3962 extend it to other conditions inside the loop body. */
3963 ex_bb = bb_for_stmt (use->stmt);
3964 if (use->stmt != last_stmt (ex_bb)
3965 || TREE_CODE (use->stmt) != COND_EXPR)
3966 return false;
3967 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3968 return false;
3970 exit = EDGE_SUCC (ex_bb, 0);
3971 if (flow_bb_inside_loop_p (loop, exit->dest))
3972 exit = EDGE_SUCC (ex_bb, 1);
3973 if (flow_bb_inside_loop_p (loop, exit->dest))
3974 return false;
3976 nit = niter_for_exit (data, exit);
3977 if (!nit)
3978 return false;
3980 nit_type = TREE_TYPE (nit);
3982 /* Determine whether we may use the variable to test whether niter iterations
3983 elapsed. This is the case iff the period of the induction variable is greater
3984 than the number of iterations; iv_period returns the period minus one, whence the GE test below. */
3985 period = iv_period (cand->iv);
3986 if (!period)
3987 return false;
3988 per_type = TREE_TYPE (period);
3991 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3992 wider_type = per_type;
3993 else
3994 wider_type = nit_type;
3996 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3997 fold_convert (wider_type, period),
3998 fold_convert (wider_type, nit))))
3999 return false;
4001 *bound = cand_value_at (loop, cand, use->stmt, nit);
4002 return true;
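/* A hypothetical elimination enabled by the test above: in

     for (i = 0; i < n; i++)  { ... p = base + 4 * i ... }

   the exit test on i can be replaced by comparing p against its final
   value (computed by cand_value_at), provided p's iv does not wrap within
   the n iterations -- exactly what the period check above verifies.  */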
4005 /* Determines cost of basing replacement of USE on CAND in a condition. */
4007 static bool
4008 determine_use_iv_cost_condition (struct ivopts_data *data,
4009 struct iv_use *use, struct iv_cand *cand)
4011 tree bound = NULL_TREE, op, cond;
4012 bitmap depends_on = NULL;
4013 unsigned cost;
4015 /* Only consider real candidates. */
4016 if (!cand->iv)
4018 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4019 return false;
4022 if (may_eliminate_iv (data, use, cand, &bound))
4024 cost = force_var_cost (data, bound, &depends_on);
4026 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4027 return cost != INFTY;
4030 /* The induction variable elimination failed; just express the original
4031 iv. If it is compared with an invariant, note that we cannot get
4032 rid of it. */
4033 cost = get_computation_cost (data, use, cand, false, &depends_on);
4035 cond = *use->op_p;
4036 if (TREE_CODE (cond) != SSA_NAME)
4038 op = TREE_OPERAND (cond, 0);
4039 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4040 op = TREE_OPERAND (cond, 1);
4041 if (TREE_CODE (op) == SSA_NAME)
4043 op = get_iv (data, op)->base;
4044 fd_ivopts_data = data;
4045 walk_tree (&op, find_depends, &depends_on, NULL);
4049 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4050 return cost != INFTY;
4053 /* Determines cost of basing replacement of USE on CAND. Returns false
4054 if USE cannot be based on CAND. */
4056 static bool
4057 determine_use_iv_cost (struct ivopts_data *data,
4058 struct iv_use *use, struct iv_cand *cand)
4060 switch (use->type)
4062 case USE_NONLINEAR_EXPR:
4063 return determine_use_iv_cost_generic (data, use, cand);
4065 case USE_ADDRESS:
4066 return determine_use_iv_cost_address (data, use, cand);
4068 case USE_COMPARE:
4069 return determine_use_iv_cost_condition (data, use, cand);
4071 default:
4072 gcc_unreachable ();
4076 /* Determines costs of basing the use of the iv on an iv candidate. */
4078 static void
4079 determine_use_iv_costs (struct ivopts_data *data)
4081 unsigned i, j;
4082 struct iv_use *use;
4083 struct iv_cand *cand;
4084 bitmap to_clear = BITMAP_ALLOC (NULL);
4086 alloc_use_cost_map (data);
4088 for (i = 0; i < n_iv_uses (data); i++)
4090 use = iv_use (data, i);
4092 if (data->consider_all_candidates)
4094 for (j = 0; j < n_iv_cands (data); j++)
4096 cand = iv_cand (data, j);
4097 determine_use_iv_cost (data, use, cand);
4100 else
4102 bitmap_iterator bi;
4104 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4106 cand = iv_cand (data, j);
4107 if (!determine_use_iv_cost (data, use, cand))
4108 bitmap_set_bit (to_clear, j);
4111 /* Remove the candidates for which the cost is infinite from
4112 the list of related candidates. */
4113 bitmap_and_compl_into (use->related_cands, to_clear);
4114 bitmap_clear (to_clear);
4118 BITMAP_FREE (to_clear);
4120 if (dump_file && (dump_flags & TDF_DETAILS))
4122 fprintf (dump_file, "Use-candidate costs:\n");
4124 for (i = 0; i < n_iv_uses (data); i++)
4126 use = iv_use (data, i);
4128 fprintf (dump_file, "Use %d:\n", i);
4129 fprintf (dump_file, " cand\tcost\tdepends on\n");
4130 for (j = 0; j < use->n_map_members; j++)
4132 if (!use->cost_map[j].cand
4133 || use->cost_map[j].cost == INFTY)
4134 continue;
4136 fprintf (dump_file, " %d\t%d\t",
4137 use->cost_map[j].cand->id,
4138 use->cost_map[j].cost);
4139 if (use->cost_map[j].depends_on)
4140 bitmap_print (dump_file,
4141 use->cost_map[j].depends_on, "","");
4142 fprintf (dump_file, "\n");
4145 fprintf (dump_file, "\n");
4147 fprintf (dump_file, "\n");
4151 /* Determines cost of the candidate CAND. */
4153 static void
4154 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4156 unsigned cost_base, cost_step;
4157 tree base;
4159 if (!cand->iv)
4161 cand->cost = 0;
4162 return;
4165 /* There are two costs associated with the candidate -- its increment
4166 and its initialization. The second is almost negligible for any loop
4167 that rolls enough, so we give it only a small weight. */
4169 base = cand->iv->base;
4170 cost_base = force_var_cost (data, base, NULL);
4171 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4173 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4175 /* Prefer the original iv unless we may gain something by replacing it;
4176 this is not really relevant for artificial ivs created by other
4177 passes. */
4178 if (cand->pos == IP_ORIGINAL
4179 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4180 cand->cost--;
4182 /* Prefer not to insert statements into the latch unless there are some
4183 already (so that we do not create unnecessary jumps). */
4184 if (cand->pos == IP_END
4185 && empty_block_p (ip_end_pos (data->current_loop)))
4186 cand->cost++;
4189 /* Determines costs of computation of the candidates. */
4191 static void
4192 determine_iv_costs (struct ivopts_data *data)
4194 unsigned i;
4196 if (dump_file && (dump_flags & TDF_DETAILS))
4198 fprintf (dump_file, "Candidate costs:\n");
4199 fprintf (dump_file, " cand\tcost\n");
4202 for (i = 0; i < n_iv_cands (data); i++)
4204 struct iv_cand *cand = iv_cand (data, i);
4206 determine_iv_cost (data, cand);
4208 if (dump_file && (dump_flags & TDF_DETAILS))
4209 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4212 if (dump_file && (dump_flags & TDF_DETAILS))
4213 fprintf (dump_file, "\n");
4216 /* Calculates cost for having SIZE induction variables. */
4218 static unsigned
4219 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4221 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4224 /* For each size of the induction variable set determine the penalty. */
4226 static void
4227 determine_set_costs (struct ivopts_data *data)
4229 unsigned j, n;
4230 tree phi, op;
4231 struct loop *loop = data->current_loop;
4232 bitmap_iterator bi;
4234 /* We use the following model (definitely improvable, especially the
4235 cost function -- TODO):
4237 We estimate the number of registers available (using MD data), name it A.
4239 We estimate the number of registers used by the loop, name it U. This
4240 number is obtained as the number of loop phi nodes (not counting virtual
4241 registers and bivs) + the number of variables from outside of the loop.
4243 We set a reserve R (free regs that are used for temporary computations,
4244 etc.). For now the reserve is a constant 3.
4246 Let I be the number of induction variables.
4248 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4249 encourage making a lot of ivs without reason).
4250 -- if A - R < U + I <= A, the cost is I * PRES_COST
4251 -- if U + I > A, the cost is I * PRES_COST and
4252 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
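/* A worked instance of the model above (the numbers are illustrative):
   with A == 6, R == 3, U == 2 and I == 1, U + I + R <= A and the cost is
   1 * SMALL_COST. With I == 3, A - R < U + I <= A and the cost is
   3 * PRES_COST. With I == 5, U + I == 7 > A, so to 5 * PRES_COST we add
   uses * SPILL_COST * (7 - 6) / 7.  */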
4254 if (dump_file && (dump_flags & TDF_DETAILS))
4256 fprintf (dump_file, "Global costs:\n");
4257 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4258 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4259 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4260 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4263 n = 0;
4264 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4266 op = PHI_RESULT (phi);
4268 if (!is_gimple_reg (op))
4269 continue;
4271 if (get_iv (data, op))
4272 continue;
4274 n++;
4277 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4279 struct version_info *info = ver_info (data, j);
4281 if (info->inv_id && info->has_nonlin_use)
4282 n++;
4285 data->regs_used = n;
4286 if (dump_file && (dump_flags & TDF_DETAILS))
4287 fprintf (dump_file, " regs_used %d\n", n);
4289 if (dump_file && (dump_flags & TDF_DETAILS))
4291 fprintf (dump_file, " cost for size:\n");
4292 fprintf (dump_file, " ivs\tcost\n");
4293 for (j = 0; j <= 2 * target_avail_regs; j++)
4294 fprintf (dump_file, " %d\t%d\n", j,
4295 ivopts_global_cost_for_size (data, j));
4296 fprintf (dump_file, "\n");
4300 /* Returns true if A is a cheaper cost pair than B. */
4302 static bool
4303 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4305 if (!a)
4306 return false;
4308 if (!b)
4309 return true;
4311 if (a->cost < b->cost)
4312 return true;
4314 if (a->cost > b->cost)
4315 return false;
4317 /* In case the costs are the same, prefer the cheaper candidate. */
4318 if (a->cand->cost < b->cand->cost)
4319 return true;
4321 return false;
4324 /* Computes the cost field of IVS structure. */
4326 static void
4327 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4329 unsigned cost = 0;
4331 cost += ivs->cand_use_cost;
4332 cost += ivs->cand_cost;
4333 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4335 ivs->cost = cost;
4338 /* Remove invariants in set INVS from set IVS. */
4340 static void
4341 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4343 bitmap_iterator bi;
4344 unsigned iid;
4346 if (!invs)
4347 return;
4349 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4351 ivs->n_invariant_uses[iid]--;
4352 if (ivs->n_invariant_uses[iid] == 0)
4353 ivs->n_regs--;
4357 /* Set USE not to be expressed by any candidate in IVS. */
4359 static void
4360 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4361 struct iv_use *use)
4363 unsigned uid = use->id, cid;
4364 struct cost_pair *cp;
4366 cp = ivs->cand_for_use[uid];
4367 if (!cp)
4368 return;
4369 cid = cp->cand->id;
4371 ivs->bad_uses++;
4372 ivs->cand_for_use[uid] = NULL;
4373 ivs->n_cand_uses[cid]--;
4375 if (ivs->n_cand_uses[cid] == 0)
4377 bitmap_clear_bit (ivs->cands, cid);
4378 /* Do not count the pseudocandidates. */
4379 if (cp->cand->iv)
4380 ivs->n_regs--;
4381 ivs->n_cands--;
4382 ivs->cand_cost -= cp->cand->cost;
4384 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4387 ivs->cand_use_cost -= cp->cost;
4389 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4390 iv_ca_recount_cost (data, ivs);
4393 /* Add invariants in set INVS to set IVS. */
4395 static void
4396 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4398 bitmap_iterator bi;
4399 unsigned iid;
4401 if (!invs)
4402 return;
4404 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4406 ivs->n_invariant_uses[iid]++;
4407 if (ivs->n_invariant_uses[iid] == 1)
4408 ivs->n_regs++;
4412 /* Set cost pair for USE in set IVS to CP. */
4414 static void
4415 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4416 struct iv_use *use, struct cost_pair *cp)
4418 unsigned uid = use->id, cid;
4420 if (ivs->cand_for_use[uid] == cp)
4421 return;
4423 if (ivs->cand_for_use[uid])
4424 iv_ca_set_no_cp (data, ivs, use);
4426 if (cp)
4428 cid = cp->cand->id;
4430 ivs->bad_uses--;
4431 ivs->cand_for_use[uid] = cp;
4432 ivs->n_cand_uses[cid]++;
4433 if (ivs->n_cand_uses[cid] == 1)
4435 bitmap_set_bit (ivs->cands, cid);
4436 /* Do not count the pseudocandidates. */
4437 if (cp->cand->iv)
4438 ivs->n_regs++;
4439 ivs->n_cands++;
4440 ivs->cand_cost += cp->cand->cost;
4442 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4445 ivs->cand_use_cost += cp->cost;
4446 iv_ca_set_add_invariants (ivs, cp->depends_on);
4447 iv_ca_recount_cost (data, ivs);
4451 /* Extend set IVS by expressing USE by some of the candidates in it
4452 if possible. */
4454 static void
4455 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4456 struct iv_use *use)
4458 struct cost_pair *best_cp = NULL, *cp;
4459 bitmap_iterator bi;
4460 unsigned i;
4462 gcc_assert (ivs->upto >= use->id);
4464 if (ivs->upto == use->id)
4466 ivs->upto++;
4467 ivs->bad_uses++;
4470 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4472 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4474 if (cheaper_cost_pair (cp, best_cp))
4475 best_cp = cp;
4478 iv_ca_set_cp (data, ivs, use, best_cp);
4481 /* Get the cost of assignment IVS, or INFTY if some use has no candidate. */
4483 static unsigned
4484 iv_ca_cost (struct iv_ca *ivs)
4486 return (ivs->bad_uses ? INFTY : ivs->cost);
4489 /* Returns true if all dependences of CP are among invariants in IVS. */
4491 static bool
4492 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4494 unsigned i;
4495 bitmap_iterator bi;
4497 if (!cp->depends_on)
4498 return true;
4500 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4502 if (ivs->n_invariant_uses[i] == 0)
4503 return false;
4506 return true;
4509 /* Creates a record of the change that expresses USE by NEW_CP instead of
4510 OLD_CP, and chains it before NEXT_CHANGE. */
4512 static struct iv_ca_delta *
4513 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4514 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4516 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4518 change->use = use;
4519 change->old_cp = old_cp;
4520 change->new_cp = new_cp;
4521 change->next_change = next_change;
4523 return change;
4526 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4527 are rewritten. */
4529 static struct iv_ca_delta *
4530 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4532 struct iv_ca_delta *last;
4534 if (!l2)
4535 return l1;
4537 if (!l1)
4538 return l2;
4540 for (last = l1; last->next_change; last = last->next_change)
4541 continue;
4542 last->next_change = l2;
4544 return l1;
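/* Illustrative sketch, not part of the pass: a delta is an ordinary
   singly-linked list of changes.  iv_ca_delta_add pushes on the front in
   O(1); iv_ca_delta_join walks to the tail of L1 and splices L2 there.
   The same shape with a hypothetical generic node type (assuming
   <stdlib.h>; error handling elided):  */

struct toy_node { int payload; struct toy_node *next; };

static struct toy_node *
toy_push (int payload, struct toy_node *rest)
{
  struct toy_node *n = (struct toy_node *) malloc (sizeof *n);
  n->payload = payload;
  n->next = rest;		/* The new node becomes the head.  */
  return n;
}

static struct toy_node *
toy_join (struct toy_node *l1, struct toy_node *l2)
{
  struct toy_node *last;
  if (!l2)
    return l1;
  if (!l1)
    return l2;
  for (last = l1; last->next; last = last->next)
    continue;			/* Find the tail of L1...  */
  last->next = l2;		/* ...and hang L2 off it.  */
  return l1;
}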
4547 /* Returns the cost pair through which USE is expressed in IVS. */
4549 static struct cost_pair *
4550 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4552 return ivs->cand_for_use[use->id];
4555 /* Reverse the list of changes DELTA, forming its inverse. */
4557 static struct iv_ca_delta *
4558 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4560 struct iv_ca_delta *act, *next, *prev = NULL;
4561 struct cost_pair *tmp;
4563 for (act = delta; act; act = next)
4565 next = act->next_change;
4566 act->next_change = prev;
4567 prev = act;
4569 tmp = act->old_cp;
4570 act->old_cp = act->new_cp;
4571 act->new_cp = tmp;
4574 return prev;
4577 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4578 reverted instead. */
4580 static void
4581 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4582 struct iv_ca_delta *delta, bool forward)
4584 struct cost_pair *from, *to;
4585 struct iv_ca_delta *act;
4587 if (!forward)
4588 delta = iv_ca_delta_reverse (delta);
4590 for (act = delta; act; act = act->next_change)
4592 from = act->old_cp;
4593 to = act->new_cp;
4594 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4595 iv_ca_set_cp (data, ivs, act->use, to);
4598 if (!forward)
4599 iv_ca_delta_reverse (delta);
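/* Illustrative sketch, not part of the pass: committing a delta walks it
   head to tail installing the new cost pairs; reverting walks the reversed
   list with old and new swapped, so changes are undone in the opposite
   order to which they were applied.  The toy functions below, over a
   hypothetical array-of-ints "state", show the same apply/undo-in-reverse
   discipline.  */

struct toy_change { int slot, old_val, new_val; struct toy_change *next; };

static void
toy_apply (int *state, struct toy_change *delta)
{
  for (; delta; delta = delta->next)
    state[delta->slot] = delta->new_val;	/* Head first.  */
}

static void
toy_revert (int *state, struct toy_change *delta)
{
  if (!delta)
    return;
  toy_revert (state, delta->next);	/* Undo later changes first,  */
  state[delta->slot] = delta->old_val;	/* then this one.  */
}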
4602 /* Returns true if CAND is used in IVS. */
4604 static bool
4605 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4607 return ivs->n_cand_uses[cand->id] > 0;
4610 /* Returns number of induction variable candidates in the set IVS. */
4612 static unsigned
4613 iv_ca_n_cands (struct iv_ca *ivs)
4615 return ivs->n_cands;
4618 /* Free the list of changes DELTA. */
4620 static void
4621 iv_ca_delta_free (struct iv_ca_delta **delta)
4623 struct iv_ca_delta *act, *next;
4625 for (act = *delta; act; act = next)
4627 next = act->next_change;
4628 free (act);
4631 *delta = NULL;
4634 /* Allocates a new assignment of iv candidates to uses. */
4636 static struct iv_ca *
4637 iv_ca_new (struct ivopts_data *data)
4639 struct iv_ca *nw = XNEW (struct iv_ca);
4641 nw->upto = 0;
4642 nw->bad_uses = 0;
4643 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4644 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4645 nw->cands = BITMAP_ALLOC (NULL);
4646 nw->n_cands = 0;
4647 nw->n_regs = 0;
4648 nw->cand_use_cost = 0;
4649 nw->cand_cost = 0;
4650 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4651 nw->cost = 0;
4653 return nw;
4656 /* Free memory occupied by the set IVS. */
4658 static void
4659 iv_ca_free (struct iv_ca **ivs)
4661 free ((*ivs)->cand_for_use);
4662 free ((*ivs)->n_cand_uses);
4663 BITMAP_FREE ((*ivs)->cands);
4664 free ((*ivs)->n_invariant_uses);
4665 free (*ivs);
4666 *ivs = NULL;
4669 /* Dumps IVS to FILE. */
4671 static void
4672 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4674 const char *pref = " invariants ";
4675 unsigned i;
4677 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4678 bitmap_print (file, ivs->cands, " candidates ","\n");
4680 for (i = 1; i <= data->max_inv_id; i++)
4681 if (ivs->n_invariant_uses[i])
4683 fprintf (file, "%s%d", pref, i);
4684 pref = ", ";
4686 fprintf (file, "\n");
4689 /* Try switching each use in IVS to candidate CAND where that is cheaper.
4690 Return the cost of the new set, and store the differences in DELTA. The
4691 number of induction variables in the new set is stored in N_IVS. */
4693 static unsigned
4694 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4695 struct iv_cand *cand, struct iv_ca_delta **delta,
4696 unsigned *n_ivs)
4698 unsigned i, cost;
4699 struct iv_use *use;
4700 struct cost_pair *old_cp, *new_cp;
4702 *delta = NULL;
4703 for (i = 0; i < ivs->upto; i++)
4705 use = iv_use (data, i);
4706 old_cp = iv_ca_cand_for_use (ivs, use);
4708 if (old_cp
4709 && old_cp->cand == cand)
4710 continue;
4712 new_cp = get_use_iv_cost (data, use, cand);
4713 if (!new_cp)
4714 continue;
4716 if (!iv_ca_has_deps (ivs, new_cp))
4717 continue;
4719 if (!cheaper_cost_pair (new_cp, old_cp))
4720 continue;
4722 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4725 iv_ca_delta_commit (data, ivs, *delta, true);
4726 cost = iv_ca_cost (ivs);
4727 if (n_ivs)
4728 *n_ivs = iv_ca_n_cands (ivs);
4729 iv_ca_delta_commit (data, ivs, *delta, false);
4731 return cost;
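/* Illustrative sketch, not part of the pass: iv_ca_extend evaluates a move
   without keeping it -- commit the delta, read off the cost, revert.  The
   caller then decides whether to re-commit the delta for real.  The same
   probe, phrased with the toy helpers introduced above and a hypothetical
   cost callback:  */

static int
toy_probe_cost (int *state, struct toy_change *delta,
		int (*cost_fn) (const int *))
{
  int cost;

  toy_apply (state, delta);	/* Tentatively apply the changes...  */
  cost = cost_fn (state);	/* ...observe the resulting cost...  */
  toy_revert (state, delta);	/* ...and restore the original state.  */
  return cost;
}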
4734 /* Try narrowing set IVS by removing CAND. Return the cost of
4735 the new set and store the differences in DELTA. */
4737 static unsigned
4738 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4739 struct iv_cand *cand, struct iv_ca_delta **delta)
4741 unsigned i, ci;
4742 struct iv_use *use;
4743 struct cost_pair *old_cp, *new_cp, *cp;
4744 bitmap_iterator bi;
4745 struct iv_cand *cnd;
4746 unsigned cost;
4748 *delta = NULL;
4749 for (i = 0; i < n_iv_uses (data); i++)
4751 use = iv_use (data, i);
4753 old_cp = iv_ca_cand_for_use (ivs, use);
4754 if (old_cp->cand != cand)
4755 continue;
4757 new_cp = NULL;
4759 if (data->consider_all_candidates)
4761 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4763 if (ci == cand->id)
4764 continue;
4766 cnd = iv_cand (data, ci);
4768 cp = get_use_iv_cost (data, use, cnd);
4769 if (!cp)
4770 continue;
4771 if (!iv_ca_has_deps (ivs, cp))
4772 continue;
4774 if (!cheaper_cost_pair (cp, new_cp))
4775 continue;
4777 new_cp = cp;
4780 else
4782 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4784 if (ci == cand->id)
4785 continue;
4787 cnd = iv_cand (data, ci);
4789 cp = get_use_iv_cost (data, use, cnd);
4790 if (!cp)
4791 continue;
4792 if (!iv_ca_has_deps (ivs, cp))
4793 continue;
4795 if (!cheaper_cost_pair (cp, new_cp))
4796 continue;
4798 new_cp = cp;
4802 if (!new_cp)
4804 iv_ca_delta_free (delta);
4805 return INFTY;
4808 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4811 iv_ca_delta_commit (data, ivs, *delta, true);
4812 cost = iv_ca_cost (ivs);
4813 iv_ca_delta_commit (data, ivs, *delta, false);
4815 return cost;
4818 /* Try optimizing the set of candidates IVS by removing candidates other
4819 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4820 differences in DELTA. */
4822 static unsigned
4823 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4824 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4826 bitmap_iterator bi;
4827 struct iv_ca_delta *act_delta, *best_delta;
4828 unsigned i, best_cost, acost;
4829 struct iv_cand *cand;
4831 best_delta = NULL;
4832 best_cost = iv_ca_cost (ivs);
4834 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4836 cand = iv_cand (data, i);
4838 if (cand == except_cand)
4839 continue;
4841 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4843 if (acost < best_cost)
4845 best_cost = acost;
4846 iv_ca_delta_free (&best_delta);
4847 best_delta = act_delta;
4849 else
4850 iv_ca_delta_free (&act_delta);
4853 if (!best_delta)
4855 *delta = NULL;
4856 return best_cost;
4859 /* Recurse to possibly remove other unnecessary ivs. */
4860 iv_ca_delta_commit (data, ivs, best_delta, true);
4861 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4862 iv_ca_delta_commit (data, ivs, best_delta, false);
4863 *delta = iv_ca_delta_join (best_delta, *delta);
4864 return best_cost;
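/* Illustrative sketch, not part of the pass: pruning is a greedy descent.
   At each step every removable candidate is tried (via the probe pattern
   above), the single removal that lowers the cost the most is kept, and the
   process repeats until no removal helps -- the recursion in iv_ca_prune
   expresses exactly that loop.  A toy version over a hypothetical array of
   selected/unselected flags; cost_fn is expected to return a huge value
   when a removal makes the solution infeasible.  */

static int
toy_prune (int *selected, int n, int (*cost_fn) (const int *, int))
{
  int best = cost_fn (selected, n);
  int improved = 1;

  while (improved)
    {
      int i, best_i = -1;

      improved = 0;
      for (i = 0; i < n; i++)
	{
	  int c;

	  if (!selected[i])
	    continue;
	  selected[i] = 0;		/* Trial removal...  */
	  c = cost_fn (selected, n);
	  selected[i] = 1;		/* ...rolled back at once.  */
	  if (c < best)
	    {
	      best = c;
	      best_i = i;
	    }
	}
      if (best_i >= 0)
	{
	  selected[best_i] = 0;		/* Keep the best removal.  */
	  improved = 1;
	}
    }
  return best;
}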
4867 /* Tries to extend the set IVS in the best possible way in order
4868 to express USE. */
4870 static bool
4871 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4872 struct iv_use *use)
4874 unsigned best_cost, act_cost;
4875 unsigned i;
4876 bitmap_iterator bi;
4877 struct iv_cand *cand;
4878 struct iv_ca_delta *best_delta = NULL, *act_delta;
4879 struct cost_pair *cp;
4881 iv_ca_add_use (data, ivs, use);
4882 best_cost = iv_ca_cost (ivs);
4884 cp = iv_ca_cand_for_use (ivs, use);
4885 if (cp)
4887 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4888 iv_ca_set_no_cp (data, ivs, use);
4891 /* First try the important candidates; only if that fails, try the
4892 specific ones. (This two-tier search is sketched schematically after
4893 this function.) Rationale -- in loops with many variables the best
4894 choice often is to use just one generic biv. If we added many
4895 use-specific ivs here, the optimization algorithm would later be likely
4896 to get stuck in a local minimum, thus causing us to create too many
4897 ivs. Working from few ivs towards more seems more likely to succeed --
4898 starting from few ivs, replacing an expensive use by a specific iv should always be a win. */
4899 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4901 cand = iv_cand (data, i);
4903 if (iv_ca_cand_used_p (ivs, cand))
4904 continue;
4906 cp = get_use_iv_cost (data, use, cand);
4907 if (!cp)
4908 continue;
4910 iv_ca_set_cp (data, ivs, use, cp);
4911 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4912 iv_ca_set_no_cp (data, ivs, use);
4913 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4915 if (act_cost < best_cost)
4917 best_cost = act_cost;
4919 iv_ca_delta_free (&best_delta);
4920 best_delta = act_delta;
4922 else
4923 iv_ca_delta_free (&act_delta);
4926 if (best_cost == INFTY)
4928 for (i = 0; i < use->n_map_members; i++)
4930 cp = use->cost_map + i;
4931 cand = cp->cand;
4932 if (!cand)
4933 continue;
4935 /* Already tried this. */
4936 if (cand->important)
4937 continue;
4939 if (iv_ca_cand_used_p (ivs, cand))
4940 continue;
4942 act_delta = NULL;
4943 iv_ca_set_cp (data, ivs, use, cp);
4944 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4945 iv_ca_set_no_cp (data, ivs, use);
4946 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4947 cp, act_delta);
4949 if (act_cost < best_cost)
4951 best_cost = act_cost;
4953 if (best_delta)
4954 iv_ca_delta_free (&best_delta);
4955 best_delta = act_delta;
4957 else
4958 iv_ca_delta_free (&act_delta);
4962 iv_ca_delta_commit (data, ivs, best_delta, true);
4963 iv_ca_delta_free (&best_delta);
4965 return (best_cost != INFTY);
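/* Illustrative sketch, not part of the pass (the schematic two-tier search
   promised in the comment inside try_add_cand_for): candidates come in two
   tiers, and the generic tier is exhausted before the use-specific tier is
   even looked at.  With hypothetical id arrays and a cost callback:  */

static int
toy_two_tier_pick (const int *generic, int n_generic,
		   const int *specific, int n_specific,
		   int (*cost_of) (int), int inf)
{
  int i, c, best = inf, best_id = -1;

  for (i = 0; i < n_generic; i++)	/* Tier 1: generic candidates.  */
    {
      c = cost_of (generic[i]);
      if (c < best)
	{
	  best = c;
	  best_id = generic[i];
	}
    }
  if (best == inf)			/* Tier 2: only on failure.  */
    for (i = 0; i < n_specific; i++)
      {
	c = cost_of (specific[i]);
	if (c < best)
	  {
	    best = c;
	    best_id = specific[i];
	  }
      }
  return best_id;			/* -1 if nothing works at all.  */
}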
4968 /* Finds an initial assignment of candidates to uses. */
4970 static struct iv_ca *
4971 get_initial_solution (struct ivopts_data *data)
4973 struct iv_ca *ivs = iv_ca_new (data);
4974 unsigned i;
4976 for (i = 0; i < n_iv_uses (data); i++)
4977 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4979 iv_ca_free (&ivs);
4980 return NULL;
4983 return ivs;
4986 /* Tries to improve the set of induction variables IVS. */
4988 static bool
4989 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4991 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4992 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4993 struct iv_cand *cand;
4995 /* Try extending the set of induction variables by one. */
4996 for (i = 0; i < n_iv_cands (data); i++)
4998 cand = iv_cand (data, i);
5000 if (iv_ca_cand_used_p (ivs, cand))
5001 continue;
5003 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5004 if (!act_delta)
5005 continue;
5007 /* If we successfully added the candidate and the set is small enough,
5008 try optimizing it by removing other candidates. */
5009 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5011 iv_ca_delta_commit (data, ivs, act_delta, true);
5012 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5013 iv_ca_delta_commit (data, ivs, act_delta, false);
5014 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5017 if (acost < best_cost)
5019 best_cost = acost;
5020 iv_ca_delta_free (&best_delta);
5021 best_delta = act_delta;
5023 else
5024 iv_ca_delta_free (&act_delta);
5027 if (!best_delta)
5029 /* Try removing the candidates from the set instead. */
5030 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5032 /* Nothing more we can do. */
5033 if (!best_delta)
5034 return false;
5037 iv_ca_delta_commit (data, ivs, best_delta, true);
5038 gcc_assert (best_cost == iv_ca_cost (ivs));
5039 iv_ca_delta_free (&best_delta);
5040 return true;
5043 /* Attempts to find the optimal set of induction variables, using a simple
5044 greedy heuristic: try to replace at most one candidate in the selected
5045 solution and remove unused ivs while this improves the cost. A skeleton of this search loop follows the function. */
5047 static struct iv_ca *
5048 find_optimal_iv_set (struct ivopts_data *data)
5050 unsigned i;
5051 struct iv_ca *set;
5052 struct iv_use *use;
5054 /* Get the initial solution. */
5055 set = get_initial_solution (data);
5056 if (!set)
5058 if (dump_file && (dump_flags & TDF_DETAILS))
5059 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5060 return NULL;
5063 if (dump_file && (dump_flags & TDF_DETAILS))
5065 fprintf (dump_file, "Initial set of candidates:\n");
5066 iv_ca_dump (data, dump_file, set);
5069 while (try_improve_iv_set (data, set))
5071 if (dump_file && (dump_flags & TDF_DETAILS))
5073 fprintf (dump_file, "Improved to:\n");
5074 iv_ca_dump (data, dump_file, set);
5078 if (dump_file && (dump_flags & TDF_DETAILS))
5079 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5081 for (i = 0; i < n_iv_uses (data); i++)
5083 use = iv_use (data, i);
5084 use->selected = iv_ca_cand_for_use (set, use)->cand;
5087 return set;
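/* Illustrative sketch, not part of the pass: find_optimal_iv_set is plain
   greedy local search -- take any feasible initial solution, then repeat
   single improving moves until none remains.  The skeleton, with
   hypothetical callbacks standing in for try_improve_iv_set and
   iv_ca_cost:  */

static int
toy_local_search (int *state, int n,
		  int (*improve_fn) (int *, int),
		  int (*cost_fn) (const int *, int))
{
  /* improve_fn applies one cost-reducing move and returns nonzero, or
     returns zero once the solution is locally optimal.  */
  while (improve_fn (state, n))
    continue;
  return cost_fn (state, n);
}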
5090 /* Creates a new induction variable corresponding to CAND. */
5092 static void
5093 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5095 block_stmt_iterator incr_pos;
5096 tree base;
5097 bool after = false;
5099 if (!cand->iv)
5100 return;
5102 switch (cand->pos)
5104 case IP_NORMAL:
5105 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5106 break;
5108 case IP_END:
5109 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5110 after = true;
5111 break;
5113 case IP_ORIGINAL:
5114 /* Mark that the iv is preserved. */
5115 name_info (data, cand->var_before)->preserve_biv = true;
5116 name_info (data, cand->var_after)->preserve_biv = true;
5118 /* Rewrite the increment so that it uses var_before directly. */
5119 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5121 return;
5124 gimple_add_tmp_var (cand->var_before);
5125 add_referenced_var (cand->var_before);
5127 base = unshare_expr (cand->iv->base);
5129 create_iv (base, unshare_expr (cand->iv->step),
5130 cand->var_before, data->current_loop,
5131 &incr_pos, after, &cand->var_before, &cand->var_after);
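/* Illustrative sketch, not part of the pass: the increment positions
   correspond, loosely, to where "i_after = i_before + step" lands in
   source terms.  The two hypothetical loops below both sum 0 .. n-1
   (for n > 0); the first increments just before the exit test (as with
   IP_NORMAL), the second at the end of the latch, after the test (as
   with IP_END).  */

static int
toy_ip_normal (int n)
{
  int i_before = 0, i_after, sum = 0;
  for (;;)
    {
      sum += i_before;
      i_after = i_before + 1;	/* Increment...  */
      if (i_after == n)		/* ...immediately before the exit test.  */
	break;
      i_before = i_after;
    }
  return sum;
}

static int
toy_ip_end (int n)
{
  int i = 0, sum = 0;
  while (i != n)		/* Exit test first...  */
    {
      sum += i;
      i = i + 1;		/* ...increment at the end of the latch.  */
    }
  return sum;
}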
5134 /* Creates new induction variables described in SET. */
5136 static void
5137 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5139 unsigned i;
5140 struct iv_cand *cand;
5141 bitmap_iterator bi;
5143 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5145 cand = iv_cand (data, i);
5146 create_new_iv (data, cand);
5150 /* Removes statement STMT (a real statement or a phi node). If
5151 INCLUDING_DEFINED_NAME is true, also remove the ssa name it defines. */
5153 static void
5154 remove_statement (tree stmt, bool including_defined_name)
5156 if (TREE_CODE (stmt) == PHI_NODE)
5158 if (!including_defined_name)
5160 /* Prevent the ssa name defined by the statement from being removed. */
5161 SET_PHI_RESULT (stmt, NULL);
5163 remove_phi_node (stmt, NULL_TREE);
5165 else
5167 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5169 bsi_remove (&bsi, true);
5173 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5174 using candidate CAND. */
5176 static void
5177 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5178 struct iv_use *use, struct iv_cand *cand)
5180 tree comp;
5181 tree op, stmts, tgt, ass;
5182 block_stmt_iterator bsi, pbsi;
5184 /* An important special case -- if we are asked to express the value of
5185 the original iv by itself, just exit; there is no need to
5186 introduce a new computation (that might also need casting the
5187 variable to unsigned and back). */
5188 if (cand->pos == IP_ORIGINAL
5189 && cand->incremented_at == use->stmt)
5191 tree step, ctype, utype;
5192 enum tree_code incr_code = PLUS_EXPR;
5194 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5195 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5197 step = cand->iv->step;
5198 ctype = TREE_TYPE (step);
5199 utype = TREE_TYPE (cand->var_after);
5200 if (TREE_CODE (step) == NEGATE_EXPR)
5202 incr_code = MINUS_EXPR;
5203 step = TREE_OPERAND (step, 0);
5206 /* Check whether we may leave the computation unchanged.
5207 This is the case only if it does not rely on other
5208 computations in the loop -- otherwise, the computation
5209 we rely upon may be removed in remove_unused_ivs,
5210 thus leading to an ICE. */
5211 op = TREE_OPERAND (use->stmt, 1);
5212 if (TREE_CODE (op) == PLUS_EXPR
5213 || TREE_CODE (op) == MINUS_EXPR)
5215 if (TREE_OPERAND (op, 0) == cand->var_before)
5216 op = TREE_OPERAND (op, 1);
5217 else if (TREE_CODE (op) == PLUS_EXPR
5218 && TREE_OPERAND (op, 1) == cand->var_before)
5219 op = TREE_OPERAND (op, 0);
5220 else
5221 op = NULL_TREE;
5223 else
5224 op = NULL_TREE;
5226 if (op
5227 && (TREE_CODE (op) == INTEGER_CST
5228 || operand_equal_p (op, step, 0)))
5229 return;
5231 /* Otherwise, add the necessary computations to express
5232 the iv. */
5233 op = fold_convert (ctype, cand->var_before);
5234 comp = fold_convert (utype,
5235 build2 (incr_code, ctype, op,
5236 unshare_expr (step)));
5238 else
5239 comp = get_computation (data->current_loop, use, cand);
5241 switch (TREE_CODE (use->stmt))
5243 case PHI_NODE:
5244 tgt = PHI_RESULT (use->stmt);
5246 /* If we should keep the biv, do not replace it. */
5247 if (name_info (data, tgt)->preserve_biv)
5248 return;
5250 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5251 while (!bsi_end_p (pbsi)
5252 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5254 bsi = pbsi;
5255 bsi_next (&pbsi);
5257 break;
5259 case MODIFY_EXPR:
5260 tgt = TREE_OPERAND (use->stmt, 0);
5261 bsi = bsi_for_stmt (use->stmt);
5262 break;
5264 default:
5265 gcc_unreachable ();
5268 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5270 if (TREE_CODE (use->stmt) == PHI_NODE)
5272 if (stmts)
5273 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5274 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5275 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5276 remove_statement (use->stmt, false);
5277 SSA_NAME_DEF_STMT (tgt) = ass;
5279 else
5281 if (stmts)
5282 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5283 TREE_OPERAND (use->stmt, 1) = op;
5287 /* Replaces the ssa name at index IDX by its base variable. Callback for
5288 for_each_index. */
5290 static bool
5291 idx_remove_ssa_names (tree base, tree *idx,
5292 void *data ATTRIBUTE_UNUSED)
5294 tree *op;
5296 if (TREE_CODE (*idx) == SSA_NAME)
5297 *idx = SSA_NAME_VAR (*idx);
5299 if (TREE_CODE (base) == ARRAY_REF)
5301 op = &TREE_OPERAND (base, 2);
5302 if (*op
5303 && TREE_CODE (*op) == SSA_NAME)
5304 *op = SSA_NAME_VAR (*op);
5305 op = &TREE_OPERAND (base, 3);
5306 if (*op
5307 && TREE_CODE (*op) == SSA_NAME)
5308 *op = SSA_NAME_VAR (*op);
5311 return true;
5314 /* Unshares REF and replaces ssa names inside it by their base variables. */
5316 static tree
5317 unshare_and_remove_ssa_names (tree ref)
5319 ref = unshare_expr (ref);
5320 for_each_index (&ref, idx_remove_ssa_names, NULL);
5322 return ref;
5325 /* Extract the alias analysis info for the memory reference REF. There are
5326 several ways this information may be stored, and its precise semantics
5327 depend on the type of the reference, but there is always one _DECL node
5328 hidden somewhere that is used to determine the set of virtual operands
5329 for the reference. The code below deciphers this jungle and extracts
5330 that single useful piece of information. */
5332 static tree
5333 get_ref_tag (tree ref, tree orig)
5335 tree var = get_base_address (ref);
5336 tree aref = NULL_TREE, tag, sv;
5337 HOST_WIDE_INT offset, size, maxsize;
5339 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5341 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5342 if (ref)
5343 break;
5346 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5347 return unshare_expr (sv);
5349 if (!var)
5350 return NULL_TREE;
5352 if (TREE_CODE (var) == INDIRECT_REF)
5354 /* If the base is a dereference of a pointer, first check its name memory
5355 tag. If it does not have one, use its symbol memory tag. */
5356 var = TREE_OPERAND (var, 0);
5357 if (TREE_CODE (var) != SSA_NAME)
5358 return NULL_TREE;
5360 if (SSA_NAME_PTR_INFO (var))
5362 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5363 if (tag)
5364 return tag;
5367 var = SSA_NAME_VAR (var);
5368 tag = var_ann (var)->symbol_mem_tag;
5369 gcc_assert (tag != NULL_TREE);
5370 return tag;
5372 else
5374 if (!DECL_P (var))
5375 return NULL_TREE;
5377 tag = var_ann (var)->symbol_mem_tag;
5378 if (tag)
5379 return tag;
5381 return var;
5385 /* Copies the reference information from OLD_REF to NEW_REF. */
5387 static void
5388 copy_ref_info (tree new_ref, tree old_ref)
5390 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5391 copy_mem_ref_info (new_ref, old_ref);
5392 else
5394 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5395 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5399 /* Rewrites USE (address that is an iv) using candidate CAND. */
5401 static void
5402 rewrite_use_address (struct ivopts_data *data,
5403 struct iv_use *use, struct iv_cand *cand)
5405 struct affine_tree_combination aff;
5406 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5407 tree ref;
5409 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5410 unshare_aff_combination (&aff);
5412 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5413 copy_ref_info (ref, *use->op_p);
5414 *use->op_p = ref;
5417 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5418 candidate CAND. */
5420 static void
5421 rewrite_use_compare (struct ivopts_data *data,
5422 struct iv_use *use, struct iv_cand *cand)
5424 tree comp;
5425 tree *op_p, cond, op, stmts, bound;
5426 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5427 enum tree_code compare;
5428 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5430 bound = cp->value;
5431 if (bound)
5433 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5434 tree var_type = TREE_TYPE (var);
5436 compare = iv_elimination_compare (data, use);
5437 bound = fold_convert (var_type, bound);
5438 op = force_gimple_operand (unshare_expr (bound), &stmts,
5439 true, NULL_TREE);
5441 if (stmts)
5442 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5444 *use->op_p = build2 (compare, boolean_type_node, var, op);
5445 update_stmt (use->stmt);
5446 return;
5449 /* The induction variable elimination failed; just express the original
5450 iv. */
5451 comp = get_computation (data->current_loop, use, cand);
5453 cond = *use->op_p;
5454 op_p = &TREE_OPERAND (cond, 0);
5455 if (TREE_CODE (*op_p) != SSA_NAME
5456 || zero_p (get_iv (data, *op_p)->step))
5457 op_p = &TREE_OPERAND (cond, 1);
5459 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5460 if (stmts)
5461 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5463 *op_p = op;
5466 /* Ensure that operand *OP_P may be used at the end of EXIT without
5467 violating loop closed ssa form. */
5469 static void
5470 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5472 basic_block def_bb;
5473 struct loop *def_loop;
5474 tree phi, use;
5476 use = USE_FROM_PTR (op_p);
5477 if (TREE_CODE (use) != SSA_NAME)
5478 return;
5480 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5481 if (!def_bb)
5482 return;
5484 def_loop = def_bb->loop_father;
5485 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5486 return;
5488 /* Try finding a phi node that copies the value out of the loop. */
5489 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5490 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5491 break;
5493 if (!phi)
5495 /* Create such a phi node. */
5496 tree new_name = duplicate_ssa_name (use, NULL);
5498 phi = create_phi_node (new_name, exit->dest);
5499 SSA_NAME_DEF_STMT (new_name) = phi;
5500 add_phi_arg (phi, use, exit);
5503 SET_USE (op_p, PHI_RESULT (phi));
5506 /* Ensure that operands of STMT may be used at the end of EXIT without
5507 violating loop closed ssa form. */
5509 static void
5510 protect_loop_closed_ssa_form (edge exit, tree stmt)
5512 ssa_op_iter iter;
5513 use_operand_p use_p;
5515 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5516 protect_loop_closed_ssa_form_use (exit, use_p);
5519 /* STMTS compute the value of the phi argument OP on edge EXIT of a loop.
5520 Arrange things so that they are emitted in the correct place, and so that
5521 loop closed ssa form is preserved. */
5523 void
5524 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5526 tree_stmt_iterator tsi;
5527 block_stmt_iterator bsi;
5528 tree phi, stmt, def, next;
5530 if (!single_pred_p (exit->dest))
5531 split_loop_exit_edge (exit);
5533 /* Ensure there is a label in exit->dest, so that we can
5534 insert after it. */
5535 tree_block_label (exit->dest);
5536 bsi = bsi_after_labels (exit->dest);
5538 if (TREE_CODE (stmts) == STATEMENT_LIST)
5540 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5542 tree stmt = tsi_stmt (tsi);
5543 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5544 protect_loop_closed_ssa_form (exit, stmt);
5547 else
5549 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5550 protect_loop_closed_ssa_form (exit, stmts);
5553 if (!op)
5554 return;
5556 for (phi = phi_nodes (exit->dest); phi; phi = next)
5558 next = PHI_CHAIN (phi);
5560 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5562 def = PHI_RESULT (phi);
5563 remove_statement (phi, false);
5564 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5565 def, op);
5566 SSA_NAME_DEF_STMT (def) = stmt;
5567 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5572 /* Rewrites USE using candidate CAND. */
5574 static void
5575 rewrite_use (struct ivopts_data *data,
5576 struct iv_use *use, struct iv_cand *cand)
5578 switch (use->type)
5580 case USE_NONLINEAR_EXPR:
5581 rewrite_use_nonlinear_expr (data, use, cand);
5582 break;
5584 case USE_ADDRESS:
5585 rewrite_use_address (data, use, cand);
5586 break;
5588 case USE_COMPARE:
5589 rewrite_use_compare (data, use, cand);
5590 break;
5592 default:
5593 gcc_unreachable ();
5595 mark_new_vars_to_rename (use->stmt);
5598 /* Rewrite the uses using the selected induction variables. */
5600 static void
5601 rewrite_uses (struct ivopts_data *data)
5603 unsigned i;
5604 struct iv_cand *cand;
5605 struct iv_use *use;
5607 for (i = 0; i < n_iv_uses (data); i++)
5609 use = iv_use (data, i);
5610 cand = use->selected;
5611 gcc_assert (cand);
5613 rewrite_use (data, use, cand);
5617 /* Removes the ivs that are not used after rewriting. */
5619 static void
5620 remove_unused_ivs (struct ivopts_data *data)
5622 unsigned j;
5623 bitmap_iterator bi;
5625 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5627 struct version_info *info;
5629 info = ver_info (data, j);
5630 if (info->iv
5631 && !zero_p (info->iv->step)
5632 && !info->inv_id
5633 && !info->iv->have_use_for
5634 && !info->preserve_biv)
5635 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5639 /* Frees data allocated by the optimization of a single loop. */
5641 static void
5642 free_loop_data (struct ivopts_data *data)
5644 unsigned i, j;
5645 bitmap_iterator bi;
5646 tree obj;
5648 htab_empty (data->niters);
5650 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5652 struct version_info *info;
5654 info = ver_info (data, i);
5655 if (info->iv)
5656 free (info->iv);
5657 info->iv = NULL;
5658 info->has_nonlin_use = false;
5659 info->preserve_biv = false;
5660 info->inv_id = 0;
5662 bitmap_clear (data->relevant);
5663 bitmap_clear (data->important_candidates);
5665 for (i = 0; i < n_iv_uses (data); i++)
5667 struct iv_use *use = iv_use (data, i);
5669 free (use->iv);
5670 BITMAP_FREE (use->related_cands);
5671 for (j = 0; j < use->n_map_members; j++)
5672 if (use->cost_map[j].depends_on)
5673 BITMAP_FREE (use->cost_map[j].depends_on);
5674 free (use->cost_map);
5675 free (use);
5677 VEC_truncate (iv_use_p, data->iv_uses, 0);
5679 for (i = 0; i < n_iv_cands (data); i++)
5681 struct iv_cand *cand = iv_cand (data, i);
5683 if (cand->iv)
5684 free (cand->iv);
5685 if (cand->depends_on)
5686 BITMAP_FREE (cand->depends_on);
5687 free (cand);
5689 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5691 if (data->version_info_size < num_ssa_names)
5693 data->version_info_size = 2 * num_ssa_names;
5694 free (data->version_info);
5695 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5698 data->max_inv_id = 0;
5700 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5701 SET_DECL_RTL (obj, NULL_RTX);
5703 VEC_truncate (tree, decl_rtl_to_reset, 0);
5706 /* Finalizes the data structures used by the iv optimization pass. */
5709 static void
5710 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5712 free_loop_data (data);
5713 free (data->version_info);
5714 BITMAP_FREE (data->relevant);
5715 BITMAP_FREE (data->important_candidates);
5716 htab_delete (data->niters);
5718 VEC_free (tree, heap, decl_rtl_to_reset);
5719 VEC_free (iv_use_p, heap, data->iv_uses);
5720 VEC_free (iv_cand_p, heap, data->iv_candidates);
5723 /* Optimizes LOOP. Returns true if anything changed. */
5725 static bool
5726 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5728 bool changed = false;
5729 struct iv_ca *iv_ca;
5730 edge exit;
5732 data->current_loop = loop;
5734 if (dump_file && (dump_flags & TDF_DETAILS))
5736 fprintf (dump_file, "Processing loop %d\n", loop->num);
5738 exit = single_dom_exit (loop);
5739 if (exit)
5741 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5742 exit->src->index, exit->dest->index);
5743 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5744 fprintf (dump_file, "\n");
5747 fprintf (dump_file, "\n");
5750 /* For each ssa name, determine whether it behaves as an induction variable
5751 in some loop. */
5752 if (!find_induction_variables (data))
5753 goto finish;
5755 /* Finds interesting uses (item 1). */
5756 find_interesting_uses (data);
5757 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5758 goto finish;
5760 /* Finds candidates for the induction variables (item 2). */
5761 find_iv_candidates (data);
5763 /* Calculates the costs (item 3, part 1). */
5764 determine_use_iv_costs (data);
5765 determine_iv_costs (data);
5766 determine_set_costs (data);
5768 /* Find the optimal set of induction variables (item 3, part 2). */
5769 iv_ca = find_optimal_iv_set (data);
5770 if (!iv_ca)
5771 goto finish;
5772 changed = true;
5774 /* Create the new induction variables (item 4, part 1). */
5775 create_new_ivs (data, iv_ca);
5776 iv_ca_free (&iv_ca);
5778 /* Rewrite the uses (item 4, part 2). */
5779 rewrite_uses (data);
5781 /* Remove the ivs that are unused after rewriting. */
5782 remove_unused_ivs (data);
5784 /* We have changed the structure of induction variables; it might happen
5785 that definitions in the scev database refer to some of them that were
5786 eliminated. */
5787 scev_reset ();
5789 finish:
5790 free_loop_data (data);
5792 return changed;
5795 /* Main entry point. Optimizes induction variables in LOOPS. */
5797 void
5798 tree_ssa_iv_optimize (struct loops *loops)
5800 struct loop *loop;
5801 struct ivopts_data data;
5803 tree_ssa_iv_optimize_init (&data);
5805 /* Optimize the loops starting with the innermost ones. */
5806 loop = loops->tree_root;
5807 while (loop->inner)
5808 loop = loop->inner;
5810 /* Scan the loops, inner ones first. */
5811 while (loop != loops->tree_root)
5813 if (dump_file && (dump_flags & TDF_DETAILS))
5814 flow_loop_dump (loop, dump_file, NULL, 1);
5816 tree_ssa_iv_optimize_loop (&data, loop);
5818 if (loop->next)
5820 loop = loop->next;
5821 while (loop->inner)
5822 loop = loop->inner;
5824 else
5825 loop = loop->outer;
5828 tree_ssa_iv_optimize_finalize (&data);
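/* Illustrative sketch, not part of the pass: the walk above visits the
   loop tree innermost-first -- dive to the deepest child, process it, move
   to the next sibling (diving again), and climb to the parent only once all
   its children are done; the root itself is never visited.  The same
   traversal over a hypothetical tree type:  */

struct toy_loop { struct toy_loop *inner, *next, *outer; };

static void
toy_visit_inner_first (struct toy_loop *root,
		       void (*visit) (struct toy_loop *))
{
  struct toy_loop *loop = root;

  while (loop->inner)
    loop = loop->inner;		/* Start at the deepest loop.  */

  while (loop != root)
    {
      visit (loop);
      if (loop->next)
	{
	  loop = loop->next;	/* Sibling: dive to its deepest child.  */
	  while (loop->inner)
	    loop = loop->inner;
	}
      else
	loop = loop->outer;	/* No sibling left: climb out.  */
    }
}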