/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
	 variable in the set and adds its cost to the sum.  The cost reflects
	 the time spent on modifying the induction variable's value to be usable
	 for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, and the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
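/* As an illustrative sketch (the exact outcome depends on the target cost
   model), a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains an address use &a[i] based on the biv i.  Strength reduction
   may select a pointer candidate p initialized to &a[0] and incremented
   by the element size, rewriting the use to *p and, if i has no other
   uses, eliminating i by expressing the exit test in terms of p.  */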
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier of the use, if have_use_for is true.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Checks whether there exists number X such that X * B = A, counting modulo
   2^BITS.  */
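/* For instance (an illustrative check): divide (4, 6, 2, &x) first halves
   the equation to 3 == x * 1 (mod 2^3), computes the inverse of 1, and
   yields x = 3; indeed 3 * 2 == 6 (mod 2^4).  divide (4, 3, 2, &x) returns
   false, since an odd number is not a multiple of an even one modulo a
   power of two.  */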
static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter,
						     true);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}

/* Returns structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "  may be zero if ");
	  print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* To avoid showing the ssa name in the dumps, if it was not reset by the
     caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we may not do any useful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a work for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* FIXME: convert_step should not be used outside chrec_convert: fix
     this by calling chrec_convert.  */
  iv_step = convert_step (dta->ivopts_data->current_loop,
			  sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF
      || (TREE_CODE (base) == COMPONENT_REF
	  && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && zero_p (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
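/* For instance (an illustrative case, assuming a field f at a constant
   4-byte offset within an 8-byte array element): stripping &a[i + 1].f
   yields &a[i] with *OFFSET set to 12, i.e. one element size plus the
   field offset.  */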
1885 /* Returns variant of TYPE that can be used as base for different uses.
1886 For integer types, we return unsigned variant of the type, which
1887 avoids problems with overflows. For pointer types, we return void *. */
1889 static tree
1890 generic_type_for (tree type)
1892 if (POINTER_TYPE_P (type))
1893 return ptr_type_node;
1895 if (TYPE_UNSIGNED (type))
1896 return type;
1898 return unsigned_type_for (type);
1901 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1902 the bitmap to that we should store it. */
1904 static struct ivopts_data *fd_ivopts_data;
1905 static tree
1906 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1908 bitmap *depends_on = data;
1909 struct version_info *info;
1911 if (TREE_CODE (*expr_p) != SSA_NAME)
1912 return NULL_TREE;
1913 info = name_info (fd_ivopts_data, *expr_p);
1915 if (!info->inv_id || info->has_nonlin_use)
1916 return NULL_TREE;
1918 if (!*depends_on)
1919 *depends_on = BITMAP_ALLOC (NULL);
1920 bitmap_set_bit (*depends_on, info->inv_id);
1922 return NULL_TREE;
1925 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1926 position to POS. If USE is not NULL, the candidate is set as related to
1927 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1928 replacement of the final value of the iv by a direct computation. */
1930 static struct iv_cand *
1931 add_candidate_1 (struct ivopts_data *data,
1932 tree base, tree step, bool important, enum iv_position pos,
1933 struct iv_use *use, tree incremented_at)
1935 unsigned i;
1936 struct iv_cand *cand = NULL;
1937 tree type, orig_type;
1939 if (base)
1941 orig_type = TREE_TYPE (base);
1942 type = generic_type_for (orig_type);
1943 if (type != orig_type)
1945 base = fold_convert (type, base);
1946 if (step)
1947 step = fold_convert (type, step);
1951 for (i = 0; i < n_iv_cands (data); i++)
1953 cand = iv_cand (data, i);
1955 if (cand->pos != pos)
1956 continue;
1958 if (cand->incremented_at != incremented_at)
1959 continue;
1961 if (!cand->iv)
1963 if (!base && !step)
1964 break;
1966 continue;
1969 if (!base && !step)
1970 continue;
1972 if (!operand_equal_p (base, cand->iv->base, 0))
1973 continue;
1975 if (zero_p (cand->iv->step))
1977 if (zero_p (step))
1978 break;
1980 else
1982 if (step && operand_equal_p (step, cand->iv->step, 0))
1983 break;
1987 if (i == n_iv_cands (data))
1989 cand = XCNEW (struct iv_cand);
1990 cand->id = i;
1992 if (!base && !step)
1993 cand->iv = NULL;
1994 else
1995 cand->iv = alloc_iv (base, step);
1997 cand->pos = pos;
1998 if (pos != IP_ORIGINAL && cand->iv)
2000 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2001 cand->var_after = cand->var_before;
2003 cand->important = important;
2004 cand->incremented_at = incremented_at;
2005 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2007 if (step
2008 && TREE_CODE (step) != INTEGER_CST)
2010 fd_ivopts_data = data;
2011 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2014 if (dump_file && (dump_flags & TDF_DETAILS))
2015 dump_cand (dump_file, cand);
2018 if (important && !cand->important)
2020 cand->important = true;
2021 if (dump_file && (dump_flags & TDF_DETAILS))
2022 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2025 if (use)
2027 bitmap_set_bit (use->related_cands, i);
2028 if (dump_file && (dump_flags & TDF_DETAILS))
2029 fprintf (dump_file, "Candidate %d is related to use %d\n",
2030 cand->id, use->id);
2033 return cand;
2036 /* Returns true if incrementing the induction variable at the end of the LOOP
2037 is allowed.
2039 The purpose is to avoid splitting latch edge with a biv increment, thus
2040 creating a jump, possibly confusing other optimization passes and leaving
2041 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2042 is not available (so we do not have a better alternative), or if the latch
2043 edge is already nonempty. */
2045 static bool
2046 allow_ip_end_pos_p (struct loop *loop)
2048 if (!ip_normal_pos (loop))
2049 return true;
2051 if (!empty_block_p (ip_end_pos (loop)))
2052 return true;
2054 return false;
2057 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2058 position to POS. If USE is not NULL, the candidate is set as related to
2059 it. The candidate computation is scheduled on all available positions. */
2061 static void
2062 add_candidate (struct ivopts_data *data,
2063 tree base, tree step, bool important, struct iv_use *use)
2065 if (ip_normal_pos (data->current_loop))
2066 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2067 if (ip_end_pos (data->current_loop)
2068 && allow_ip_end_pos_p (data->current_loop))
2069 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2072 /* Add a standard "0 + 1 * iteration" iv candidate for a
2073 type with SIZE bits. */
2075 static void
2076 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2077 unsigned int size)
2079 tree type = lang_hooks.types.type_for_size (size, true);
2080 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2081 true, NULL);
2084 /* Adds standard iv candidates. */
2086 static void
2087 add_standard_iv_candidates (struct ivopts_data *data)
2089 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2091 /* The same for a double-integer type if it is still fast enough. */
2092 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2093 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2097 /* Adds candidates bases on the old induction variable IV. */
2099 static void
2100 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2102 tree phi, def;
2103 struct iv_cand *cand;
2105 add_candidate (data, iv->base, iv->step, true, NULL);
2107 /* The same, but with initial value zero. */
2108 add_candidate (data,
2109 build_int_cst (TREE_TYPE (iv->base), 0),
2110 iv->step, true, NULL);
2112 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2113 if (TREE_CODE (phi) == PHI_NODE)
2115 /* Additionally record the possibility of leaving the original iv
2116 untouched. */
2117 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2118 cand = add_candidate_1 (data,
2119 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2120 SSA_NAME_DEF_STMT (def));
2121 cand->var_before = iv->ssa_name;
2122 cand->var_after = def;
2126 /* Adds candidates based on the old induction variables. */
2128 static void
2129 add_old_ivs_candidates (struct ivopts_data *data)
2131 unsigned i;
2132 struct iv *iv;
2133 bitmap_iterator bi;
2135 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2137 iv = ver_info (data, i)->iv;
2138 if (iv && iv->biv_p && !zero_p (iv->step))
2139 add_old_iv_candidates (data, iv);
2143 /* Adds candidates based on the value of the induction variable IV and USE. */
2145 static void
2146 add_iv_value_candidates (struct ivopts_data *data,
2147 struct iv *iv, struct iv_use *use)
2149 unsigned HOST_WIDE_INT offset;
2150 tree base;
2152 add_candidate (data, iv->base, iv->step, false, use);
2154 /* The same, but with initial value zero. Make such variable important,
2155 since it is generic enough so that possibly many uses may be based
2156 on it. */
2157 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2158 iv->step, true, use);
2160 /* Third, try removing the constant offset. */
2161 base = strip_offset (iv->base, &offset);
2162 if (offset)
2163 add_candidate (data, base, iv->step, false, use);
2166 /* Adds candidates based on the uses. */
2168 static void
2169 add_derived_ivs_candidates (struct ivopts_data *data)
2171 unsigned i;
2173 for (i = 0; i < n_iv_uses (data); i++)
2175 struct iv_use *use = iv_use (data, i);
2177 if (!use)
2178 continue;
2180 switch (use->type)
2182 case USE_NONLINEAR_EXPR:
2183 case USE_COMPARE:
2184 case USE_ADDRESS:
2185 /* Just add the ivs based on the value of the iv used here. */
2186 add_iv_value_candidates (data, use->iv, use);
2187 break;
2189 default:
2190 gcc_unreachable ();
2195 /* Record important candidates and add them to related_cands bitmaps
2196 if needed. */
2198 static void
2199 record_important_candidates (struct ivopts_data *data)
2201 unsigned i;
2202 struct iv_use *use;
2204 for (i = 0; i < n_iv_cands (data); i++)
2206 struct iv_cand *cand = iv_cand (data, i);
2208 if (cand->important)
2209 bitmap_set_bit (data->important_candidates, i);
2212 data->consider_all_candidates = (n_iv_cands (data)
2213 <= CONSIDER_ALL_CANDIDATES_BOUND);
2215 if (data->consider_all_candidates)
2217 /* We will not need "related_cands" bitmaps in this case,
2218 so release them to decrease peak memory consumption. */
2219 for (i = 0; i < n_iv_uses (data); i++)
2221 use = iv_use (data, i);
2222 BITMAP_FREE (use->related_cands);
2225 else
2227 /* Add important candidates to the related_cands bitmaps. */
2228 for (i = 0; i < n_iv_uses (data); i++)
2229 bitmap_ior_into (iv_use (data, i)->related_cands,
2230 data->important_candidates);
2234 /* Finds the candidates for the induction variables. */
2236 static void
2237 find_iv_candidates (struct ivopts_data *data)
2239 /* Add commonly used ivs. */
2240 add_standard_iv_candidates (data);
2242 /* Add old induction variables. */
2243 add_old_ivs_candidates (data);
2245 /* Add induction variables derived from uses. */
2246 add_derived_ivs_candidates (data);
2248 /* Record the important candidates. */
2249 record_important_candidates (data);
2252 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2253 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2254 we allocate a simple list to every use. */
2256 static void
2257 alloc_use_cost_map (struct ivopts_data *data)
2259 unsigned i, size, s, j;
2261 for (i = 0; i < n_iv_uses (data); i++)
2263 struct iv_use *use = iv_use (data, i);
2264 bitmap_iterator bi;
2266 if (data->consider_all_candidates)
2267 size = n_iv_cands (data);
2268 else
2270 s = 0;
2271 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2273 s++;
2276 /* Round up to the power of two, so that moduling by it is fast. */
2277 for (size = 1; size < s; size <<= 1)
2278 continue;
2281 use->n_map_members = size;
2282 use->cost_map = XCNEWVEC (struct cost_pair, size);
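
/* Illustrative sketch (hypothetical helper, not part of the pass): the
   rounding loop above computes the least power of two greater than or equal
   to the number of related candidates, so that set_use_iv_cost and
   get_use_iv_cost can reduce a candidate id with "id & (size - 1)" instead
   of a division.  */

static unsigned
round_up_to_pow2_sketch (unsigned s)
{
  unsigned size;

  /* E.g. s = 5 yields 8, s = 8 yields 8, and s <= 1 yields 1.  */
  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}
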
2286 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2287 on invariants DEPENDS_ON and that the value used in expressing it
2288 is VALUE. */
2290 static void
2291 set_use_iv_cost (struct ivopts_data *data,
2292 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2293 bitmap depends_on, tree value)
2295 unsigned i, s;
2297 if (cost == INFTY)
2299 BITMAP_FREE (depends_on);
2300 return;
2303 if (data->consider_all_candidates)
2305 use->cost_map[cand->id].cand = cand;
2306 use->cost_map[cand->id].cost = cost;
2307 use->cost_map[cand->id].depends_on = depends_on;
2308 use->cost_map[cand->id].value = value;
2309 return;
2312 /* n_map_members is a power of two, so this computes modulo. */
2313 s = cand->id & (use->n_map_members - 1);
2314 for (i = s; i < use->n_map_members; i++)
2315 if (!use->cost_map[i].cand)
2316 goto found;
2317 for (i = 0; i < s; i++)
2318 if (!use->cost_map[i].cand)
2319 goto found;
2321 gcc_unreachable ();
2323 found:
2324 use->cost_map[i].cand = cand;
2325 use->cost_map[i].cost = cost;
2326 use->cost_map[i].depends_on = depends_on;
2327 use->cost_map[i].value = value;
2330 /* Gets cost of (USE, CANDIDATE) pair. */
2332 static struct cost_pair *
2333 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2334 struct iv_cand *cand)
2336 unsigned i, s;
2337 struct cost_pair *ret;
2339 if (!cand)
2340 return NULL;
2342 if (data->consider_all_candidates)
2344 ret = use->cost_map + cand->id;
2345 if (!ret->cand)
2346 return NULL;
2348 return ret;
2351 /* n_map_members is a power of two, so this computes modulo. */
2352 s = cand->id & (use->n_map_members - 1);
2353 for (i = s; i < use->n_map_members; i++)
2354 if (use->cost_map[i].cand == cand)
2355 return use->cost_map + i;
2357 for (i = 0; i < s; i++)
2358 if (use->cost_map[i].cand == cand)
2359 return use->cost_map + i;
2361 return NULL;
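
/* A minimal sketch of the open-addressing scheme shared by set_use_iv_cost
   and get_use_iv_cost, shown on a toy table of integer keys (hypothetical,
   for illustration only).  SIZE must be a power of two; probing starts at
   key & (size - 1), scans to the end of the table and then wraps around to
   the beginning, mirroring the two loops above.  */

static int *
probe_sketch (int *table, unsigned size, int key)
{
  unsigned i, start = key & (size - 1);

  for (i = start; i < size; i++)
    if (table[i] == key)
      return &table[i];
  for (i = 0; i < start; i++)
    if (table[i] == key)
      return &table[i];

  return NULL;
}
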
2364 /* Returns an estimate of the cost of computing SEQ. */
2366 static unsigned
2367 seq_cost (rtx seq)
2369 unsigned cost = 0;
2370 rtx set;
2372 for (; seq; seq = NEXT_INSN (seq))
2374 set = single_set (seq);
2375 if (set)
2376 cost += rtx_cost (set, SET);
2377 else
2378 cost++;
2381 return cost;
2384 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2385 static rtx
2386 produce_memory_decl_rtl (tree obj, int *regno)
2388 rtx x;
2390 gcc_assert (obj);
2391 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2393 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2394 x = gen_rtx_SYMBOL_REF (Pmode, name);
2396 else
2397 x = gen_raw_REG (Pmode, (*regno)++);
2399 return gen_rtx_MEM (DECL_MODE (obj), x);
2402 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2403 walk_tree. DATA contains the current fake register number. */
2405 static tree
2406 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2408 tree obj = NULL_TREE;
2409 rtx x = NULL_RTX;
2410 int *regno = data;
2412 switch (TREE_CODE (*expr_p))
2414 case ADDR_EXPR:
2415 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2416 handled_component_p (*expr_p);
2417 expr_p = &TREE_OPERAND (*expr_p, 0))
2418 continue;
2419 obj = *expr_p;
2420 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2421 x = produce_memory_decl_rtl (obj, regno);
2422 break;
2424 case SSA_NAME:
2425 *ws = 0;
2426 obj = SSA_NAME_VAR (*expr_p);
2427 if (!DECL_RTL_SET_P (obj))
2428 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2429 break;
2431 case VAR_DECL:
2432 case PARM_DECL:
2433 case RESULT_DECL:
2434 *ws = 0;
2435 obj = *expr_p;
2437 if (DECL_RTL_SET_P (obj))
2438 break;
2440 if (DECL_MODE (obj) == BLKmode)
2441 x = produce_memory_decl_rtl (obj, regno);
2442 else
2443 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2445 break;
2447 default:
2448 break;
2451 if (x)
2453 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2454 SET_DECL_RTL (obj, x);
2457 return NULL_TREE;
2460 /* Determines cost of the computation of EXPR. */
2462 static unsigned
2463 computation_cost (tree expr)
2465 rtx seq, rslt;
2466 tree type = TREE_TYPE (expr);
2467 unsigned cost;
2468 /* Avoid using hard regs in ways which may be unsupported. */
2469 int regno = LAST_VIRTUAL_REGISTER + 1;
2471 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2472 start_sequence ();
2473 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2474 seq = get_insns ();
2475 end_sequence ();
2477 cost = seq_cost (seq);
2478 if (MEM_P (rslt))
2479 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2481 return cost;
2484 /* Returns variable containing the value of candidate CAND at statement AT. */
2486 static tree
2487 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2489 if (stmt_after_increment (loop, cand, stmt))
2490 return cand->var_after;
2491 else
2492 return cand->var_before;
2495 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2496 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2498 bool
2499 tree_int_cst_sign_bit (tree t)
2501 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2502 unsigned HOST_WIDE_INT w;
2504 if (bitno < HOST_BITS_PER_WIDE_INT)
2505 w = TREE_INT_CST_LOW (t);
2506 else
2508 w = TREE_INT_CST_HIGH (t);
2509 bitno -= HOST_BITS_PER_WIDE_INT;
2512 return (w >> bitno) & 1;
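
/* Worked example (illustrative): for an 8-bit type holding the value 0x80,
   BITNO is 7, which is below HOST_BITS_PER_WIDE_INT, so W is the low word
   0x80 and (w >> 7) & 1 is 1 -- the value is negative in that type.  For a
   precision above HOST_BITS_PER_WIDE_INT the bit is instead taken from the
   high word, after HOST_BITS_PER_WIDE_INT is subtracted from BITNO.  */
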
2515 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2516 return cst. Otherwise return NULL_TREE. */
2518 static tree
2519 constant_multiple_of (tree type, tree top, tree bot)
2521 tree res, mby, p0, p1;
2522 enum tree_code code;
2523 bool negate;
2525 STRIP_NOPS (top);
2526 STRIP_NOPS (bot);
2528 if (operand_equal_p (top, bot, 0))
2529 return build_int_cst (type, 1);
2531 code = TREE_CODE (top);
2532 switch (code)
2534 case MULT_EXPR:
2535 mby = TREE_OPERAND (top, 1);
2536 if (TREE_CODE (mby) != INTEGER_CST)
2537 return NULL_TREE;
2539 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2540 if (!res)
2541 return NULL_TREE;
2543 return fold_binary_to_constant (MULT_EXPR, type, res,
2544 fold_convert (type, mby));
2546 case PLUS_EXPR:
2547 case MINUS_EXPR:
2548 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2549 if (!p0)
2550 return NULL_TREE;
2551 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2552 if (!p1)
2553 return NULL_TREE;
2555 return fold_binary_to_constant (code, type, p0, p1);
2557 case INTEGER_CST:
2558 if (TREE_CODE (bot) != INTEGER_CST)
2559 return NULL_TREE;
2561 bot = fold_convert (type, bot);
2562 top = fold_convert (type, top);
2564 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2565 the result afterwards. */
2566 if (tree_int_cst_sign_bit (bot))
2568 negate = true;
2569 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2571 else
2572 negate = false;
2574 /* Ditto for TOP. */
2575 if (tree_int_cst_sign_bit (top))
2577 negate = !negate;
2578 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2581 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2582 return NULL_TREE;
2584 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2585 if (negate)
2586 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2587 return res;
2589 default:
2590 return NULL_TREE;
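
/* Worked examples (illustrative): TOP = 12, BOT = 4 yields 3 through the
   INTEGER_CST arm; TOP = -8, BOT = 2 negates TOP, divides 8 / 2 = 4 and
   negates the result back to -4.  TOP = i * 6, BOT = i * 3 yields NULL_TREE,
   since the MULT_EXPR arm only recurses on the first operand -- the function
   proves TOP = cst * BOT structurally, it does not simplify quotients.  */
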
2594 /* Sets COMB to CST. */
2596 static void
2597 aff_combination_const (struct affine_tree_combination *comb, tree type,
2598 unsigned HOST_WIDE_INT cst)
2600 unsigned prec = TYPE_PRECISION (type);
2602 comb->type = type;
2603 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2605 comb->n = 0;
2606 comb->rest = NULL_TREE;
2607 comb->offset = cst & comb->mask;
2610 /* Sets COMB to single element ELT. */
2612 static void
2613 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2615 unsigned prec = TYPE_PRECISION (type);
2617 comb->type = type;
2618 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2620 comb->n = 1;
2621 comb->elts[0] = elt;
2622 comb->coefs[0] = 1;
2623 comb->rest = NULL_TREE;
2624 comb->offset = 0;
2627 /* Scales COMB by SCALE. */
2629 static void
2630 aff_combination_scale (struct affine_tree_combination *comb,
2631 unsigned HOST_WIDE_INT scale)
2633 unsigned i, j;
2635 if (scale == 1)
2636 return;
2638 if (scale == 0)
2640 aff_combination_const (comb, comb->type, 0);
2641 return;
2644 comb->offset = (scale * comb->offset) & comb->mask;
2645 for (i = 0, j = 0; i < comb->n; i++)
2647 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2648 comb->elts[j] = comb->elts[i];
2649 if (comb->coefs[j] != 0)
2650 j++;
2652 comb->n = j;
2654 if (comb->rest)
2656 if (comb->n < MAX_AFF_ELTS)
2658 comb->coefs[comb->n] = scale;
2659 comb->elts[comb->n] = comb->rest;
2660 comb->rest = NULL_TREE;
2661 comb->n++;
2663 else
2664 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2665 build_int_cst_type (comb->type, scale));
2669 /* Adds ELT * SCALE to COMB. */
2671 static void
2672 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2673 unsigned HOST_WIDE_INT scale)
2675 unsigned i;
2677 if (scale == 0)
2678 return;
2680 for (i = 0; i < comb->n; i++)
2681 if (operand_equal_p (comb->elts[i], elt, 0))
2683 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2684 if (comb->coefs[i])
2685 return;
2687 comb->n--;
2688 comb->coefs[i] = comb->coefs[comb->n];
2689 comb->elts[i] = comb->elts[comb->n];
2691 if (comb->rest)
2693 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2694 comb->coefs[comb->n] = 1;
2695 comb->elts[comb->n] = comb->rest;
2696 comb->rest = NULL_TREE;
2697 comb->n++;
2699 return;
2701 if (comb->n < MAX_AFF_ELTS)
2703 comb->coefs[comb->n] = scale;
2704 comb->elts[comb->n] = elt;
2705 comb->n++;
2706 return;
2709 if (scale == 1)
2710 elt = fold_convert (comb->type, elt);
2711 else
2712 elt = fold_build2 (MULT_EXPR, comb->type,
2713 fold_convert (comb->type, elt),
2714 build_int_cst_type (comb->type, scale));
2716 if (comb->rest)
2717 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2718 else
2719 comb->rest = elt;
2722 /* Adds COMB2 to COMB1. */
2724 static void
2725 aff_combination_add (struct affine_tree_combination *comb1,
2726 struct affine_tree_combination *comb2)
2728 unsigned i;
2730 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2731 for (i = 0; i < comb2->n; i++)
2732 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2733 if (comb2->rest)
2734 aff_combination_add_elt (comb1, comb2->rest, 1);
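
/* Worked example (illustrative): with a 32-bit TYPE, the expression
   4*i + 2*j + 7 is represented as n = 2, elts = {i, j}, coefs = {4, 2},
   offset = 7, rest = NULL_TREE, and mask = 0xffffffff.  Scaling by -1
   stores each coefficient modulo the mask, giving coefs {0xfffffffc,
   0xfffffffe} and offset 0xfffffff9; adding the original combination back
   then cancels every element and the offset, leaving the zero
   combination.  */
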
2737 /* Splits EXPR into an affine combination of parts. */
2739 static void
2740 tree_to_aff_combination (tree expr, tree type,
2741 struct affine_tree_combination *comb)
2743 struct affine_tree_combination tmp;
2744 enum tree_code code;
2745 tree cst, core, toffset;
2746 HOST_WIDE_INT bitpos, bitsize;
2747 enum machine_mode mode;
2748 int unsignedp, volatilep;
2750 STRIP_NOPS (expr);
2752 code = TREE_CODE (expr);
2753 switch (code)
2755 case INTEGER_CST:
2756 aff_combination_const (comb, type, int_cst_value (expr));
2757 return;
2759 case PLUS_EXPR:
2760 case MINUS_EXPR:
2761 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2762 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2763 if (code == MINUS_EXPR)
2764 aff_combination_scale (&tmp, -1);
2765 aff_combination_add (comb, &tmp);
2766 return;
2768 case MULT_EXPR:
2769 cst = TREE_OPERAND (expr, 1);
2770 if (TREE_CODE (cst) != INTEGER_CST)
2771 break;
2772 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2773 aff_combination_scale (comb, int_cst_value (cst));
2774 return;
2776 case NEGATE_EXPR:
2777 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2778 aff_combination_scale (comb, -1);
2779 return;
2781 case ADDR_EXPR:
2782 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2783 &toffset, &mode, &unsignedp, &volatilep,
2784 false);
2785 if (bitpos % BITS_PER_UNIT != 0)
2786 break;
2787 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2788 core = build_fold_addr_expr (core);
2789 if (TREE_CODE (core) == ADDR_EXPR)
2790 aff_combination_add_elt (comb, core, 1);
2791 else
2793 tree_to_aff_combination (core, type, &tmp);
2794 aff_combination_add (comb, &tmp);
2796 if (toffset)
2798 tree_to_aff_combination (toffset, type, &tmp);
2799 aff_combination_add (comb, &tmp);
2801 return;
2803 default:
2804 break;
2807 aff_combination_elt (comb, type, expr);
2810 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for the width of TYPE. */
2812 static tree
2813 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2814 unsigned HOST_WIDE_INT mask)
2816 enum tree_code code;
2818 scale &= mask;
2819 elt = fold_convert (type, elt);
2821 if (scale == 1)
2823 if (!expr)
2824 return elt;
2826 return fold_build2 (PLUS_EXPR, type, expr, elt);
2829 if (scale == mask)
2831 if (!expr)
2832 return fold_build1 (NEGATE_EXPR, type, elt);
2834 return fold_build2 (MINUS_EXPR, type, expr, elt);
2837 if (!expr)
2838 return fold_build2 (MULT_EXPR, type, elt,
2839 build_int_cst_type (type, scale));
2841 if ((scale | (mask >> 1)) == mask)
2843 /* Scale is negative. */
2844 code = MINUS_EXPR;
2845 scale = (-scale) & mask;
2847 else
2848 code = PLUS_EXPR;
2850 elt = fold_build2 (MULT_EXPR, type, elt,
2851 build_int_cst_type (type, scale));
2852 return fold_build2 (code, type, expr, elt);
2855 /* Copies the tree elements of COMB to ensure that they are not shared. */
2857 static void
2858 unshare_aff_combination (struct affine_tree_combination *comb)
2860 unsigned i;
2862 for (i = 0; i < comb->n; i++)
2863 comb->elts[i] = unshare_expr (comb->elts[i]);
2864 if (comb->rest)
2865 comb->rest = unshare_expr (comb->rest);
2868 /* Makes tree from the affine combination COMB. */
2870 static tree
2871 aff_combination_to_tree (struct affine_tree_combination *comb)
2873 tree type = comb->type;
2874 tree expr = comb->rest;
2875 unsigned i;
2876 unsigned HOST_WIDE_INT off, sgn;
2878 /* Handle the special case produced by get_computation_aff when
2879 the type does not fit in HOST_WIDE_INT. */
2880 if (comb->n == 0 && comb->offset == 0)
2881 return fold_convert (type, expr);
2883 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2885 for (i = 0; i < comb->n; i++)
2886 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2887 comb->mask);
2889 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2891 /* Offset is negative. */
2892 off = (-comb->offset) & comb->mask;
2893 sgn = comb->mask;
2895 else
2897 off = comb->offset;
2898 sgn = 1;
2900 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2901 comb->mask);
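
/* A standalone sketch of the negativity test used above and in
   add_elt_to_tree (hypothetical helper, for illustration): under the
   truncating arithmetic of MASK, a value is negative iff its top bit is
   set, which is the case iff or-ing in all the lower bits yields MASK
   itself.  */

static bool
negative_under_mask_sketch (unsigned HOST_WIDE_INT x,
			    unsigned HOST_WIDE_INT mask)
{
  /* With mask = 0xffffffff: x = 0xfffffff9 (i.e. -7) gives true,
     x = 7 gives false.  */
  return (x | (mask >> 1)) == mask;
}
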
2904 /* Determines the expression by which USE is expressed from induction variable
2905 CAND at statement AT in LOOP. The expression is stored in a decomposed
2906 form into AFF. Returns false if USE cannot be expressed using CAND. */
2908 static bool
2909 get_computation_aff (struct loop *loop,
2910 struct iv_use *use, struct iv_cand *cand, tree at,
2911 struct affine_tree_combination *aff)
2913 tree ubase = use->iv->base;
2914 tree ustep = use->iv->step;
2915 tree cbase = cand->iv->base;
2916 tree cstep = cand->iv->step;
2917 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2918 tree uutype;
2919 tree expr, delta;
2920 tree ratio;
2921 unsigned HOST_WIDE_INT ustepi, cstepi;
2922 HOST_WIDE_INT ratioi;
2923 struct affine_tree_combination cbase_aff, expr_aff;
2924 tree cstep_orig = cstep, ustep_orig = ustep;
2926 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2928 /* We do not have enough precision to express the values of USE. */
2929 return false;
2932 expr = var_at_stmt (loop, cand, at);
2934 if (TREE_TYPE (expr) != ctype)
2936 /* This may happen with the original ivs. */
2937 expr = fold_convert (ctype, expr);
2940 if (TYPE_UNSIGNED (utype))
2941 uutype = utype;
2942 else
2944 uutype = unsigned_type_for (utype);
2945 ubase = fold_convert (uutype, ubase);
2946 ustep = fold_convert (uutype, ustep);
2949 if (uutype != ctype)
2951 expr = fold_convert (uutype, expr);
2952 cbase = fold_convert (uutype, cbase);
2953 cstep = fold_convert (uutype, cstep);
2955 /* If the conversion is not a noop, we must take it into account when
2956 considering the value of the step. */
2957 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2958 cstep_orig = cstep;
2961 if (cst_and_fits_in_hwi (cstep_orig)
2962 && cst_and_fits_in_hwi (ustep_orig))
2964 ustepi = int_cst_value (ustep_orig);
2965 cstepi = int_cst_value (cstep_orig);
2967 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2969 /* TODO maybe consider case when ustep divides cstep and the ratio is
2970 a power of 2 (so that the division is fast to execute)? We would
2971 need to be much more careful with overflows etc. then. */
2972 return false;
2975 ratio = build_int_cst_type (uutype, ratioi);
2977 else
2979 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2980 if (!ratio)
2981 return false;
2983 /* Ratioi is only used to detect special cases when the multiplicative
2984 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2985 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2986 to integer_onep/integer_all_onesp, since the former ignores
2987 TREE_OVERFLOW. */
2988 if (cst_and_fits_in_hwi (ratio))
2989 ratioi = int_cst_value (ratio);
2990 else if (integer_onep (ratio))
2991 ratioi = 1;
2992 else if (integer_all_onesp (ratio))
2993 ratioi = -1;
2994 else
2995 ratioi = 0;
2998 /* We may need to shift the value if we are after the increment. */
2999 if (stmt_after_increment (loop, cand, at))
3000 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3002 /* use = ubase - ratio * cbase + ratio * var.
3004 In the general case ubase + ratio * (var - cbase) could be better (one less
3005 multiplication), but often it is possible to eliminate redundant parts
3006 of computations from (ubase - ratio * cbase) term, and if it does not
3007 happen, fold is able to apply the distributive law to obtain this form
3008 anyway. */
3010 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3012 /* Let's compute in trees and just return the result in AFF. This case
3013 should not be very common, and fold itself is not that bad either,
3014 so making the aff. functions more complicated to handle this case
3015 is not that urgent. */
3016 if (ratioi == 1)
3018 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3019 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3021 else if (ratioi == -1)
3023 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3024 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3026 else
3028 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3029 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3030 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3031 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3034 aff->type = uutype;
3035 aff->n = 0;
3036 aff->offset = 0;
3037 aff->mask = 0;
3038 aff->rest = expr;
3039 return true;
3042 /* If we got here, the type fits in HOST_WIDE_INT, thus it must have been
3043 possible to compute RATIOI. */
3044 gcc_assert (ratioi);
3046 tree_to_aff_combination (ubase, uutype, aff);
3047 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3048 tree_to_aff_combination (expr, uutype, &expr_aff);
3049 aff_combination_scale (&cbase_aff, -ratioi);
3050 aff_combination_scale (&expr_aff, ratioi);
3051 aff_combination_add (aff, &cbase_aff);
3052 aff_combination_add (aff, &expr_aff);
3054 return true;
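
/* Worked example (illustrative): for an address use with base 0 and step 4
   (say, stepping through an array of 4-byte ints) and a candidate with base
   0 and step 1, the ratio ustep / cstep is 4 and the use becomes
   0 - 4 * 0 + 4 * var, i.e. 4 * var.  Had the candidate base been 100, the
   result would be -400 + 4 * var, with the constant folded into the offset
   of the affine combination.  */
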
3057 /* Determines the expression by which USE is expressed from induction variable
3058 CAND at statement AT in LOOP. The computation is unshared. */
3060 static tree
3061 get_computation_at (struct loop *loop,
3062 struct iv_use *use, struct iv_cand *cand, tree at)
3064 struct affine_tree_combination aff;
3065 tree type = TREE_TYPE (use->iv->base);
3067 if (!get_computation_aff (loop, use, cand, at, &aff))
3068 return NULL_TREE;
3069 unshare_aff_combination (&aff);
3070 return fold_convert (type, aff_combination_to_tree (&aff));
3073 /* Determines the expression by which USE is expressed from induction variable
3074 CAND in LOOP. The computation is unshared. */
3076 static tree
3077 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3079 return get_computation_at (loop, use, cand, use->stmt);
3082 /* Returns cost of addition in MODE. */
3084 static unsigned
3085 add_cost (enum machine_mode mode)
3087 static unsigned costs[NUM_MACHINE_MODES];
3088 rtx seq;
3089 unsigned cost;
3091 if (costs[mode])
3092 return costs[mode];
3094 start_sequence ();
3095 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3096 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3097 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3098 NULL_RTX);
3099 seq = get_insns ();
3100 end_sequence ();
3102 cost = seq_cost (seq);
3103 if (!cost)
3104 cost = 1;
3106 costs[mode] = cost;
3108 if (dump_file && (dump_flags & TDF_DETAILS))
3109 fprintf (dump_file, "Addition in %s costs %d\n",
3110 GET_MODE_NAME (mode), cost);
3111 return cost;
3114 /* Entry in a hashtable of already known costs for multiplication. */
3115 struct mbc_entry
3117 HOST_WIDE_INT cst; /* The constant to multiply by. */
3118 enum machine_mode mode; /* In mode. */
3119 unsigned cost; /* The cost. */
3122 /* Computes the hash value for ENTRY. */
3124 static hashval_t
3125 mbc_entry_hash (const void *entry)
3127 const struct mbc_entry *e = entry;
3129 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3132 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3134 static int
3135 mbc_entry_eq (const void *entry1, const void *entry2)
3137 const struct mbc_entry *e1 = entry1;
3138 const struct mbc_entry *e2 = entry2;
3140 return (e1->mode == e2->mode
3141 && e1->cst == e2->cst);
3144 /* Returns cost of multiplication by constant CST in MODE. */
3146 unsigned
3147 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3149 static htab_t costs;
3150 struct mbc_entry **cached, act;
3151 rtx seq;
3152 unsigned cost;
3154 if (!costs)
3155 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3157 act.mode = mode;
3158 act.cst = cst;
3159 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3160 if (*cached)
3161 return (*cached)->cost;
3163 *cached = XNEW (struct mbc_entry);
3164 (*cached)->mode = mode;
3165 (*cached)->cst = cst;
3167 start_sequence ();
3168 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3169 gen_int_mode (cst, mode), NULL_RTX, 0);
3170 seq = get_insns ();
3171 end_sequence ();
3173 cost = seq_cost (seq);
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3177 (int) cst, GET_MODE_NAME (mode), cost);
3179 (*cached)->cost = cost;
3181 return cost;
3184 /* Returns true if multiplying by RATIO is allowed in an address. */
3186 bool
3187 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3189 #define MAX_RATIO 128
3190 static sbitmap valid_mult;
3192 if (!valid_mult)
3194 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3195 rtx addr;
3196 HOST_WIDE_INT i;
3198 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3199 sbitmap_zero (valid_mult);
3200 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3201 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3203 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3204 if (memory_address_p (Pmode, addr))
3205 SET_BIT (valid_mult, i + MAX_RATIO);
3208 if (dump_file && (dump_flags & TDF_DETAILS))
3210 fprintf (dump_file, " allowed multipliers:");
3211 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3212 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3213 fprintf (dump_file, " %d", (int) i);
3214 fprintf (dump_file, "\n");
3215 fprintf (dump_file, "\n");
3219 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3220 return false;
3222 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
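
/* For instance (illustrative), on a target with scaled addressing modes
   such as i386, the probing above typically leaves only the ratios 1, 2,
   4 and 8 set in VALID_MULT, while a target whose addresses are plain
   base + offset admits only ratio 1.  The bitmap is indexed with
   ratio + MAX_RATIO so that negative ratios can be represented too.  */
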
3225 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3226 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3227 the variable is omitted. The created memory accesses use Pmode.
3229 TODO -- there must be some better way. This all is quite crude. */
3231 static unsigned
3232 get_address_cost (bool symbol_present, bool var_present,
3233 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3235 static bool initialized = false;
3236 static HOST_WIDE_INT rat, off;
3237 static HOST_WIDE_INT min_offset, max_offset;
3238 static unsigned costs[2][2][2][2];
3239 unsigned cost, acost;
3240 rtx seq, addr, base;
3241 bool offset_p, ratio_p;
3242 rtx reg1;
3243 HOST_WIDE_INT s_offset;
3244 unsigned HOST_WIDE_INT mask;
3245 unsigned bits;
3247 if (!initialized)
3249 HOST_WIDE_INT i;
3250 initialized = true;
3252 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3254 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3255 for (i = 1; i <= 1 << 20; i <<= 1)
3257 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3258 if (!memory_address_p (Pmode, addr))
3259 break;
3261 max_offset = i >> 1;
3262 off = max_offset;
3264 for (i = 1; i <= 1 << 20; i <<= 1)
3266 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3267 if (!memory_address_p (Pmode, addr))
3268 break;
3270 min_offset = -(i >> 1);
3272 if (dump_file && (dump_flags & TDF_DETAILS))
3274 fprintf (dump_file, "get_address_cost:\n");
3275 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3276 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3279 rat = 1;
3280 for (i = 2; i <= MAX_RATIO; i++)
3281 if (multiplier_allowed_in_address_p (i))
3283 rat = i;
3284 break;
3288 bits = GET_MODE_BITSIZE (Pmode);
3289 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3290 offset &= mask;
3291 if ((offset >> (bits - 1) & 1))
3292 offset |= ~mask;
3293 s_offset = offset;
3295 cost = 0;
3296 offset_p = (s_offset != 0
3297 && min_offset <= s_offset && s_offset <= max_offset);
3298 ratio_p = (ratio != 1
3299 && multiplier_allowed_in_address_p (ratio));
3301 if (ratio != 1 && !ratio_p)
3302 cost += multiply_by_cost (ratio, Pmode);
3304 if (s_offset && !offset_p && !symbol_present)
3306 cost += add_cost (Pmode);
3307 var_present = true;
3310 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3311 if (!acost)
3313 int old_cse_not_expected;
3314 acost = 0;
3316 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3317 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3318 if (ratio_p)
3319 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3321 if (var_present)
3322 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3324 if (symbol_present)
3326 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3327 if (offset_p)
3328 base = gen_rtx_fmt_e (CONST, Pmode,
3329 gen_rtx_fmt_ee (PLUS, Pmode,
3330 base,
3331 gen_int_mode (off, Pmode)));
3333 else if (offset_p)
3334 base = gen_int_mode (off, Pmode);
3335 else
3336 base = NULL_RTX;
3338 if (base)
3339 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3341 start_sequence ();
3342 /* To avoid splitting addressing modes, pretend that no cse will
3343 follow. */
3344 old_cse_not_expected = cse_not_expected;
3345 cse_not_expected = true;
3346 addr = memory_address (Pmode, addr);
3347 cse_not_expected = old_cse_not_expected;
3348 seq = get_insns ();
3349 end_sequence ();
3351 acost = seq_cost (seq);
3352 acost += address_cost (addr, Pmode);
3354 if (!acost)
3355 acost = 1;
3356 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3359 return cost + acost;
3362 /* Estimates cost of forcing expression EXPR into a variable. */
3364 unsigned
3365 force_expr_to_var_cost (tree expr)
3367 static bool costs_initialized = false;
3368 static unsigned integer_cost;
3369 static unsigned symbol_cost;
3370 static unsigned address_cost;
3371 tree op0, op1;
3372 unsigned cost0, cost1, cost;
3373 enum machine_mode mode;
3375 if (!costs_initialized)
3377 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3378 rtx x = gen_rtx_MEM (DECL_MODE (var),
3379 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3380 tree addr;
3381 tree type = build_pointer_type (integer_type_node);
3383 integer_cost = computation_cost (build_int_cst (integer_type_node,
3384 2000));
3386 SET_DECL_RTL (var, x);
3387 TREE_STATIC (var) = 1;
3388 addr = build1 (ADDR_EXPR, type, var);
3389 symbol_cost = computation_cost (addr) + 1;
3391 address_cost
3392 = computation_cost (build2 (PLUS_EXPR, type,
3393 addr,
3394 build_int_cst (type, 2000))) + 1;
3395 if (dump_file && (dump_flags & TDF_DETAILS))
3397 fprintf (dump_file, "force_expr_to_var_cost:\n");
3398 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3399 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3400 fprintf (dump_file, " address %d\n", (int) address_cost);
3401 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3402 fprintf (dump_file, "\n");
3405 costs_initialized = true;
3408 STRIP_NOPS (expr);
3410 if (SSA_VAR_P (expr))
3411 return 0;
3413 if (TREE_INVARIANT (expr))
3415 if (TREE_CODE (expr) == INTEGER_CST)
3416 return integer_cost;
3418 if (TREE_CODE (expr) == ADDR_EXPR)
3420 tree obj = TREE_OPERAND (expr, 0);
3422 if (TREE_CODE (obj) == VAR_DECL
3423 || TREE_CODE (obj) == PARM_DECL
3424 || TREE_CODE (obj) == RESULT_DECL)
3425 return symbol_cost;
3428 return address_cost;
3431 switch (TREE_CODE (expr))
3433 case PLUS_EXPR:
3434 case MINUS_EXPR:
3435 case MULT_EXPR:
3436 op0 = TREE_OPERAND (expr, 0);
3437 op1 = TREE_OPERAND (expr, 1);
3438 STRIP_NOPS (op0);
3439 STRIP_NOPS (op1);
3441 if (is_gimple_val (op0))
3442 cost0 = 0;
3443 else
3444 cost0 = force_expr_to_var_cost (op0);
3446 if (is_gimple_val (op1))
3447 cost1 = 0;
3448 else
3449 cost1 = force_expr_to_var_cost (op1);
3451 break;
3453 default:
3454 /* Just an arbitrary value, FIXME. */
3455 return target_spill_cost;
3458 mode = TYPE_MODE (TREE_TYPE (expr));
3459 switch (TREE_CODE (expr))
3461 case PLUS_EXPR:
3462 case MINUS_EXPR:
3463 cost = add_cost (mode);
3464 break;
3466 case MULT_EXPR:
3467 if (cst_and_fits_in_hwi (op0))
3468 cost = multiply_by_cost (int_cst_value (op0), mode);
3469 else if (cst_and_fits_in_hwi (op1))
3470 cost = multiply_by_cost (int_cst_value (op1), mode);
3471 else
3472 return target_spill_cost;
3473 break;
3475 default:
3476 gcc_unreachable ();
3479 cost += cost0;
3480 cost += cost1;
3482 /* Bound the cost by target_spill_cost. The parts of complicated
3483 computations often are either loop invariant or at least can
3484 be shared between several iv uses, so letting this grow without
3485 limits would not give reasonable results. */
3486 return cost < target_spill_cost ? cost : target_spill_cost;
3489 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3490 invariants the computation depends on. */
3492 static unsigned
3493 force_var_cost (struct ivopts_data *data,
3494 tree expr, bitmap *depends_on)
3496 if (depends_on)
3498 fd_ivopts_data = data;
3499 walk_tree (&expr, find_depends, depends_on, NULL);
3502 return force_expr_to_var_cost (expr);
3505 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3506 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3507 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3508 invariants the computation depends on. */
3510 static unsigned
3511 split_address_cost (struct ivopts_data *data,
3512 tree addr, bool *symbol_present, bool *var_present,
3513 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3515 tree core;
3516 HOST_WIDE_INT bitsize;
3517 HOST_WIDE_INT bitpos;
3518 tree toffset;
3519 enum machine_mode mode;
3520 int unsignedp, volatilep;
3522 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3523 &unsignedp, &volatilep, false);
3525 if (toffset != 0
3526 || bitpos % BITS_PER_UNIT != 0
3527 || TREE_CODE (core) != VAR_DECL)
3529 *symbol_present = false;
3530 *var_present = true;
3531 fd_ivopts_data = data;
3532 walk_tree (&addr, find_depends, depends_on, NULL);
3533 return target_spill_cost;
3536 *offset += bitpos / BITS_PER_UNIT;
3537 if (TREE_STATIC (core)
3538 || DECL_EXTERNAL (core))
3540 *symbol_present = true;
3541 *var_present = false;
3542 return 0;
3545 *symbol_present = false;
3546 *var_present = true;
3547 return 0;
3550 /* Estimates cost of expressing difference of addresses E1 - E2 as
3551 var + symbol + offset. The value of offset is added to OFFSET,
3552 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3553 part is missing. DEPENDS_ON is a set of the invariants the computation
3554 depends on. */
3556 static unsigned
3557 ptr_difference_cost (struct ivopts_data *data,
3558 tree e1, tree e2, bool *symbol_present, bool *var_present,
3559 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3561 HOST_WIDE_INT diff = 0;
3562 unsigned cost;
3564 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3566 if (ptr_difference_const (e1, e2, &diff))
3568 *offset += diff;
3569 *symbol_present = false;
3570 *var_present = false;
3571 return 0;
3574 if (e2 == integer_zero_node)
3575 return split_address_cost (data, TREE_OPERAND (e1, 0),
3576 symbol_present, var_present, offset, depends_on);
3578 *symbol_present = false;
3579 *var_present = true;
3581 cost = force_var_cost (data, e1, depends_on);
3582 cost += force_var_cost (data, e2, depends_on);
3583 cost += add_cost (Pmode);
3585 return cost;
3588 /* Estimates cost of expressing difference E1 - E2 as
3589 var + symbol + offset. The value of offset is added to OFFSET,
3590 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3591 part is missing. DEPENDS_ON is a set of the invariants the computation
3592 depends on. */
3594 static unsigned
3595 difference_cost (struct ivopts_data *data,
3596 tree e1, tree e2, bool *symbol_present, bool *var_present,
3597 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3599 unsigned cost;
3600 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3601 unsigned HOST_WIDE_INT off1, off2;
3603 e1 = strip_offset (e1, &off1);
3604 e2 = strip_offset (e2, &off2);
3605 *offset += off1 - off2;
3607 STRIP_NOPS (e1);
3608 STRIP_NOPS (e2);
3610 if (TREE_CODE (e1) == ADDR_EXPR)
3611 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3612 depends_on);
3613 *symbol_present = false;
3615 if (operand_equal_p (e1, e2, 0))
3617 *var_present = false;
3618 return 0;
3620 *var_present = true;
3621 if (zero_p (e2))
3622 return force_var_cost (data, e1, depends_on);
3624 if (zero_p (e1))
3626 cost = force_var_cost (data, e2, depends_on);
3627 cost += multiply_by_cost (-1, mode);
3629 return cost;
3632 cost = force_var_cost (data, e1, depends_on);
3633 cost += force_var_cost (data, e2, depends_on);
3634 cost += add_cost (mode);
3636 return cost;
3639 /* Determines the cost of the computation by which USE is expressed
3640 from induction variable CAND. If ADDRESS_P is true, we just need
3641 to create an address from it, otherwise we want to get it into a
3642 register. A set of invariants we depend on is stored in
3643 DEPENDS_ON. AT is the statement at which the value is computed. */
3645 static unsigned
3646 get_computation_cost_at (struct ivopts_data *data,
3647 struct iv_use *use, struct iv_cand *cand,
3648 bool address_p, bitmap *depends_on, tree at)
3650 tree ubase = use->iv->base, ustep = use->iv->step;
3651 tree cbase, cstep;
3652 tree utype = TREE_TYPE (ubase), ctype;
3653 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3654 HOST_WIDE_INT ratio, aratio;
3655 bool var_present, symbol_present;
3656 unsigned cost = 0, n_sums;
3658 *depends_on = NULL;
3660 /* Only consider real candidates. */
3661 if (!cand->iv)
3662 return INFTY;
3664 cbase = cand->iv->base;
3665 cstep = cand->iv->step;
3666 ctype = TREE_TYPE (cbase);
3668 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3670 /* We do not have enough precision to express the values of USE. */
3671 return INFTY;
3674 if (address_p)
3676 /* Do not try to express the address of an object with a computation based
3677 on the address of a different object. This may cause problems in rtl
3678 level alias analysis (which does not expect this to happen,
3679 as it is illegal in C), and would be unlikely to be useful
3680 anyway. */
3681 if (use->iv->base_object
3682 && cand->iv->base_object
3683 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3684 return INFTY;
3687 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3689 /* TODO -- add direct handling of this case. */
3690 goto fallback;
3693 /* CSTEPI is removed from the offset in case the statement is after the
3694 increment. If the step is not constant, we use zero instead.
3695 This is a bit imprecise (there is the extra addition), but
3696 redundancy elimination is likely to transform the code so that
3697 it uses the value of the variable before the increment anyway,
3698 so this is not too unrealistic. */
3699 if (cst_and_fits_in_hwi (cstep))
3700 cstepi = int_cst_value (cstep);
3701 else
3702 cstepi = 0;
3704 if (cst_and_fits_in_hwi (ustep)
3705 && cst_and_fits_in_hwi (cstep))
3707 ustepi = int_cst_value (ustep);
3709 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3710 return INFTY;
3712 else
3714 tree rat;
3716 rat = constant_multiple_of (utype, ustep, cstep);
3718 if (!rat)
3719 return INFTY;
3721 if (cst_and_fits_in_hwi (rat))
3722 ratio = int_cst_value (rat);
3723 else if (integer_onep (rat))
3724 ratio = 1;
3725 else if (integer_all_onesp (rat))
3726 ratio = -1;
3727 else
3728 return INFTY;
3731 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3732 or ratio == 1, it is better to handle this like
3734 ubase - ratio * cbase + ratio * var
3736 (this also holds in the case ratio == -1; TODO). */
3738 if (cst_and_fits_in_hwi (cbase))
3740 offset = - ratio * int_cst_value (cbase);
3741 cost += difference_cost (data,
3742 ubase, integer_zero_node,
3743 &symbol_present, &var_present, &offset,
3744 depends_on);
3746 else if (ratio == 1)
3748 cost += difference_cost (data,
3749 ubase, cbase,
3750 &symbol_present, &var_present, &offset,
3751 depends_on);
3753 else
3755 cost += force_var_cost (data, cbase, depends_on);
3756 cost += add_cost (TYPE_MODE (ctype));
3757 cost += difference_cost (data,
3758 ubase, integer_zero_node,
3759 &symbol_present, &var_present, &offset,
3760 depends_on);
3763 /* If we are after the increment, the value of the candidate is higher by
3764 one iteration. */
3765 if (stmt_after_increment (data->current_loop, cand, at))
3766 offset -= ratio * cstepi;
3768 /* Now the computation has the shape symbol + var1 + const + ratio * var2
3769 (the symbol/var/const parts may be omitted). If we are looking for an address,
3770 find the cost of addressing this. */
3771 if (address_p)
3772 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3774 /* Otherwise estimate the costs for computing the expression. */
3775 aratio = ratio > 0 ? ratio : -ratio;
3776 if (!symbol_present && !var_present && !offset)
3778 if (ratio != 1)
3779 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3781 return cost;
3784 if (aratio != 1)
3785 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3787 n_sums = 1;
3788 if (var_present
3789 /* Symbol + offset should be compile-time computable. */
3790 && (symbol_present || offset))
3791 n_sums++;
3793 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3795 fallback:
3797 /* Just get the expression, expand it and measure the cost. */
3798 tree comp = get_computation_at (data->current_loop, use, cand, at);
3800 if (!comp)
3801 return INFTY;
3803 if (address_p)
3804 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3806 return computation_cost (comp);
3810 /* Determines the cost of the computation by which USE is expressed
3811 from induction variable CAND. If ADDRESS_P is true, we just need
3812 to create an address from it, otherwise we want to get it into a
3813 register. A set of invariants we depend on is stored in
3814 DEPENDS_ON. */
3816 static unsigned
3817 get_computation_cost (struct ivopts_data *data,
3818 struct iv_use *use, struct iv_cand *cand,
3819 bool address_p, bitmap *depends_on)
3821 return get_computation_cost_at (data,
3822 use, cand, address_p, depends_on, use->stmt);
3825 /* Determines cost of basing replacement of USE on CAND in a generic
3826 expression. */
3828 static bool
3829 determine_use_iv_cost_generic (struct ivopts_data *data,
3830 struct iv_use *use, struct iv_cand *cand)
3832 bitmap depends_on;
3833 unsigned cost;
3835 /* The simple case first -- if we need to express the value of the preserved
3836 original biv, the cost is 0. This also prevents us from counting the
3837 cost of increment twice -- once at this use and once in the cost of
3838 the candidate. */
3839 if (cand->pos == IP_ORIGINAL
3840 && cand->incremented_at == use->stmt)
3842 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3843 return true;
3846 cost = get_computation_cost (data, use, cand, false, &depends_on);
3847 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3849 return cost != INFTY;
3852 /* Determines cost of basing replacement of USE on CAND in an address. */
3854 static bool
3855 determine_use_iv_cost_address (struct ivopts_data *data,
3856 struct iv_use *use, struct iv_cand *cand)
3858 bitmap depends_on;
3859 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3861 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3863 return cost != INFTY;
3866 /* Computes value of induction variable IV in iteration NITER. */
3868 static tree
3869 iv_value (struct iv *iv, tree niter)
3871 tree val;
3872 tree type = TREE_TYPE (iv->base);
3874 niter = fold_convert (type, niter);
3875 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3877 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3880 /* Computes value of candidate CAND at position AT in iteration NITER. */
3882 static tree
3883 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3885 tree val = iv_value (cand->iv, niter);
3886 tree type = TREE_TYPE (cand->iv->base);
3888 if (stmt_after_increment (loop, cand, at))
3889 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3891 return val;
3894 /* Returns the period of induction variable IV. */
3896 static tree
3897 iv_period (struct iv *iv)
3899 tree step = iv->step, period, type;
3900 tree pow2div;
3902 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3904 /* The period of the iv is the type range divided by the greatest power of
3905 two that divides step; since the type range is a power of two, it suffices
3906 to determine that power. */
3907 pow2div = num_ending_zeros (step);
3908 type = unsigned_type_for (TREE_TYPE (step));
3910 period = build_low_bits_mask (type,
3911 (TYPE_PRECISION (type)
3912 - tree_low_cst (pow2div, 1)));
3914 return period;
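
/* Worked example (illustrative): for a 32-bit iv with step 4,
   num_ending_zeros yields 2 and PERIOD = 2^(32-2) - 1 = 0x3fffffff -- the
   iv takes 2^30 distinct values before wrapping, so it can safely test for
   up to 2^30 - 1 iterations.  For an odd step the full range is usable and
   PERIOD is 2^32 - 1.  */
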
3917 /* Returns the comparison operator used when eliminating the iv USE. */
3919 static enum tree_code
3920 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3922 struct loop *loop = data->current_loop;
3923 basic_block ex_bb;
3924 edge exit;
3926 ex_bb = bb_for_stmt (use->stmt);
3927 exit = EDGE_SUCC (ex_bb, 0);
3928 if (flow_bb_inside_loop_p (loop, exit->dest))
3929 exit = EDGE_SUCC (ex_bb, 1);
3931 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3934 /* Check whether it is possible to express the condition in USE by comparison
3935 of candidate CAND. If so, store the value it is compared with to *BOUND. */
3937 static bool
3938 may_eliminate_iv (struct ivopts_data *data,
3939 struct iv_use *use, struct iv_cand *cand, tree *bound)
3941 basic_block ex_bb;
3942 edge exit;
3943 struct tree_niter_desc *niter;
3944 tree nit, nit_type;
3945 tree wider_type, period, per_type;
3946 struct loop *loop = data->current_loop;
3948 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3949 return false;
3951 /* For now this works only for exits that dominate the loop latch. TODO --
3952 extend it to other conditions inside the loop body. */
3953 ex_bb = bb_for_stmt (use->stmt);
3954 if (use->stmt != last_stmt (ex_bb)
3955 || TREE_CODE (use->stmt) != COND_EXPR)
3956 return false;
3957 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3958 return false;
3960 exit = EDGE_SUCC (ex_bb, 0);
3961 if (flow_bb_inside_loop_p (loop, exit->dest))
3962 exit = EDGE_SUCC (ex_bb, 1);
3963 if (flow_bb_inside_loop_p (loop, exit->dest))
3964 return false;
3966 niter = niter_for_exit (data, exit);
3967 if (!niter
3968 || !zero_p (niter->may_be_zero))
3969 return false;
3971 nit = niter->niter;
3972 nit_type = TREE_TYPE (nit);
3974 /* Determine whether we may use the variable to test whether niter iterations
3975 elapsed. This is the case iff the period of the induction variable is
3976 greater than the number of iterations. */
3977 period = iv_period (cand->iv);
3978 if (!period)
3979 return false;
3980 per_type = TREE_TYPE (period);
3982 wider_type = TREE_TYPE (period);
3983 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3984 wider_type = per_type;
3985 else
3986 wider_type = nit_type;
3988 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3989 fold_convert (wider_type, period),
3990 fold_convert (wider_type, nit))))
3991 return false;
3993 *bound = cand_value_at (loop, cand, use->stmt, nit);
3994 return true;
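
/* Worked example (illustrative): in

     for (i = 0; i < 100; i++)
       p[i] = 0;

   the exit test on i can be eliminated in favor of a candidate pointer iv
   with base &p[0] and step 4: NIT is 99, and cand_value_at yields
   &p[0] + 4 * 99, plus one more step when the test follows the increment,
   i.e. &p[100]; the exit condition then compares the pointer against that
   bound.  */
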
3997 /* Determines cost of basing replacement of USE on CAND in a condition. */
3999 static bool
4000 determine_use_iv_cost_condition (struct ivopts_data *data,
4001 struct iv_use *use, struct iv_cand *cand)
4003 tree bound = NULL_TREE, op, cond;
4004 bitmap depends_on = NULL;
4005 unsigned cost;
4007 /* Only consider real candidates. */
4008 if (!cand->iv)
4010 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4011 return false;
4014 if (may_eliminate_iv (data, use, cand, &bound))
4016 cost = force_var_cost (data, bound, &depends_on);
4018 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4019 return cost != INFTY;
4022 /* The induction variable elimination failed; just express the original
4023 giv. If it is compared with an invariant, note that we cannot get
4024 rid of it. */
4025 cost = get_computation_cost (data, use, cand, false, &depends_on);
4027 cond = *use->op_p;
4028 if (TREE_CODE (cond) != SSA_NAME)
4030 op = TREE_OPERAND (cond, 0);
4031 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4032 op = TREE_OPERAND (cond, 1);
4033 if (TREE_CODE (op) == SSA_NAME)
4035 op = get_iv (data, op)->base;
4036 fd_ivopts_data = data;
4037 walk_tree (&op, find_depends, &depends_on, NULL);
4041 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4042 return cost != INFTY;
4045 /* Determines cost of basing replacement of USE on CAND. Returns false
4046 if USE cannot be based on CAND. */
4048 static bool
4049 determine_use_iv_cost (struct ivopts_data *data,
4050 struct iv_use *use, struct iv_cand *cand)
4052 switch (use->type)
4054 case USE_NONLINEAR_EXPR:
4055 return determine_use_iv_cost_generic (data, use, cand);
4057 case USE_ADDRESS:
4058 return determine_use_iv_cost_address (data, use, cand);
4060 case USE_COMPARE:
4061 return determine_use_iv_cost_condition (data, use, cand);
4063 default:
4064 gcc_unreachable ();
4068 /* Determines costs of basing the use of the iv on an iv candidate. */
4070 static void
4071 determine_use_iv_costs (struct ivopts_data *data)
4073 unsigned i, j;
4074 struct iv_use *use;
4075 struct iv_cand *cand;
4076 bitmap to_clear = BITMAP_ALLOC (NULL);
4078 alloc_use_cost_map (data);
4080 for (i = 0; i < n_iv_uses (data); i++)
4082 use = iv_use (data, i);
4084 if (data->consider_all_candidates)
4086 for (j = 0; j < n_iv_cands (data); j++)
4088 cand = iv_cand (data, j);
4089 determine_use_iv_cost (data, use, cand);
4092 else
4094 bitmap_iterator bi;
4096 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4098 cand = iv_cand (data, j);
4099 if (!determine_use_iv_cost (data, use, cand))
4100 bitmap_set_bit (to_clear, j);
4103 /* Remove the candidates for which the cost is infinite from
4104 the list of related candidates. */
4105 bitmap_and_compl_into (use->related_cands, to_clear);
4106 bitmap_clear (to_clear);
4110 BITMAP_FREE (to_clear);
4112 if (dump_file && (dump_flags & TDF_DETAILS))
4114 fprintf (dump_file, "Use-candidate costs:\n");
4116 for (i = 0; i < n_iv_uses (data); i++)
4118 use = iv_use (data, i);
4120 fprintf (dump_file, "Use %d:\n", i);
4121 fprintf (dump_file, " cand\tcost\tdepends on\n");
4122 for (j = 0; j < use->n_map_members; j++)
4124 if (!use->cost_map[j].cand
4125 || use->cost_map[j].cost == INFTY)
4126 continue;
4128 fprintf (dump_file, " %d\t%d\t",
4129 use->cost_map[j].cand->id,
4130 use->cost_map[j].cost);
4131 if (use->cost_map[j].depends_on)
4132 bitmap_print (dump_file,
4133 use->cost_map[j].depends_on, "","");
4134 fprintf (dump_file, "\n");
4137 fprintf (dump_file, "\n");
4139 fprintf (dump_file, "\n");
4143 /* Determines cost of the candidate CAND. */
4145 static void
4146 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4148 unsigned cost_base, cost_step;
4149 tree base;
4151 if (!cand->iv)
4153 cand->cost = 0;
4154 return;
4157 /* There are two costs associated with the candidate -- its increment
4158 and its initialization. The second is almost negligible for any loop
4159 that rolls enough, so we take it into account only a little. */
4161 base = cand->iv->base;
4162 cost_base = force_var_cost (data, base, NULL);
4163 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4165 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4167 /* Prefer the original iv unless we may gain something by replacing it;
4168 this is not really relevant for artificial ivs created by other
4169 passes. */
4170 if (cand->pos == IP_ORIGINAL
4171 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4172 cand->cost--;
4174 /* Prefer not to insert statements into the latch unless there are some
4175 already (so that we do not create unnecessary jumps). */
4176 if (cand->pos == IP_END
4177 && empty_block_p (ip_end_pos (data->current_loop)))
4178 cand->cost++;
4181 /* Determines costs of computation of the candidates. */
4183 static void
4184 determine_iv_costs (struct ivopts_data *data)
4186 unsigned i;
4188 if (dump_file && (dump_flags & TDF_DETAILS))
4190 fprintf (dump_file, "Candidate costs:\n");
4191 fprintf (dump_file, " cand\tcost\n");
4194 for (i = 0; i < n_iv_cands (data); i++)
4196 struct iv_cand *cand = iv_cand (data, i);
4198 determine_iv_cost (data, cand);
4200 if (dump_file && (dump_flags & TDF_DETAILS))
4201 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4204 if (dump_file && (dump_flags & TDF_DETAILS))
4205 fprintf (dump_file, "\n");
4208 /* Calculates cost for having SIZE induction variables. */
4210 static unsigned
4211 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4213 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4216 /* For each size of the induction variable set determine the penalty. */
4218 static void
4219 determine_set_costs (struct ivopts_data *data)
4221 unsigned j, n;
4222 tree phi, op;
4223 struct loop *loop = data->current_loop;
4224 bitmap_iterator bi;
4226 /* We use the following model (definitely improvable, especially the
4227 cost function -- TODO):
4229 We estimate the number of registers available (using MD data), name it A.
4231 We estimate the number of registers used by the loop, name it U. This
4232 number is obtained as the number of loop phi nodes (not counting virtual
4233 registers and bivs) + the number of variables from outside of the loop.
4235 We set a reserve R (free regs that are used for temporary computations,
4236 etc.). For now the reserve is a constant 3.
4238 Let I be the number of induction variables.
4240 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4241 encourage making a lot of ivs without a reason).
4242 -- if A - R < U + I <= A, the cost is I * PRES_COST
4243 -- if U + I > A, the cost is I * PRES_COST and
4244 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
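
/* A worked instance of the model (illustrative): with A = 8, R = 3 and
   U = 4, a set of I = 1 ivs costs 1 * SMALL_COST since 4 + 1 + 3 <= 8;
   I = 3 costs 3 * PRES_COST since 5 < 4 + 3 <= 8; and I = 6 pays
   6 * PRES_COST plus n_uses * SPILL_COST * (4 + 6 - 8) / (4 + 6) for the
   expected spills.  */
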
4246 if (dump_file && (dump_flags & TDF_DETAILS))
4248 fprintf (dump_file, "Global costs:\n");
4249 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4250 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4251 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4252 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4255 n = 0;
4256 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4258 op = PHI_RESULT (phi);
4260 if (!is_gimple_reg (op))
4261 continue;
4263 if (get_iv (data, op))
4264 continue;
4266 n++;
4269 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4271 struct version_info *info = ver_info (data, j);
4273 if (info->inv_id && info->has_nonlin_use)
4274 n++;
4277 data->regs_used = n;
4278 if (dump_file && (dump_flags & TDF_DETAILS))
4279 fprintf (dump_file, " regs_used %d\n", n);
4281 if (dump_file && (dump_flags & TDF_DETAILS))
4283 fprintf (dump_file, " cost for size:\n");
4284 fprintf (dump_file, " ivs\tcost\n");
4285 for (j = 0; j <= 2 * target_avail_regs; j++)
4286 fprintf (dump_file, " %d\t%d\n", j,
4287 ivopts_global_cost_for_size (data, j));
4288 fprintf (dump_file, "\n");
4292 /* Returns true if A is a cheaper cost pair than B. */
4294 static bool
4295 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4297 if (!a)
4298 return false;
4300 if (!b)
4301 return true;
4303 if (a->cost < b->cost)
4304 return true;
4306 if (a->cost > b->cost)
4307 return false;
4309 /* In case the costs are the same, prefer the cheaper candidate. */
4310 if (a->cand->cost < b->cand->cost)
4311 return true;
4313 return false;
4316 /* Computes the cost field of the IVS structure. */
4318 static void
4319 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4321 unsigned cost = 0;
4323 cost += ivs->cand_use_cost;
4324 cost += ivs->cand_cost;
4325 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4327 ivs->cost = cost;
4330 /* Removes the invariants in set INVS from set IVS. */
4332 static void
4333 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4335 bitmap_iterator bi;
4336 unsigned iid;
4338 if (!invs)
4339 return;
4341 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4343 ivs->n_invariant_uses[iid]--;
4344 if (ivs->n_invariant_uses[iid] == 0)
4345 ivs->n_regs--;
4349 /* Set USE not to be expressed by any candidate in IVS. */
4351 static void
4352 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4353 struct iv_use *use)
4355 unsigned uid = use->id, cid;
4356 struct cost_pair *cp;
4358 cp = ivs->cand_for_use[uid];
4359 if (!cp)
4360 return;
4361 cid = cp->cand->id;
4363 ivs->bad_uses++;
4364 ivs->cand_for_use[uid] = NULL;
4365 ivs->n_cand_uses[cid]--;
4367 if (ivs->n_cand_uses[cid] == 0)
4369 bitmap_clear_bit (ivs->cands, cid);
4370 /* Do not count the pseudocandidates. */
4371 if (cp->cand->iv)
4372 ivs->n_regs--;
4373 ivs->n_cands--;
4374 ivs->cand_cost -= cp->cand->cost;
4376 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4379 ivs->cand_use_cost -= cp->cost;
4381 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4382 iv_ca_recount_cost (data, ivs);
4385 /* Add invariants in set INVS to set IVS. */
4387 static void
4388 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4390 bitmap_iterator bi;
4391 unsigned iid;
4393 if (!invs)
4394 return;
4396 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4398 ivs->n_invariant_uses[iid]++;
4399 if (ivs->n_invariant_uses[iid] == 1)
4400 ivs->n_regs++;
4404 /* Set cost pair for USE in set IVS to CP. */
4406 static void
4407 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4408 struct iv_use *use, struct cost_pair *cp)
4410 unsigned uid = use->id, cid;
4412 if (ivs->cand_for_use[uid] == cp)
4413 return;
4415 if (ivs->cand_for_use[uid])
4416 iv_ca_set_no_cp (data, ivs, use);
4418 if (cp)
4420 cid = cp->cand->id;
4422 ivs->bad_uses--;
4423 ivs->cand_for_use[uid] = cp;
4424 ivs->n_cand_uses[cid]++;
4425 if (ivs->n_cand_uses[cid] == 1)
4427 bitmap_set_bit (ivs->cands, cid);
4428 /* Do not count the pseudocandidates. */
4429 if (cp->cand->iv)
4430 ivs->n_regs++;
4431 ivs->n_cands++;
4432 ivs->cand_cost += cp->cand->cost;
4434 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4437 ivs->cand_use_cost += cp->cost;
4438 iv_ca_set_add_invariants (ivs, cp->depends_on);
4439 iv_ca_recount_cost (data, ivs);
4443 /* Extend set IVS by expressing USE by some of the candidates in it
4444 if possible. */
4446 static void
4447 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4448 struct iv_use *use)
4450 struct cost_pair *best_cp = NULL, *cp;
4451 bitmap_iterator bi;
4452 unsigned i;
4454 gcc_assert (ivs->upto >= use->id);
4456 if (ivs->upto == use->id)
4458 ivs->upto++;
4459 ivs->bad_uses++;
4462 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4464 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4466 if (cheaper_cost_pair (cp, best_cp))
4467 best_cp = cp;
4470 iv_ca_set_cp (data, ivs, use, best_cp);
4473 /* Get cost for assignment IVS. */
4475 static unsigned
4476 iv_ca_cost (struct iv_ca *ivs)
4478 return (ivs->bad_uses ? INFTY : ivs->cost);
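/* A set that leaves some use unexpressed (bad_uses != 0) is thus treated
   as infinitely expensive and can never be selected. */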
4481 /* Returns true if all dependences of CP are among invariants in IVS. */
4483 static bool
4484 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4486 unsigned i;
4487 bitmap_iterator bi;
4489 if (!cp->depends_on)
4490 return true;
4492 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4494 if (ivs->n_invariant_uses[i] == 0)
4495 return false;
4498 return true;
4501 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
4502 chains it before NEXT_CHANGE. */
4504 static struct iv_ca_delta *
4505 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4506 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4508 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4510 change->use = use;
4511 change->old_cp = old_cp;
4512 change->new_cp = new_cp;
4513 change->next_change = next_change;
4515 return change;
4518 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4519 are rewritten. */
4521 static struct iv_ca_delta *
4522 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4524 struct iv_ca_delta *last;
4526 if (!l2)
4527 return l1;
4529 if (!l1)
4530 return l2;
4532 for (last = l1; last->next_change; last = last->next_change)
4533 continue;
4534 last->next_change = l2;
4536 return l1;
4539 /* Returns the candidate by which USE is expressed in IVS. */
4541 static struct cost_pair *
4542 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4544 return ivs->cand_for_use[use->id];
4547 /* Reverse the list of changes DELTA, forming the inverse to it. */
4549 static struct iv_ca_delta *
4550 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4552 struct iv_ca_delta *act, *next, *prev = NULL;
4553 struct cost_pair *tmp;
4555 for (act = delta; act; act = next)
4557 next = act->next_change;
4558 act->next_change = prev;
4559 prev = act;
4561 tmp = act->old_cp;
4562 act->old_cp = act->new_cp;
4563 act->new_cp = tmp;
4566 return prev;
4569 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4570 reverted instead. */
4572 static void
4573 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4574 struct iv_ca_delta *delta, bool forward)
4576 struct cost_pair *from, *to;
4577 struct iv_ca_delta *act;
4579 if (!forward)
4580 delta = iv_ca_delta_reverse (delta);
4582 for (act = delta; act; act = act->next_change)
4584 from = act->old_cp;
4585 to = act->new_cp;
4586 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4587 iv_ca_set_cp (data, ivs, act->use, to);
4590 if (!forward)
4591 iv_ca_delta_reverse (delta);
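/* The usual pattern built on top of iv_ca_delta_commit (a sketch; it
   mirrors iv_ca_extend below): apply a delta, read off the cost of the
   modified set, and revert the delta, leaving IVS unchanged. */
#if 0
iv_ca_delta_commit (data, ivs, delta, true);
cost = iv_ca_cost (ivs);
iv_ca_delta_commit (data, ivs, delta, false);
#endif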
4594 /* Returns true if CAND is used in IVS. */
4596 static bool
4597 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4599 return ivs->n_cand_uses[cand->id] > 0;
4602 /* Returns number of induction variable candidates in the set IVS. */
4604 static unsigned
4605 iv_ca_n_cands (struct iv_ca *ivs)
4607 return ivs->n_cands;
4610 /* Free the list of changes DELTA. */
4612 static void
4613 iv_ca_delta_free (struct iv_ca_delta **delta)
4615 struct iv_ca_delta *act, *next;
4617 for (act = *delta; act; act = next)
4619 next = act->next_change;
4620 free (act);
4623 *delta = NULL;
4626 /* Allocates a new iv candidate assignment. */
4628 static struct iv_ca *
4629 iv_ca_new (struct ivopts_data *data)
4631 struct iv_ca *nw = XNEW (struct iv_ca);
4633 nw->upto = 0;
4634 nw->bad_uses = 0;
4635 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4636 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4637 nw->cands = BITMAP_ALLOC (NULL);
4638 nw->n_cands = 0;
4639 nw->n_regs = 0;
4640 nw->cand_use_cost = 0;
4641 nw->cand_cost = 0;
4642 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4643 nw->cost = 0;
4645 return nw;
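/* Lifecycle sketch (illustration only; get_initial_solution and
   find_optimal_iv_set below follow this shape): allocate an assignment,
   populate it use by use, and free it once the selection is recorded. */
#if 0
struct iv_ca *ivs = iv_ca_new (data);
/* ... iv_ca_add_use / iv_ca_set_cp for each use ... */
iv_ca_free (&ivs);
#endif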
4648 /* Free memory occupied by the set IVS. */
4650 static void
4651 iv_ca_free (struct iv_ca **ivs)
4653 free ((*ivs)->cand_for_use);
4654 free ((*ivs)->n_cand_uses);
4655 BITMAP_FREE ((*ivs)->cands);
4656 free ((*ivs)->n_invariant_uses);
4657 free (*ivs);
4658 *ivs = NULL;
4661 /* Dumps IVS to FILE. */
4663 static void
4664 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4666 const char *pref = " invariants ";
4667 unsigned i;
4669 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4670 bitmap_print (file, ivs->cands, " candidates ","\n");
4672 for (i = 1; i <= data->max_inv_id; i++)
4673 if (ivs->n_invariant_uses[i])
4675 fprintf (file, "%s%d", pref, i);
4676 pref = ", ";
4678 fprintf (file, "\n");
4681 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4682 of the new set, and store the differences in DELTA. The number of
4683 induction variables in the new set is stored in N_IVS. */
4685 static unsigned
4686 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4687 struct iv_cand *cand, struct iv_ca_delta **delta,
4688 unsigned *n_ivs)
4690 unsigned i, cost;
4691 struct iv_use *use;
4692 struct cost_pair *old_cp, *new_cp;
4694 *delta = NULL;
4695 for (i = 0; i < ivs->upto; i++)
4697 use = iv_use (data, i);
4698 old_cp = iv_ca_cand_for_use (ivs, use);
4700 if (old_cp
4701 && old_cp->cand == cand)
4702 continue;
4704 new_cp = get_use_iv_cost (data, use, cand);
4705 if (!new_cp)
4706 continue;
4708 if (!iv_ca_has_deps (ivs, new_cp))
4709 continue;
4711 if (!cheaper_cost_pair (new_cp, old_cp))
4712 continue;
4714 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4717 iv_ca_delta_commit (data, ivs, *delta, true);
4718 cost = iv_ca_cost (ivs);
4719 if (n_ivs)
4720 *n_ivs = iv_ca_n_cands (ivs);
4721 iv_ca_delta_commit (data, ivs, *delta, false);
4723 return cost;
4726 /* Try narrowing set IVS by removing CAND. Return the cost of
4727 the new set and store the differences in DELTA. */
4729 static unsigned
4730 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4731 struct iv_cand *cand, struct iv_ca_delta **delta)
4733 unsigned i, ci;
4734 struct iv_use *use;
4735 struct cost_pair *old_cp, *new_cp, *cp;
4736 bitmap_iterator bi;
4737 struct iv_cand *cnd;
4738 unsigned cost;
4740 *delta = NULL;
4741 for (i = 0; i < n_iv_uses (data); i++)
4743 use = iv_use (data, i);
4745 old_cp = iv_ca_cand_for_use (ivs, use);
4746 if (old_cp->cand != cand)
4747 continue;
4749 new_cp = NULL;
4751 if (data->consider_all_candidates)
4753 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4755 if (ci == cand->id)
4756 continue;
4758 cnd = iv_cand (data, ci);
4760 cp = get_use_iv_cost (data, use, cnd);
4761 if (!cp)
4762 continue;
4763 if (!iv_ca_has_deps (ivs, cp))
4764 continue;
4766 if (!cheaper_cost_pair (cp, new_cp))
4767 continue;
4769 new_cp = cp;
4772 else
4774 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4776 if (ci == cand->id)
4777 continue;
4779 cnd = iv_cand (data, ci);
4781 cp = get_use_iv_cost (data, use, cnd);
4782 if (!cp)
4783 continue;
4784 if (!iv_ca_has_deps (ivs, cp))
4785 continue;
4787 if (!cheaper_cost_pair (cp, new_cp))
4788 continue;
4790 new_cp = cp;
4794 if (!new_cp)
4796 iv_ca_delta_free (delta);
4797 return INFTY;
4800 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4803 iv_ca_delta_commit (data, ivs, *delta, true);
4804 cost = iv_ca_cost (ivs);
4805 iv_ca_delta_commit (data, ivs, *delta, false);
4807 return cost;
4810 /* Try optimizing the set of candidates IVS by removing candidates other
4811 than EXCEPT_CAND from it. Return the cost of the new set, and store the
4812 differences in DELTA. */
4814 static unsigned
4815 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4816 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4818 bitmap_iterator bi;
4819 struct iv_ca_delta *act_delta, *best_delta;
4820 unsigned i, best_cost, acost;
4821 struct iv_cand *cand;
4823 best_delta = NULL;
4824 best_cost = iv_ca_cost (ivs);
4826 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4828 cand = iv_cand (data, i);
4830 if (cand == except_cand)
4831 continue;
4833 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4835 if (acost < best_cost)
4837 best_cost = acost;
4838 iv_ca_delta_free (&best_delta);
4839 best_delta = act_delta;
4841 else
4842 iv_ca_delta_free (&act_delta);
4845 if (!best_delta)
4847 *delta = NULL;
4848 return best_cost;
4851 /* Recurse to possibly remove other unnecessary ivs. */
4852 iv_ca_delta_commit (data, ivs, best_delta, true);
4853 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4854 iv_ca_delta_commit (data, ivs, best_delta, false);
4855 *delta = iv_ca_delta_join (best_delta, *delta);
4856 return best_cost;
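/* Note on termination: iv_ca_prune recurses only after committing a
   narrowing that removed one candidate from IVS, so the recursion depth
   is bounded by the number of candidates in the set. */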
4859 /* Tries to extend the set IVS in the best possible way in order
4860 to express USE. */
4862 static bool
4863 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4864 struct iv_use *use)
4866 unsigned best_cost, act_cost;
4867 unsigned i;
4868 bitmap_iterator bi;
4869 struct iv_cand *cand;
4870 struct iv_ca_delta *best_delta = NULL, *act_delta;
4871 struct cost_pair *cp;
4873 iv_ca_add_use (data, ivs, use);
4874 best_cost = iv_ca_cost (ivs);
4876 cp = iv_ca_cand_for_use (ivs, use);
4877 if (cp)
4879 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4880 iv_ca_set_no_cp (data, ivs, use);
4883 /* First try important candidates. Only if that fails, try the use-specific
4884 ones. Rationale -- in loops with many variables the best choice often is
4885 to use just one generic biv. If we added many ivs specific to the uses
4886 here, the optimization algorithm would later be likely to get stuck in a
4887 local minimum, thus causing us to create too many ivs. The approach of
4888 going from few ivs to more seems more likely to be successful -- starting
4889 from few ivs, replacing an expensive use by a specific iv should always
4890 be a win. */
4891 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4893 cand = iv_cand (data, i);
4895 if (iv_ca_cand_used_p (ivs, cand))
4896 continue;
4898 cp = get_use_iv_cost (data, use, cand);
4899 if (!cp)
4900 continue;
4902 iv_ca_set_cp (data, ivs, use, cp);
4903 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4904 iv_ca_set_no_cp (data, ivs, use);
4905 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4907 if (act_cost < best_cost)
4909 best_cost = act_cost;
4911 iv_ca_delta_free (&best_delta);
4912 best_delta = act_delta;
4914 else
4915 iv_ca_delta_free (&act_delta);
4918 if (best_cost == INFTY)
4920 for (i = 0; i < use->n_map_members; i++)
4922 cp = use->cost_map + i;
4923 cand = cp->cand;
4924 if (!cand)
4925 continue;
4927 /* Already tried this. */
4928 if (cand->important)
4929 continue;
4931 if (iv_ca_cand_used_p (ivs, cand))
4932 continue;
4934 act_delta = NULL;
4935 iv_ca_set_cp (data, ivs, use, cp);
4936 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4937 iv_ca_set_no_cp (data, ivs, use);
4938 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4939 cp, act_delta);
4941 if (act_cost < best_cost)
4943 best_cost = act_cost;
4945 if (best_delta)
4946 iv_ca_delta_free (&best_delta);
4947 best_delta = act_delta;
4949 else
4950 iv_ca_delta_free (&act_delta);
4954 iv_ca_delta_commit (data, ivs, best_delta, true);
4955 iv_ca_delta_free (&best_delta);
4957 return (best_cost != INFTY);
4960 /* Finds an initial assignment of candidates to uses. */
4962 static struct iv_ca *
4963 get_initial_solution (struct ivopts_data *data)
4965 struct iv_ca *ivs = iv_ca_new (data);
4966 unsigned i;
4968 for (i = 0; i < n_iv_uses (data); i++)
4969 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4971 iv_ca_free (&ivs);
4972 return NULL;
4975 return ivs;
4978 /* Tries to improve the set of induction variables IVS. */
4980 static bool
4981 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4983 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4984 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4985 struct iv_cand *cand;
4987 /* Try extending the set of induction variables by one. */
4988 for (i = 0; i < n_iv_cands (data); i++)
4990 cand = iv_cand (data, i);
4992 if (iv_ca_cand_used_p (ivs, cand))
4993 continue;
4995 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4996 if (!act_delta)
4997 continue;
4999 /* If we successfully added the candidate and the set is small enough,
5000 try optimizing it by removing other candidates. */
5001 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5003 iv_ca_delta_commit (data, ivs, act_delta, true);
5004 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5005 iv_ca_delta_commit (data, ivs, act_delta, false);
5006 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5009 if (acost < best_cost)
5011 best_cost = acost;
5012 iv_ca_delta_free (&best_delta);
5013 best_delta = act_delta;
5015 else
5016 iv_ca_delta_free (&act_delta);
5019 if (!best_delta)
5021 /* Try removing the candidates from the set instead. */
5022 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5024 /* Nothing more we can do. */
5025 if (!best_delta)
5026 return false;
5029 iv_ca_delta_commit (data, ivs, best_delta, true);
5030 gcc_assert (best_cost == iv_ca_cost (ivs));
5031 iv_ca_delta_free (&best_delta);
5032 return true;
5035 /* Attempts to find the optimal set of induction variables. We use a simple
5036 greedy heuristic -- we try to replace at most one candidate in the selected
5037 solution and remove the unused ivs while this improves the cost. */
5039 static struct iv_ca *
5040 find_optimal_iv_set (struct ivopts_data *data)
5042 unsigned i;
5043 struct iv_ca *set;
5044 struct iv_use *use;
5046 /* Get the initial solution. */
5047 set = get_initial_solution (data);
5048 if (!set)
5050 if (dump_file && (dump_flags & TDF_DETAILS))
5051 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5052 return NULL;
5055 if (dump_file && (dump_flags & TDF_DETAILS))
5057 fprintf (dump_file, "Initial set of candidates:\n");
5058 iv_ca_dump (data, dump_file, set);
5061 while (try_improve_iv_set (data, set))
5063 if (dump_file && (dump_flags & TDF_DETAILS))
5065 fprintf (dump_file, "Improved to:\n");
5066 iv_ca_dump (data, dump_file, set);
5070 if (dump_file && (dump_flags & TDF_DETAILS))
5071 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5073 for (i = 0; i < n_iv_uses (data); i++)
5075 use = iv_use (data, i);
5076 use->selected = iv_ca_cand_for_use (set, use)->cand;
5079 return set;
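/* With TDF_DETAILS dumping enabled, the search above logs output of
   roughly this shape (a sketch, not verbatim compiler output):

     Initial set of candidates:
       cost 24
       candidates 1, 3
     Improved to:
       cost 21
       candidates 3
     Final cost 21 */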
5082 /* Creates a new induction variable corresponding to CAND. */
5084 static void
5085 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5087 block_stmt_iterator incr_pos;
5088 tree base;
5089 bool after = false;
5091 if (!cand->iv)
5092 return;
5094 switch (cand->pos)
5096 case IP_NORMAL:
5097 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5098 break;
5100 case IP_END:
5101 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5102 after = true;
5103 break;
5105 case IP_ORIGINAL:
5106 /* Mark that the iv is preserved. */
5107 name_info (data, cand->var_before)->preserve_biv = true;
5108 name_info (data, cand->var_after)->preserve_biv = true;
5110 /* Rewrite the increment so that it uses var_before directly. */
5111 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5113 return;
5116 gimple_add_tmp_var (cand->var_before);
5117 add_referenced_tmp_var (cand->var_before);
5119 base = unshare_expr (cand->iv->base);
5121 create_iv (base, unshare_expr (cand->iv->step),
5122 cand->var_before, data->current_loop,
5123 &incr_pos, after, &cand->var_before, &cand->var_after);
5126 /* Creates new induction variables described in SET. */
5128 static void
5129 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5131 unsigned i;
5132 struct iv_cand *cand;
5133 bitmap_iterator bi;
5135 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5137 cand = iv_cand (data, i);
5138 create_new_iv (data, cand);
5142 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5143 is true, also remove the ssa name defined by the statement. */
5145 static void
5146 remove_statement (tree stmt, bool including_defined_name)
5148 if (TREE_CODE (stmt) == PHI_NODE)
5150 if (!including_defined_name)
5152 /* Prevent the ssa name defined by the statement from being removed. */
5153 SET_PHI_RESULT (stmt, NULL);
5155 remove_phi_node (stmt, NULL_TREE);
5157 else
5159 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5161 bsi_remove (&bsi, true);
5165 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5166 using candidate CAND. */
5168 static void
5169 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5170 struct iv_use *use, struct iv_cand *cand)
5172 tree comp;
5173 tree op, stmts, tgt, ass;
5174 block_stmt_iterator bsi, pbsi;
5176 /* An important special case -- if we are asked to express the value of
5177 the original iv by itself, just exit; there is no need to
5178 introduce a new computation (that might also need casting the
5179 variable to unsigned and back). */
5180 if (cand->pos == IP_ORIGINAL
5181 && cand->incremented_at == use->stmt)
5183 tree step, ctype, utype;
5184 enum tree_code incr_code = PLUS_EXPR;
5186 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5187 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5189 step = cand->iv->step;
5190 ctype = TREE_TYPE (step);
5191 utype = TREE_TYPE (cand->var_after);
5192 if (TREE_CODE (step) == NEGATE_EXPR)
5194 incr_code = MINUS_EXPR;
5195 step = TREE_OPERAND (step, 0);
5198 /* Check whether we may leave the computation unchanged.
5199 This is the case only if it does not rely on other
5200 computations in the loop -- otherwise, the computation
5201 we rely upon may be removed in remove_unused_ivs,
5202 thus leading to an ICE. */
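/* For example (an invented sketch): for the increment statement
   i_after = i_before + 1 with candidate step 1, OP below becomes the
   constant 1 and the statement may stay as it is. */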
5203 op = TREE_OPERAND (use->stmt, 1);
5204 if (TREE_CODE (op) == PLUS_EXPR
5205 || TREE_CODE (op) == MINUS_EXPR)
5207 if (TREE_OPERAND (op, 0) == cand->var_before)
5208 op = TREE_OPERAND (op, 1);
5209 else if (TREE_CODE (op) == PLUS_EXPR
5210 && TREE_OPERAND (op, 1) == cand->var_before)
5211 op = TREE_OPERAND (op, 0);
5212 else
5213 op = NULL_TREE;
5215 else
5216 op = NULL_TREE;
5218 if (op
5219 && (TREE_CODE (op) == INTEGER_CST
5220 || operand_equal_p (op, step, 0)))
5221 return;
5223 /* Otherwise, add the necessary computations to express
5224 the iv. */
5225 op = fold_convert (ctype, cand->var_before);
5226 comp = fold_convert (utype,
5227 build2 (incr_code, ctype, op,
5228 unshare_expr (step)));
5230 else
5231 comp = get_computation (data->current_loop, use, cand);
5233 switch (TREE_CODE (use->stmt))
5235 case PHI_NODE:
5236 tgt = PHI_RESULT (use->stmt);
5238 /* If we should keep the biv, do not replace it. */
5239 if (name_info (data, tgt)->preserve_biv)
5240 return;
5242 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5243 while (!bsi_end_p (pbsi)
5244 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5246 bsi = pbsi;
5247 bsi_next (&pbsi);
5249 break;
5251 case MODIFY_EXPR:
5252 tgt = TREE_OPERAND (use->stmt, 0);
5253 bsi = bsi_for_stmt (use->stmt);
5254 break;
5256 default:
5257 gcc_unreachable ();
5260 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5262 if (TREE_CODE (use->stmt) == PHI_NODE)
5264 if (stmts)
5265 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5266 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5267 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5268 remove_statement (use->stmt, false);
5269 SSA_NAME_DEF_STMT (tgt) = ass;
5271 else
5273 if (stmts)
5274 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5275 TREE_OPERAND (use->stmt, 1) = op;
5279 /* Replaces the ssa name in index IDX by its basic variable. A callback
5280 for for_each_index. */
5282 static bool
5283 idx_remove_ssa_names (tree base, tree *idx,
5284 void *data ATTRIBUTE_UNUSED)
5286 tree *op;
5288 if (TREE_CODE (*idx) == SSA_NAME)
5289 *idx = SSA_NAME_VAR (*idx);
5291 if (TREE_CODE (base) == ARRAY_REF)
5293 op = &TREE_OPERAND (base, 2);
5294 if (*op
5295 && TREE_CODE (*op) == SSA_NAME)
5296 *op = SSA_NAME_VAR (*op);
5297 op = &TREE_OPERAND (base, 3);
5298 if (*op
5299 && TREE_CODE (*op) == SSA_NAME)
5300 *op = SSA_NAME_VAR (*op);
5303 return true;
5306 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5308 static tree
5309 unshare_and_remove_ssa_names (tree ref)
5311 ref = unshare_expr (ref);
5312 for_each_index (&ref, idx_remove_ssa_names, NULL);
5314 return ref;
5317 /* Extract the alias analysis info for the memory reference REF. There are
5318 several ways this information may be stored, and its precise semantics
5319 depend on the type of the reference, but there is always one _DECL node
5320 hidden somewhere that is used to determine the set of virtual operands
5321 for the reference. The code below deciphers this jungle and extracts
5322 that single useful piece of information. */
5324 static tree
5325 get_ref_tag (tree ref, tree orig)
5327 tree var = get_base_address (ref);
5328 tree aref = NULL_TREE, tag, sv;
5329 HOST_WIDE_INT offset, size, maxsize;
5331 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5333 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5334 if (ref)
5335 break;
5338 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5339 return unshare_expr (sv);
5341 if (!var)
5342 return NULL_TREE;
5344 if (TREE_CODE (var) == INDIRECT_REF)
5346 /* If the base is a dereference of a pointer, first check its name memory
5347 tag. If it does not have one, use its symbol memory tag. */
5348 var = TREE_OPERAND (var, 0);
5349 if (TREE_CODE (var) != SSA_NAME)
5350 return NULL_TREE;
5352 if (SSA_NAME_PTR_INFO (var))
5354 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5355 if (tag)
5356 return tag;
5359 var = SSA_NAME_VAR (var);
5360 tag = var_ann (var)->symbol_mem_tag;
5361 gcc_assert (tag != NULL_TREE);
5362 return tag;
5364 else
5366 if (!DECL_P (var))
5367 return NULL_TREE;
5369 tag = var_ann (var)->symbol_mem_tag;
5370 if (tag)
5371 return tag;
5373 return var;
5377 /* Copies the reference information from OLD_REF to NEW_REF. */
5379 static void
5380 copy_ref_info (tree new_ref, tree old_ref)
5382 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5383 copy_mem_ref_info (new_ref, old_ref);
5384 else
5386 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5387 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5391 /* Rewrites USE (address that is an iv) using candidate CAND. */
5393 static void
5394 rewrite_use_address (struct ivopts_data *data,
5395 struct iv_use *use, struct iv_cand *cand)
5397 struct affine_tree_combination aff;
5398 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5399 tree ref;
5401 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5402 unshare_aff_combination (&aff);
5404 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5405 copy_ref_info (ref, *use->op_p);
5406 *use->op_p = ref;
5409 /* Rewrites USE (a condition one of whose arguments is an iv) using
5410 candidate CAND. */
5412 static void
5413 rewrite_use_compare (struct ivopts_data *data,
5414 struct iv_use *use, struct iv_cand *cand)
5416 tree comp;
5417 tree *op_p, cond, op, stmts, bound;
5418 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5419 enum tree_code compare;
5420 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5422 bound = cp->value;
5423 if (bound)
5425 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5426 tree var_type = TREE_TYPE (var);
5428 compare = iv_elimination_compare (data, use);
5429 bound = fold_convert (var_type, bound);
5430 op = force_gimple_operand (unshare_expr (bound), &stmts,
5431 true, NULL_TREE);
5433 if (stmts)
5434 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5436 *use->op_p = build2 (compare, boolean_type_node, var, op);
5437 update_stmt (use->stmt);
5438 return;
5441 /* The induction variable elimination failed; just express the original
5442 iv. */
5443 comp = get_computation (data->current_loop, use, cand);
5445 cond = *use->op_p;
5446 op_p = &TREE_OPERAND (cond, 0);
5447 if (TREE_CODE (*op_p) != SSA_NAME
5448 || zero_p (get_iv (data, *op_p)->step))
5449 op_p = &TREE_OPERAND (cond, 1);
5451 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5452 if (stmts)
5453 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5455 *op_p = op;
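/* For illustration (an invented example): when a bound is available,
   elimination may turn a condition such as i < 100 into var != bound,
   where var is the value of CAND at the compare and bound comes from
   cp->value computed during the cost analysis; only without a bound is
   the original value expressed directly, as above. */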
5458 /* Ensure that operand *OP_P may be used at the end of EXIT without
5459 violating loop closed ssa form. */
5461 static void
5462 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5464 basic_block def_bb;
5465 struct loop *def_loop;
5466 tree phi, use;
5468 use = USE_FROM_PTR (op_p);
5469 if (TREE_CODE (use) != SSA_NAME)
5470 return;
5472 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5473 if (!def_bb)
5474 return;
5476 def_loop = def_bb->loop_father;
5477 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5478 return;
5480 /* Try finding a phi node that copies the value out of the loop. */
5481 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5482 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5483 break;
5485 if (!phi)
5487 /* Create such a phi node. */
5488 tree new_name = duplicate_ssa_name (use, NULL);
5490 phi = create_phi_node (new_name, exit->dest);
5491 SSA_NAME_DEF_STMT (new_name) = phi;
5492 add_phi_arg (phi, use, exit);
5495 SET_USE (op_p, PHI_RESULT (phi));
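/* In loop closed ssa form, a value defined inside the loop and used beyond
   it must flow through a phi node in the exit block; the code above reuses
   an existing phi node when possible and creates one otherwise. */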
5498 /* Ensure that operands of STMT may be used at the end of EXIT without
5499 violating loop closed ssa form. */
5501 static void
5502 protect_loop_closed_ssa_form (edge exit, tree stmt)
5504 ssa_op_iter iter;
5505 use_operand_p use_p;
5507 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5508 protect_loop_closed_ssa_form_use (exit, use_p);
5511 /* STMTS compute the value of a phi argument OP on EXIT of a loop. Arrange
5512 things so that they are emitted in the correct place, and so that the loop
5513 closed ssa form is preserved. */
5515 void
5516 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5518 tree_stmt_iterator tsi;
5519 block_stmt_iterator bsi;
5520 tree phi, stmt, def, next;
5522 if (!single_pred_p (exit->dest))
5523 split_loop_exit_edge (exit);
5525 /* Ensure there is a label in exit->dest, so that we can
5526 insert after it. */
5527 tree_block_label (exit->dest);
5528 bsi = bsi_after_labels (exit->dest);
5530 if (TREE_CODE (stmts) == STATEMENT_LIST)
5532 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5534 tree stmt = tsi_stmt (tsi);
5535 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5536 protect_loop_closed_ssa_form (exit, stmt);
5539 else
5541 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5542 protect_loop_closed_ssa_form (exit, stmts);
5545 if (!op)
5546 return;
5548 for (phi = phi_nodes (exit->dest); phi; phi = next)
5550 next = PHI_CHAIN (phi);
5552 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5554 def = PHI_RESULT (phi);
5555 remove_statement (phi, false);
5556 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5557 def, op);
5558 SSA_NAME_DEF_STMT (def) = stmt;
5559 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5564 /* Rewrites USE using candidate CAND. */
5566 static void
5567 rewrite_use (struct ivopts_data *data,
5568 struct iv_use *use, struct iv_cand *cand)
5570 switch (use->type)
5572 case USE_NONLINEAR_EXPR:
5573 rewrite_use_nonlinear_expr (data, use, cand);
5574 break;
5576 case USE_ADDRESS:
5577 rewrite_use_address (data, use, cand);
5578 break;
5580 case USE_COMPARE:
5581 rewrite_use_compare (data, use, cand);
5582 break;
5584 default:
5585 gcc_unreachable ();
5587 mark_new_vars_to_rename (use->stmt);
5590 /* Rewrite the uses using the selected induction variables. */
5592 static void
5593 rewrite_uses (struct ivopts_data *data)
5595 unsigned i;
5596 struct iv_cand *cand;
5597 struct iv_use *use;
5599 for (i = 0; i < n_iv_uses (data); i++)
5601 use = iv_use (data, i);
5602 cand = use->selected;
5603 gcc_assert (cand);
5605 rewrite_use (data, use, cand);
5609 /* Removes the ivs that are not used after rewriting. */
5611 static void
5612 remove_unused_ivs (struct ivopts_data *data)
5614 unsigned j;
5615 bitmap_iterator bi;
5617 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5619 struct version_info *info;
5621 info = ver_info (data, j);
5622 if (info->iv
5623 && !zero_p (info->iv->step)
5624 && !info->inv_id
5625 && !info->iv->have_use_for
5626 && !info->preserve_biv)
5627 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5631 /* Frees data allocated by the optimization of a single loop. */
5633 static void
5634 free_loop_data (struct ivopts_data *data)
5636 unsigned i, j;
5637 bitmap_iterator bi;
5638 tree obj;
5640 htab_empty (data->niters);
5642 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5644 struct version_info *info;
5646 info = ver_info (data, i);
5647 if (info->iv)
5648 free (info->iv);
5649 info->iv = NULL;
5650 info->has_nonlin_use = false;
5651 info->preserve_biv = false;
5652 info->inv_id = 0;
5654 bitmap_clear (data->relevant);
5655 bitmap_clear (data->important_candidates);
5657 for (i = 0; i < n_iv_uses (data); i++)
5659 struct iv_use *use = iv_use (data, i);
5661 free (use->iv);
5662 BITMAP_FREE (use->related_cands);
5663 for (j = 0; j < use->n_map_members; j++)
5664 if (use->cost_map[j].depends_on)
5665 BITMAP_FREE (use->cost_map[j].depends_on);
5666 free (use->cost_map);
5667 free (use);
5669 VEC_truncate (iv_use_p, data->iv_uses, 0);
5671 for (i = 0; i < n_iv_cands (data); i++)
5673 struct iv_cand *cand = iv_cand (data, i);
5675 if (cand->iv)
5676 free (cand->iv);
5677 if (cand->depends_on)
5678 BITMAP_FREE (cand->depends_on);
5679 free (cand);
5681 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5683 if (data->version_info_size < num_ssa_names)
5685 data->version_info_size = 2 * num_ssa_names;
5686 free (data->version_info);
5687 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5690 data->max_inv_id = 0;
5692 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5693 SET_DECL_RTL (obj, NULL_RTX);
5695 VEC_truncate (tree, decl_rtl_to_reset, 0);
5698 /* Finalizes the data structures in DATA used by the iv optimization
5699 pass. */
5701 static void
5702 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5704 free_loop_data (data);
5705 free (data->version_info);
5706 BITMAP_FREE (data->relevant);
5707 BITMAP_FREE (data->important_candidates);
5708 htab_delete (data->niters);
5710 VEC_free (tree, heap, decl_rtl_to_reset);
5711 VEC_free (iv_use_p, heap, data->iv_uses);
5712 VEC_free (iv_cand_p, heap, data->iv_candidates);
5715 /* Optimizes the LOOP. Returns true if anything changed. */
5717 static bool
5718 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5720 bool changed = false;
5721 struct iv_ca *iv_ca;
5722 edge exit;
5724 data->current_loop = loop;
5726 if (dump_file && (dump_flags & TDF_DETAILS))
5728 fprintf (dump_file, "Processing loop %d\n", loop->num);
5730 exit = single_dom_exit (loop);
5731 if (exit)
5733 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5734 exit->src->index, exit->dest->index);
5735 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5736 fprintf (dump_file, "\n");
5739 fprintf (dump_file, "\n");
5742 /* For each ssa name determines whether it behaves as an induction variable
5743 in some loop. */
5744 if (!find_induction_variables (data))
5745 goto finish;
5747 /* Finds interesting uses (item 1). */
5748 find_interesting_uses (data);
5749 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5750 goto finish;
5752 /* Finds candidates for the induction variables (item 2). */
5753 find_iv_candidates (data);
5755 /* Calculates the costs (item 3, part 1). */
5756 determine_use_iv_costs (data);
5757 determine_iv_costs (data);
5758 determine_set_costs (data);
5760 /* Find the optimal set of induction variables (item 3, part 2). */
5761 iv_ca = find_optimal_iv_set (data);
5762 if (!iv_ca)
5763 goto finish;
5764 changed = true;
5766 /* Create the new induction variables (item 4, part 1). */
5767 create_new_ivs (data, iv_ca);
5768 iv_ca_free (&iv_ca);
5770 /* Rewrite the uses (item 4, part 2). */
5771 rewrite_uses (data);
5773 /* Remove the ivs that are unused after rewriting. */
5774 remove_unused_ivs (data);
5776 /* We have changed the structure of induction variables; it might happen
5777 that definitions in the scev database refer to some of them that were
5778 eliminated. */
5779 scev_reset ();
5781 finish:
5782 free_loop_data (data);
5784 return changed;
5787 /* Main entry point. Optimizes induction variables in LOOPS. */
5789 void
5790 tree_ssa_iv_optimize (struct loops *loops)
5792 struct loop *loop;
5793 struct ivopts_data data;
5795 tree_ssa_iv_optimize_init (&data);
5797 /* Optimize the loops starting with the innermost ones. */
5798 loop = loops->tree_root;
5799 while (loop->inner)
5800 loop = loop->inner;
5802 /* Scan the loops, inner ones first. */
5803 while (loop != loops->tree_root)
5805 if (dump_file && (dump_flags & TDF_DETAILS))
5806 flow_loop_dump (loop, dump_file, NULL, 1);
5808 tree_ssa_iv_optimize_loop (&data, loop);
5810 if (loop->next)
5812 loop = loop->next;
5813 while (loop->inner)
5814 loop = loop->inner;
5816 else
5817 loop = loop->outer;
5820 tree_ssa_iv_optimize_finalize (&data);