gcc/tree-ssa-loop-ivopts.c
/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
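
/* [Editorial sketch, not part of the original comment.]  As an
   illustration of the transformations described above, a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   conceptually contains two induction variables, the counter i and the
   address &a[i].  Strength reduction may advance a single pointer
   instead,

     for (p = a; p != a + n; p++)
       sum += *p;

   and induction variable elimination then removes i entirely once the
   exit test has been rewritten in terms of p.  */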
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  unsigned inv_id;	/* Id of an invariant.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  unsigned cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  tree stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  enum iv_position pos;	/* Where it is computed.  */
  tree incremented_at;	/* For the original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  htab_t niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing
     a use.  */
  bool consider_all_candidates;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
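
/* [Editorial note.]  These bounds are tunable from the command line via
   --param; assuming the usual mapping of PARAM_IV_* identifiers to their
   hyphenated spellings, e.g.

     gcc -O2 --param iv-consider-all-candidates-bound=40 file.c

   raises the first of them for a single compilation.  */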
/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = loop->single_exit;

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Checks whether there exists a number X such that X * B = A, counting
   modulo 2^BITS.  */

static bool
divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
	HOST_WIDE_INT *x)
{
  unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
  unsigned HOST_WIDE_INT inv, ex, val;
  unsigned i;

  a &= mask;
  b &= mask;

  /* First divide the whole equation by 2 as long as possible.  */
  while (!(a & 1) && !(b & 1))
    {
      a >>= 1;
      b >>= 1;
      bits--;
      mask >>= 1;
    }

  if (!(b & 1))
    {
      /* If b is still even, a is odd and there is no such x.  */
      return false;
    }

  /* Find the inverse of b.  We compute it as
     b^(2^(bits - 1) - 1) (mod 2^bits).  */
  inv = 1;
  ex = b;
  for (i = 0; i < bits - 1; i++)
    {
      inv = (inv * ex) & mask;
      ex = (ex * ex) & mask;
    }

  val = (a * inv) & mask;

  gcc_assert (((val * b) & mask) == a);

  if ((val >> (bits - 1)) & 1)
    val |= ~mask;

  *x = val;

  return true;
}
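
/* [Editorial worked example.]  divide (4, 6, 3, &x): b is odd, so the
   halving loop does not run and mask stays 15.  The odd residues modulo
   2^4 form a group of order 2^3, hence the inverse of b is
   b^(2^3 - 1) = 3^7 = 2187 == 11 (mod 16).  Then val = (6 * 11) & 15 = 2,
   and indeed 2 * 3 == 6 (mod 16), so *x is set to 2 and true is
   returned.  */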
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
	return false;
      if (bsi_stmt (bsi) == stmt)
	return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}
/* Element of the table in which we cache the numbers of iterations obtained
   from exits of the loop.  */

struct nfe_cache_elt
{
  /* The edge for which the number of iterations is cached.  */
  edge exit;

  /* True if the # of iterations was successfully determined.  */
  bool valid_p;

  /* Description of # of iterations.  */
  struct tree_niter_desc niter;
};

/* Hash function for nfe_cache_elt E.  */

static hashval_t
nfe_hash (const void *e)
{
  const struct nfe_cache_elt *elt = e;

  return htab_hash_pointer (elt->exit);
}

/* Equality function for nfe_cache_elt E1 and edge E2.  */

static int
nfe_eq (const void *e1, const void *e2)
{
  const struct nfe_cache_elt *elt1 = e1;

  return elt1->exit == e2;
}

/* Returns the structure describing the number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct nfe_cache_elt *nfe_desc;
  PTR *slot;

  slot = htab_find_slot_with_hash (data->niters, exit,
				   htab_hash_pointer (exit),
				   INSERT);

  if (!*slot)
    {
      nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
      nfe_desc->exit = exit;
      nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
						     exit, &nfe_desc->niter,
						     true);
      *slot = nfe_desc;
    }
  else
    nfe_desc = *slot;

  if (!nfe_desc->valid_p)
    return NULL;

  return &nfe_desc->niter;
}
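
/* [Editorial note.]  The lookup above relies on the usual hashtab.h
   idiom: htab_find_slot_with_hash called with INSERT returns a pointer
   to the slot for EXIT, and *slot is NULL exactly when the entry was
   not present yet, which is when the cache element is allocated and
   filled in.  Subsequent calls with the same edge reuse the cached
   tree_niter_desc, whether or not it turned out to be valid.  */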
/* Returns the structure describing the number of iterations determined from
   the single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = htab_create (10, nfe_hash, nfe_eq, free);
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
/* Returns the memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
	return op0;

      if (!op0)
	return (code == PLUS_EXPR
		? op1
		: fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    case NOP_EXPR:
    case CONVERT_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);

  if (step && integer_zerop (step))
    step = NULL_TREE;

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds the induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, NULL_TREE);
    }

  return name_info (data, var)->iv;
}
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return (zero_p (iv.step) ? NULL_TREE : iv.step);
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

static bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class class;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  class = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  switch (class)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  lhs = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter;

      niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  fprintf (dump_file, "\n");

	  fprintf (dump_file, "  may be zero if ");
	  print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it does not show up in the dumps, if it
     was not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (zero_p (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
	      || TREE_CODE (stmt) == MODIFY_EXPR);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *op0_p;
  tree *op1_p;
  struct iv *iv0 = NULL, *iv1 = NULL, *civ;
  struct iv const_iv;
  tree zero = integer_zero_node;

  const_iv.step = NULL_TREE;

  if (TREE_CODE (*cond_p) != SSA_NAME
      && !COMPARISON_CLASS_P (*cond_p))
    return;

  if (TREE_CODE (*cond_p) == SSA_NAME)
    {
      op0_p = cond_p;
      op1_p = &zero;
    }
  else
    {
      op0_p = &TREE_OPERAND (*cond_p, 0);
      op1_p = &TREE_OPERAND (*cond_p, 1);
    }

  if (TREE_CODE (*op0_p) == SSA_NAME)
    iv0 = get_iv (data, *op0_p);
  else
    iv0 = &const_iv;

  if (TREE_CODE (*op1_p) == SSA_NAME)
    iv1 = get_iv (data, *op1_p);
  else
    iv1 = &const_iv;

  if (/* When comparing with a non-invariant value, we cannot do any useful
	 induction variable elimination.  */
      (!iv0 || !iv1)
      /* Eliminating a condition based on two ivs would be nontrivial.
	 ??? TODO -- it is not really important to handle this case.  */
      || (!zero_p (iv0->step) && !zero_p (iv1->step)))
    {
      find_interesting_uses_op (data, *op0_p);
      find_interesting_uses_op (data, *op1_p);
      return;
    }

  if (zero_p (iv0->step) && zero_p (iv1->step))
    {
      /* If both are invariants, this is a job for unswitching.  */
      return;
    }

  civ = XNEW (struct iv);
  *civ = zero_p (iv0->step) ? *iv1 : *iv0;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}
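
/* [Editorial example.]  For a loop condition such as i < n, where i is
   an induction variable of the loop and n is loop invariant, the code
   above records a USE_COMPARE based on i's iv; the candidate selected
   for it may later allow eliminating i by comparing against a
   precomputed bound instead.  */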
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_CODE_LENGTH (TREE_CODE (expr));
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}
/* Cumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree *step_p;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = data;
  struct iv *iv;
  tree step, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  *idx = iv->base;

  if (!iv->step)
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* For pointer arithmetic, the step is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  /* FIXME: convert_step should not be used outside chrec_convert: fix
     this by calling chrec_convert.  */
  iv_step = convert_step (dta->ivopts_data->current_loop,
			  sizetype, iv->base, iv->step, dta->stmt);

  if (!iv_step)
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);

  if (!*dta->step_p)
    *dta->step_p = step;
  else
    *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);

  return true;
}
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *data)
{
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}
/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
	  || bitpos % GET_MODE_ALIGNMENT (mode) != 0
	  || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}
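
/* [Editorial example.]  E.g. a 32-bit integer field placed at byte
   offset 2 of a packed structure yields bitpos == 16, while
   GET_MODE_ALIGNMENT of the 32-bit mode is typically 32 on a
   STRICT_ALIGNMENT target; 16 % 32 != 0, so the reference above is
   reported as possibly unaligned.  */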
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = NULL;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == COMPONENT_REF
      && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      if (step)
		step = fold_build2 (PLUS_EXPR, type, step, astep);
	      else
		step = astep;
	    }
	}

      if (zero_p (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step_p = &step;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || zero_p (step))
	goto fail;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}
/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == MODIFY_EXPR)
    {
      lhs = TREE_OPERAND (stmt, 0);
      rhs = TREE_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, lhs);

	  if (iv && !zero_p (iv->step))
	    return;
	}

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
	{
	case tcc_comparison:
	  find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
	  return;

	case tcc_reference:
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
	  if (REFERENCE_CLASS_P (lhs))
	    find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  return;

	default: ;
	}

      if (REFERENCE_CLASS_P (lhs)
	  && is_gimple_val (rhs))
	{
	  find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
	  find_interesting_uses_op (data, rhs);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !zero_p (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
	find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
	find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || zero_p (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst_type (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (zero_p (op1))
	expr = op0;
      else if (zero_p (op0))
	{
	  if (code == PLUS_EXPR)
	    expr = op1;
	  else
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && zero_p (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
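
/* [Editorial example.]  Assuming an array of 4-byte elements, stripping
   &a[i + 4] works as follows: the ARRAY_REF index i + 4 loses its
   constant summand (off1 == 4), that summand is scaled by the element
   size into *offset == 16, and the returned tree is equivalent to
   &a[i].  */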
/* Returns a variant of TYPE that can be used as a base for different uses.
   For integer types, we return the unsigned variant of the type, which
   avoids problems with overflows.  For pointer types, we return void *.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return ptr_type_node;

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}
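
/* [Editorial example.]  generic_type_for maps int to unsigned int and
   char * to void *, while an already unsigned type such as unsigned
   long is returned unchanged.  */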
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  If both BASE and STEP are NULL, we add a pseudocandidate
   for the replacement of the final value of the iv by a direct computation
   of the value.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  if (step)
	    step = fold_convert (type, step);
	}
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at)
	continue;

      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      if (!operand_equal_p (base, cand->iv->base, 0))
	continue;

      if (zero_p (cand->iv->step))
	{
	  if (zero_p (step))
	    break;
	}
      else
	{
	  if (step && operand_equal_p (step, cand->iv->step, 0))
	    break;
	}
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump, possibly confusing other optimization passes and leaving
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}
/* Adds a candidate BASE + STEP * i.  The important field is set to IMPORTANT
   and the position to POS.  If USE is not NULL, the candidate is set as
   related to it.  The candidate computation is scheduled on all available
   positions.  */

static void
add_candidate (struct ivopts_data *data,
	       tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Add a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
				     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
		 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
/* Adds candidates based on the old induction variable IV.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  tree phi, def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  add_candidate (data,
		 build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (TREE_CODE (phi) == PHI_NODE)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_old_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !zero_p (iv->step))
	add_old_iv_candidates (data, iv);
    }
}
/* Adds candidates based on the value of the induction variable IV and USE.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such a variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		 iv->step, true, use);

  /* Finally, try removing the constant offset.  */
  base = strip_offset (iv->base, &offset);
  if (offset)
    add_candidate (data, base, iv->step, false, use);
}
/* Adds candidates based on the uses.  */

static void
add_derived_ivs_candidates (struct ivopts_data *data)
{
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      if (!use)
	continue;

      switch (use->type)
	{
	case USE_NONLINEAR_EXPR:
	case USE_COMPARE:
	case USE_ADDRESS:
	  /* Just add the ivs based on the value of the iv used here.  */
	  add_iv_value_candidates (data, use->iv, use);
	  break;

	default:
	  gcc_unreachable ();
	}
    }
}

/* Record important candidates and add them to related_cands bitmaps
   if needed.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array,
   otherwise we allocate a simple list for every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	size = n_iv_cands (data);
      else
	{
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to a power of two, so that taking the modulus is
	     fast.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
2285 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2286 depends on invariants DEPENDS_ON and that the value used in expressing
2287 it is VALUE. */
2289 static void
2290 set_use_iv_cost (struct ivopts_data *data,
2291 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2292 bitmap depends_on, tree value)
2294 unsigned i, s;
2296 if (cost == INFTY)
2298 BITMAP_FREE (depends_on);
2299 return;
2302 if (data->consider_all_candidates)
2304 use->cost_map[cand->id].cand = cand;
2305 use->cost_map[cand->id].cost = cost;
2306 use->cost_map[cand->id].depends_on = depends_on;
2307 use->cost_map[cand->id].value = value;
2308 return;
2311 /* n_map_members is a power of two, so this computes modulo. */
2312 s = cand->id & (use->n_map_members - 1);
2313 for (i = s; i < use->n_map_members; i++)
2314 if (!use->cost_map[i].cand)
2315 goto found;
2316 for (i = 0; i < s; i++)
2317 if (!use->cost_map[i].cand)
2318 goto found;
2320 gcc_unreachable ();
2322 found:
2323 use->cost_map[i].cand = cand;
2324 use->cost_map[i].cost = cost;
2325 use->cost_map[i].depends_on = depends_on;
2326 use->cost_map[i].value = value;
2329 /* Gets cost of (USE, CANDIDATE) pair. */
2331 static struct cost_pair *
2332 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2333 struct iv_cand *cand)
2335 unsigned i, s;
2336 struct cost_pair *ret;
2338 if (!cand)
2339 return NULL;
2341 if (data->consider_all_candidates)
2343 ret = use->cost_map + cand->id;
2344 if (!ret->cand)
2345 return NULL;
2347 return ret;
2350 /* n_map_members is a power of two, so this computes modulo. */
2351 s = cand->id & (use->n_map_members - 1);
2352 for (i = s; i < use->n_map_members; i++)
2353 if (use->cost_map[i].cand == cand)
2354 return use->cost_map + i;
2356 for (i = 0; i < s; i++)
2357 if (use->cost_map[i].cand == cand)
2358 return use->cost_map + i;
2360 return NULL;
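/* Illustrative note, not part of this pass: the two loops above (and the
   matching pair in set_use_iv_cost) perform one wrap-around linear-probing
   scan starting at slot S.  Because n_map_members is a power of two, an
   equivalent single loop would be:

     for (i = 0; i < use->n_map_members; i++)
       {
         unsigned slot = (s + i) & (use->n_map_members - 1);

         if (use->cost_map[slot].cand == cand)
           return use->cost_map + slot;
       }
     return NULL;  */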
2363 /* Returns an estimate of the cost of computing the insn sequence SEQ. */
2365 static unsigned
2366 seq_cost (rtx seq)
2368 unsigned cost = 0;
2369 rtx set;
2371 for (; seq; seq = NEXT_INSN (seq))
2373 set = single_set (seq);
2374 if (set)
2375 cost += rtx_cost (set, SET);
2376 else
2377 cost++;
2380 return cost;
2383 /* Produces DECL_RTL for object OBJ so that it looks as if it is stored in memory. */
2384 static rtx
2385 produce_memory_decl_rtl (tree obj, int *regno)
2387 rtx x;
2389 gcc_assert (obj);
2390 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2392 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2393 x = gen_rtx_SYMBOL_REF (Pmode, name);
2395 else
2396 x = gen_raw_REG (Pmode, (*regno)++);
2398 return gen_rtx_MEM (DECL_MODE (obj), x);
2401 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2402 walk_tree. DATA contains the current fake register number. */
2404 static tree
2405 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2407 tree obj = NULL_TREE;
2408 rtx x = NULL_RTX;
2409 int *regno = data;
2411 switch (TREE_CODE (*expr_p))
2413 case ADDR_EXPR:
2414 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2415 handled_component_p (*expr_p);
2416 expr_p = &TREE_OPERAND (*expr_p, 0))
2417 continue;
2418 obj = *expr_p;
2419 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2420 x = produce_memory_decl_rtl (obj, regno);
2421 break;
2423 case SSA_NAME:
2424 *ws = 0;
2425 obj = SSA_NAME_VAR (*expr_p);
2426 if (!DECL_RTL_SET_P (obj))
2427 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2428 break;
2430 case VAR_DECL:
2431 case PARM_DECL:
2432 case RESULT_DECL:
2433 *ws = 0;
2434 obj = *expr_p;
2436 if (DECL_RTL_SET_P (obj))
2437 break;
2439 if (DECL_MODE (obj) == BLKmode)
2440 x = produce_memory_decl_rtl (obj, regno);
2441 else
2442 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2444 break;
2446 default:
2447 break;
2450 if (x)
2452 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2453 SET_DECL_RTL (obj, x);
2456 return NULL_TREE;
2459 /* Determines cost of the computation of EXPR. */
2461 static unsigned
2462 computation_cost (tree expr)
2464 rtx seq, rslt;
2465 tree type = TREE_TYPE (expr);
2466 unsigned cost;
2467 /* Avoid using hard regs in ways which may be unsupported. */
2468 int regno = LAST_VIRTUAL_REGISTER + 1;
2470 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2471 start_sequence ();
2472 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2473 seq = get_insns ();
2474 end_sequence ();
2476 cost = seq_cost (seq);
2477 if (MEM_P (rslt))
2478 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2480 return cost;
2483 /* Returns variable containing the value of candidate CAND at statement AT. */
2485 static tree
2486 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2488 if (stmt_after_increment (loop, cand, stmt))
2489 return cand->var_after;
2490 else
2491 return cand->var_before;
2494 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2495 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2497 int
2498 tree_int_cst_sign_bit (tree t)
2500 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2501 unsigned HOST_WIDE_INT w;
2503 if (bitno < HOST_BITS_PER_WIDE_INT)
2504 w = TREE_INT_CST_LOW (t);
2505 else
2507 w = TREE_INT_CST_HIGH (t);
2508 bitno -= HOST_BITS_PER_WIDE_INT;
2511 return (w >> bitno) & 1;
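/* Illustrative worked example, not part of this pass: for a 16-bit type,
   bitno == 15 < HOST_BITS_PER_WIDE_INT, so W is the low word and the
   result is (w >> 15) & 1 -- e.g. 1 for t == 0x8000 and 0 for
   t == 0x7fff.  Only for precisions above HOST_BITS_PER_WIDE_INT is the
   bit taken from the high word instead.  */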
2514 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2515 return cst. Otherwise return NULL_TREE. */
2517 static tree
2518 constant_multiple_of (tree type, tree top, tree bot)
2520 tree res, mby, p0, p1;
2521 enum tree_code code;
2522 bool negate;
2524 STRIP_NOPS (top);
2525 STRIP_NOPS (bot);
2527 if (operand_equal_p (top, bot, 0))
2528 return build_int_cst (type, 1);
2530 code = TREE_CODE (top);
2531 switch (code)
2533 case MULT_EXPR:
2534 mby = TREE_OPERAND (top, 1);
2535 if (TREE_CODE (mby) != INTEGER_CST)
2536 return NULL_TREE;
2538 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2539 if (!res)
2540 return NULL_TREE;
2542 return fold_binary_to_constant (MULT_EXPR, type, res,
2543 fold_convert (type, mby));
2545 case PLUS_EXPR:
2546 case MINUS_EXPR:
2547 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2548 if (!p0)
2549 return NULL_TREE;
2550 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2551 if (!p1)
2552 return NULL_TREE;
2554 return fold_binary_to_constant (code, type, p0, p1);
2556 case INTEGER_CST:
2557 if (TREE_CODE (bot) != INTEGER_CST)
2558 return NULL_TREE;
2560 bot = fold_convert (type, bot);
2561 top = fold_convert (type, top);
2563 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2564 the result afterwards. */
2565 if (tree_int_cst_sign_bit (bot))
2567 negate = true;
2568 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2570 else
2571 negate = false;
2573 /* Ditto for TOP. */
2574 if (tree_int_cst_sign_bit (top))
2576 negate = !negate;
2577 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2580 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2581 return NULL_TREE;
2583 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2584 if (negate)
2585 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2586 return res;
2588 default:
2589 return NULL_TREE;
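/* Illustrative worked examples, not part of this pass (x is any tree):

     constant_multiple_of (type, x * 6, x)          == 6   (MULT_EXPR case)
     constant_multiple_of (type, x * 4 + x * 2, x)  == 6   (PLUS_EXPR case)
     constant_multiple_of (type, 12, -4)            == -3  (INTEGER_CST case:
         BOT is negated to 4, 12 % 4 == 0, 12 / 4 == 3, result negated back)
     constant_multiple_of (type, x + 1, x)          == NULL_TREE,
         since the constant 1 is not a constant multiple of x.  */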
2593 /* Sets COMB to CST. */
2595 static void
2596 aff_combination_const (struct affine_tree_combination *comb, tree type,
2597 unsigned HOST_WIDE_INT cst)
2599 unsigned prec = TYPE_PRECISION (type);
2601 comb->type = type;
2602 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2604 comb->n = 0;
2605 comb->rest = NULL_TREE;
2606 comb->offset = cst & comb->mask;
2609 /* Sets COMB to single element ELT. */
2611 static void
2612 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2614 unsigned prec = TYPE_PRECISION (type);
2616 comb->type = type;
2617 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2619 comb->n = 1;
2620 comb->elts[0] = elt;
2621 comb->coefs[0] = 1;
2622 comb->rest = NULL_TREE;
2623 comb->offset = 0;
2626 /* Scales COMB by SCALE. */
2628 static void
2629 aff_combination_scale (struct affine_tree_combination *comb,
2630 unsigned HOST_WIDE_INT scale)
2632 unsigned i, j;
2634 if (scale == 1)
2635 return;
2637 if (scale == 0)
2639 aff_combination_const (comb, comb->type, 0);
2640 return;
2643 comb->offset = (scale * comb->offset) & comb->mask;
2644 for (i = 0, j = 0; i < comb->n; i++)
2646 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2647 comb->elts[j] = comb->elts[i];
2648 if (comb->coefs[j] != 0)
2649 j++;
2651 comb->n = j;
2653 if (comb->rest)
2655 if (comb->n < MAX_AFF_ELTS)
2657 comb->coefs[comb->n] = scale;
2658 comb->elts[comb->n] = comb->rest;
2659 comb->rest = NULL_TREE;
2660 comb->n++;
2662 else
2663 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2664 build_int_cst_type (comb->type, scale));
2668 /* Adds ELT * SCALE to COMB. */
2670 static void
2671 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2672 unsigned HOST_WIDE_INT scale)
2674 unsigned i;
2676 if (scale == 0)
2677 return;
2679 for (i = 0; i < comb->n; i++)
2680 if (operand_equal_p (comb->elts[i], elt, 0))
2682 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2683 if (comb->coefs[i])
2684 return;
2686 comb->n--;
2687 comb->coefs[i] = comb->coefs[comb->n];
2688 comb->elts[i] = comb->elts[comb->n];
2690 if (comb->rest)
2692 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2693 comb->coefs[comb->n] = 1;
2694 comb->elts[comb->n] = comb->rest;
2695 comb->rest = NULL_TREE;
2696 comb->n++;
2698 return;
2700 if (comb->n < MAX_AFF_ELTS)
2702 comb->coefs[comb->n] = scale;
2703 comb->elts[comb->n] = elt;
2704 comb->n++;
2705 return;
2708 if (scale == 1)
2709 elt = fold_convert (comb->type, elt);
2710 else
2711 elt = fold_build2 (MULT_EXPR, comb->type,
2712 fold_convert (comb->type, elt),
2713 build_int_cst_type (comb->type, scale));
2715 if (comb->rest)
2716 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2717 else
2718 comb->rest = elt;
2721 /* Adds COMB2 to COMB1. */
2723 static void
2724 aff_combination_add (struct affine_tree_combination *comb1,
2725 struct affine_tree_combination *comb2)
2727 unsigned i;
2729 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2730 for (i = 0; i < comb2->n; i++)
2731 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2732 if (comb2->rest)
2733 aff_combination_add_elt (comb1, comb2->rest, 1);
2736 /* Splits EXPR into an affine combination of parts. */
2738 static void
2739 tree_to_aff_combination (tree expr, tree type,
2740 struct affine_tree_combination *comb)
2742 struct affine_tree_combination tmp;
2743 enum tree_code code;
2744 tree cst, core, toffset;
2745 HOST_WIDE_INT bitpos, bitsize;
2746 enum machine_mode mode;
2747 int unsignedp, volatilep;
2749 STRIP_NOPS (expr);
2751 code = TREE_CODE (expr);
2752 switch (code)
2754 case INTEGER_CST:
2755 aff_combination_const (comb, type, int_cst_value (expr));
2756 return;
2758 case PLUS_EXPR:
2759 case MINUS_EXPR:
2760 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2761 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2762 if (code == MINUS_EXPR)
2763 aff_combination_scale (&tmp, -1);
2764 aff_combination_add (comb, &tmp);
2765 return;
2767 case MULT_EXPR:
2768 cst = TREE_OPERAND (expr, 1);
2769 if (TREE_CODE (cst) != INTEGER_CST)
2770 break;
2771 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2772 aff_combination_scale (comb, int_cst_value (cst));
2773 return;
2775 case NEGATE_EXPR:
2776 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2777 aff_combination_scale (comb, -1);
2778 return;
2780 case ADDR_EXPR:
2781 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2782 &toffset, &mode, &unsignedp, &volatilep,
2783 false);
2784 if (bitpos % BITS_PER_UNIT != 0)
2785 break;
2786 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2787 core = build_fold_addr_expr (core);
2788 if (TREE_CODE (core) == ADDR_EXPR)
2789 aff_combination_add_elt (comb, core, 1);
2790 else
2792 tree_to_aff_combination (core, type, &tmp);
2793 aff_combination_add (comb, &tmp);
2795 if (toffset)
2797 tree_to_aff_combination (toffset, type, &tmp);
2798 aff_combination_add (comb, &tmp);
2800 return;
2802 default:
2803 break;
2806 aff_combination_elt (comb, type, expr);
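/* Illustrative worked example, not part of this pass: for
   expr == a + b * 3 - 2, the resulting decomposition is

     comb->offset  == -2 & comb->mask
     comb->n       == 2
     comb->elts[0] == a,  comb->coefs[0] == 1
     comb->elts[1] == b,  comb->coefs[1] == 3
     comb->rest    == NULL_TREE

   MINUS_EXPR is handled by scaling the right operand by -1 and adding;
   elements beyond MAX_AFF_ELTS overflow into comb->rest.  */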
2809 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2811 static tree
2812 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2813 unsigned HOST_WIDE_INT mask)
2815 enum tree_code code;
2817 scale &= mask;
2818 elt = fold_convert (type, elt);
2820 if (scale == 1)
2822 if (!expr)
2823 return elt;
2825 return fold_build2 (PLUS_EXPR, type, expr, elt);
2828 if (scale == mask)
2830 if (!expr)
2831 return fold_build1 (NEGATE_EXPR, type, elt);
2833 return fold_build2 (MINUS_EXPR, type, expr, elt);
2836 if (!expr)
2837 return fold_build2 (MULT_EXPR, type, elt,
2838 build_int_cst_type (type, scale));
2840 if ((scale | (mask >> 1)) == mask)
2842 /* Scale is negative. */
2843 code = MINUS_EXPR;
2844 scale = (-scale) & mask;
2846 else
2847 code = PLUS_EXPR;
2849 elt = fold_build2 (MULT_EXPR, type, elt,
2850 build_int_cst_type (type, scale));
2851 return fold_build2 (code, type, expr, elt);
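/* Illustrative worked example, not part of this pass: with a 32-bit MASK
   of 0xffffffff, a scale of -2 is represented as 0xfffffffe, and

     (scale | (mask >> 1)) == (0xfffffffe | 0x7fffffff) == mask,

   so the test above detects the negative scale and emits
   EXPR - 2 * ELT instead of EXPR + 0xfffffffe * ELT.  */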
2854 /* Copies the tree elements of COMB to ensure that they are not shared. */
2856 static void
2857 unshare_aff_combination (struct affine_tree_combination *comb)
2859 unsigned i;
2861 for (i = 0; i < comb->n; i++)
2862 comb->elts[i] = unshare_expr (comb->elts[i]);
2863 if (comb->rest)
2864 comb->rest = unshare_expr (comb->rest);
2867 /* Makes a tree from the affine combination COMB. */
2869 static tree
2870 aff_combination_to_tree (struct affine_tree_combination *comb)
2872 tree type = comb->type;
2873 tree expr = comb->rest;
2874 unsigned i;
2875 unsigned HOST_WIDE_INT off, sgn;
2877 /* Handle the special case produced by get_computation_aff when
2878 the type does not fit in HOST_WIDE_INT. */
2879 if (comb->n == 0 && comb->offset == 0)
2880 return fold_convert (type, expr);
2882 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2884 for (i = 0; i < comb->n; i++)
2885 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2886 comb->mask);
2888 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2890 /* Offset is negative. */
2891 off = (-comb->offset) & comb->mask;
2892 sgn = comb->mask;
2894 else
2896 off = comb->offset;
2897 sgn = 1;
2899 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2900 comb->mask);
2903 /* Determines the expression by which USE is expressed from induction variable
2904 CAND at statement AT in LOOP. The expression is stored in decomposed
2905 form in AFF. Returns false if USE cannot be expressed using CAND. */
2907 static bool
2908 get_computation_aff (struct loop *loop,
2909 struct iv_use *use, struct iv_cand *cand, tree at,
2910 struct affine_tree_combination *aff)
2912 tree ubase = use->iv->base;
2913 tree ustep = use->iv->step;
2914 tree cbase = cand->iv->base;
2915 tree cstep = cand->iv->step;
2916 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2917 tree uutype;
2918 tree expr, delta;
2919 tree ratio;
2920 unsigned HOST_WIDE_INT ustepi, cstepi;
2921 HOST_WIDE_INT ratioi;
2922 struct affine_tree_combination cbase_aff, expr_aff;
2923 tree cstep_orig = cstep, ustep_orig = ustep;
2925 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2927 /* We do not have enough precision to express the values of the use. */
2928 return false;
2931 expr = var_at_stmt (loop, cand, at);
2933 if (TREE_TYPE (expr) != ctype)
2935 /* This may happen with the original ivs. */
2936 expr = fold_convert (ctype, expr);
2939 if (TYPE_UNSIGNED (utype))
2940 uutype = utype;
2941 else
2943 uutype = unsigned_type_for (utype);
2944 ubase = fold_convert (uutype, ubase);
2945 ustep = fold_convert (uutype, ustep);
2948 if (uutype != ctype)
2950 expr = fold_convert (uutype, expr);
2951 cbase = fold_convert (uutype, cbase);
2952 cstep = fold_convert (uutype, cstep);
2954 /* If the conversion is not noop, we must take it into account when
2955 considering the value of the step. */
2956 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2957 cstep_orig = cstep;
2960 if (cst_and_fits_in_hwi (cstep_orig)
2961 && cst_and_fits_in_hwi (ustep_orig))
2963 ustepi = int_cst_value (ustep_orig);
2964 cstepi = int_cst_value (cstep_orig);
2966 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2968 /* TODO maybe consider case when ustep divides cstep and the ratio is
2969 a power of 2 (so that the division is fast to execute)? We would
2970 need to be much more careful with overflows etc. then. */
2971 return false;
2974 ratio = build_int_cst_type (uutype, ratioi);
2976 else
2978 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2979 if (!ratio)
2980 return false;
2982 /* Ratioi is only used to detect special cases when the multiplicative
2983 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2984 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2985 to integer_onep/integer_all_onesp, since the former ignores
2986 TREE_OVERFLOW. */
2987 if (cst_and_fits_in_hwi (ratio))
2988 ratioi = int_cst_value (ratio);
2989 else if (integer_onep (ratio))
2990 ratioi = 1;
2991 else if (integer_all_onesp (ratio))
2992 ratioi = -1;
2993 else
2994 ratioi = 0;
2997 /* We may need to shift the value if we are after the increment. */
2998 if (stmt_after_increment (loop, cand, at))
2999 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3001 /* use = ubase - ratio * cbase + ratio * var.
3003 In the general case ubase + ratio * (var - cbase) could be better (one less
3004 multiplication), but it is often possible to eliminate redundant parts
3005 of the computations from the (ubase - ratio * cbase) term, and when that does
3006 not happen, fold is able to apply the distributive law to obtain this form
3007 anyway. */
3009 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3011 /* Let's compute in trees and just return the result in AFF. This case
3012 should not be very common, and fold itself is not that bad either,
3013 so making the aff. functions more complicated to handle this case
3014 is not that urgent. */
3015 if (ratioi == 1)
3017 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3018 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3020 else if (ratioi == -1)
3022 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3023 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3025 else
3027 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3028 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3029 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3030 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3033 aff->type = uutype;
3034 aff->n = 0;
3035 aff->offset = 0;
3036 aff->mask = 0;
3037 aff->rest = expr;
3038 return true;
3041 /* If we got here, the type fits in HOST_WIDE_INT, so it must have been
3042 possible to compute ratioi. */
3043 gcc_assert (ratioi);
3045 tree_to_aff_combination (ubase, uutype, aff);
3046 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3047 tree_to_aff_combination (expr, uutype, &expr_aff);
3048 aff_combination_scale (&cbase_aff, -ratioi);
3049 aff_combination_scale (&expr_aff, ratioi);
3050 aff_combination_add (aff, &cbase_aff);
3051 aff_combination_add (aff, &expr_aff);
3053 return true;
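/* Illustrative worked example, not part of this pass: suppose the use is
   a byte address with base 4 and step 4 (walking an int array) and the
   candidate is a counter with base 0 and step 1.  Then ratio == 4 and

     use = ubase - ratio * cbase + ratio * var == 4 + 4 * var,

   i.e. the use becomes a constant offset plus a scaled copy of the
   candidate variable.  */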
3056 /* Determines the expression by which USE is expressed from induction variable
3057 CAND at statement AT in LOOP. The computation is unshared. */
3059 static tree
3060 get_computation_at (struct loop *loop,
3061 struct iv_use *use, struct iv_cand *cand, tree at)
3063 struct affine_tree_combination aff;
3064 tree type = TREE_TYPE (use->iv->base);
3066 if (!get_computation_aff (loop, use, cand, at, &aff))
3067 return NULL_TREE;
3068 unshare_aff_combination (&aff);
3069 return fold_convert (type, aff_combination_to_tree (&aff));
3072 /* Determines the expression by which USE is expressed from induction variable
3073 CAND in LOOP. The computation is unshared. */
3075 static tree
3076 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3078 return get_computation_at (loop, use, cand, use->stmt);
3081 /* Returns cost of addition in MODE. */
3083 static unsigned
3084 add_cost (enum machine_mode mode)
3086 static unsigned costs[NUM_MACHINE_MODES];
3087 rtx seq;
3088 unsigned cost;
3090 if (costs[mode])
3091 return costs[mode];
3093 start_sequence ();
3094 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3095 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3096 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3097 NULL_RTX);
3098 seq = get_insns ();
3099 end_sequence ();
3101 cost = seq_cost (seq);
3102 if (!cost)
3103 cost = 1;
3105 costs[mode] = cost;
3107 if (dump_file && (dump_flags & TDF_DETAILS))
3108 fprintf (dump_file, "Addition in %s costs %d\n",
3109 GET_MODE_NAME (mode), cost);
3110 return cost;
3113 /* Entry in a hashtable of already known costs for multiplication. */
3114 struct mbc_entry
3116 HOST_WIDE_INT cst; /* The constant to multiply by. */
3117 enum machine_mode mode; /* In mode. */
3118 unsigned cost; /* The cost. */
3121 /* Computes the hash value for ENTRY. */
3123 static hashval_t
3124 mbc_entry_hash (const void *entry)
3126 const struct mbc_entry *e = entry;
3128 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3131 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3133 static int
3134 mbc_entry_eq (const void *entry1, const void *entry2)
3136 const struct mbc_entry *e1 = entry1;
3137 const struct mbc_entry *e2 = entry2;
3139 return (e1->mode == e2->mode
3140 && e1->cst == e2->cst);
3143 /* Returns cost of multiplication by constant CST in MODE. */
3145 unsigned
3146 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3148 static htab_t costs;
3149 struct mbc_entry **cached, act;
3150 rtx seq;
3151 unsigned cost;
3153 if (!costs)
3154 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3156 act.mode = mode;
3157 act.cst = cst;
3158 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3159 if (*cached)
3160 return (*cached)->cost;
3162 *cached = XNEW (struct mbc_entry);
3163 (*cached)->mode = mode;
3164 (*cached)->cst = cst;
3166 start_sequence ();
3167 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3168 gen_int_mode (cst, mode), NULL_RTX, 0);
3169 seq = get_insns ();
3170 end_sequence ();
3172 cost = seq_cost (seq);
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3176 (int) cst, GET_MODE_NAME (mode), cost);
3178 (*cached)->cost = cost;
3180 return cost;
3183 /* Returns true if multiplying by RATIO is allowed in an address. */
3185 bool
3186 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3188 #define MAX_RATIO 128
3189 static sbitmap valid_mult;
3191 if (!valid_mult)
3193 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3194 rtx addr;
3195 HOST_WIDE_INT i;
3197 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3198 sbitmap_zero (valid_mult);
3199 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3200 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3202 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3203 if (memory_address_p (Pmode, addr))
3204 SET_BIT (valid_mult, i + MAX_RATIO);
3207 if (dump_file && (dump_flags & TDF_DETAILS))
3209 fprintf (dump_file, " allowed multipliers:");
3210 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3211 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3212 fprintf (dump_file, " %d", (int) i);
3213 fprintf (dump_file, "\n");
3214 fprintf (dump_file, "\n");
3218 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3219 return false;
3221 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3224 /* Returns cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3225 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3226 the variable is omitted. The address is constructed in Pmode.
3228 TODO -- there must be some better way. This all is quite crude. */
3230 static unsigned
3231 get_address_cost (bool symbol_present, bool var_present,
3232 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3234 static bool initialized = false;
3235 static HOST_WIDE_INT rat, off;
3236 static HOST_WIDE_INT min_offset, max_offset;
3237 static unsigned costs[2][2][2][2];
3238 unsigned cost, acost;
3239 rtx seq, addr, base;
3240 bool offset_p, ratio_p;
3241 rtx reg1;
3242 HOST_WIDE_INT s_offset;
3243 unsigned HOST_WIDE_INT mask;
3244 unsigned bits;
3246 if (!initialized)
3248 HOST_WIDE_INT i;
3249 initialized = true;
3251 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3253 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3254 for (i = 1; i <= 1 << 20; i <<= 1)
3256 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3257 if (!memory_address_p (Pmode, addr))
3258 break;
3260 max_offset = i >> 1;
3261 off = max_offset;
3263 for (i = 1; i <= 1 << 20; i <<= 1)
3265 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3266 if (!memory_address_p (Pmode, addr))
3267 break;
3269 min_offset = -(i >> 1);
3271 if (dump_file && (dump_flags & TDF_DETAILS))
3273 fprintf (dump_file, "get_address_cost:\n");
3274 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3275 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3278 rat = 1;
3279 for (i = 2; i <= MAX_RATIO; i++)
3280 if (multiplier_allowed_in_address_p (i))
3282 rat = i;
3283 break;
3287 bits = GET_MODE_BITSIZE (Pmode);
3288 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3289 offset &= mask;
3290 if ((offset >> (bits - 1) & 1))
3291 offset |= ~mask;
3292 s_offset = offset;
3294 cost = 0;
3295 offset_p = (s_offset != 0
3296 && min_offset <= s_offset && s_offset <= max_offset);
3297 ratio_p = (ratio != 1
3298 && multiplier_allowed_in_address_p (ratio));
3300 if (ratio != 1 && !ratio_p)
3301 cost += multiply_by_cost (ratio, Pmode);
3303 if (s_offset && !offset_p && !symbol_present)
3305 cost += add_cost (Pmode);
3306 var_present = true;
3309 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3310 if (!acost)
3312 int old_cse_not_expected;
3313 acost = 0;
3315 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3316 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3317 if (ratio_p)
3318 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3320 if (var_present)
3321 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3323 if (symbol_present)
3325 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3326 if (offset_p)
3327 base = gen_rtx_fmt_e (CONST, Pmode,
3328 gen_rtx_fmt_ee (PLUS, Pmode,
3329 base,
3330 gen_int_mode (off, Pmode)));
3332 else if (offset_p)
3333 base = gen_int_mode (off, Pmode);
3334 else
3335 base = NULL_RTX;
3337 if (base)
3338 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3340 start_sequence ();
3341 /* To avoid splitting addressing modes, pretend that no cse will
3342 follow. */
3343 old_cse_not_expected = cse_not_expected;
3344 cse_not_expected = true;
3345 addr = memory_address (Pmode, addr);
3346 cse_not_expected = old_cse_not_expected;
3347 seq = get_insns ();
3348 end_sequence ();
3350 acost = seq_cost (seq);
3351 acost += address_cost (addr, Pmode);
3353 if (!acost)
3354 acost = 1;
3355 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3358 return cost + acost;
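/* Illustrative worked example, not part of this pass: the masking above
   sign-extends OFFSET from the width of Pmode to HOST_WIDE_INT.  For a
   32-bit Pmode, mask == 0xffffffff; an offset of 0xfffffffc has bit 31
   set, so it becomes 0xfffffffc | ~mask, giving s_offset == -4, which
   can then be compared against min_offset and max_offset.  */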
3361 /* Estimates cost of forcing expression EXPR into a variable. */
3363 unsigned
3364 force_expr_to_var_cost (tree expr)
3366 static bool costs_initialized = false;
3367 static unsigned integer_cost;
3368 static unsigned symbol_cost;
3369 static unsigned address_cost;
3370 tree op0, op1;
3371 unsigned cost0, cost1, cost;
3372 enum machine_mode mode;
3374 if (!costs_initialized)
3376 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3377 rtx x = gen_rtx_MEM (DECL_MODE (var),
3378 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3379 tree addr;
3380 tree type = build_pointer_type (integer_type_node);
3382 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3383 2000));
3385 SET_DECL_RTL (var, x);
3386 TREE_STATIC (var) = 1;
3387 addr = build1 (ADDR_EXPR, type, var);
3388 symbol_cost = computation_cost (addr) + 1;
3390 address_cost
3391 = computation_cost (build2 (PLUS_EXPR, type,
3392 addr,
3393 build_int_cst_type (type, 2000))) + 1;
3394 if (dump_file && (dump_flags & TDF_DETAILS))
3396 fprintf (dump_file, "force_expr_to_var_cost:\n");
3397 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3398 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3399 fprintf (dump_file, " address %d\n", (int) address_cost);
3400 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3401 fprintf (dump_file, "\n");
3404 costs_initialized = true;
3407 STRIP_NOPS (expr);
3409 if (SSA_VAR_P (expr))
3410 return 0;
3412 if (TREE_INVARIANT (expr))
3414 if (TREE_CODE (expr) == INTEGER_CST)
3415 return integer_cost;
3417 if (TREE_CODE (expr) == ADDR_EXPR)
3419 tree obj = TREE_OPERAND (expr, 0);
3421 if (TREE_CODE (obj) == VAR_DECL
3422 || TREE_CODE (obj) == PARM_DECL
3423 || TREE_CODE (obj) == RESULT_DECL)
3424 return symbol_cost;
3427 return address_cost;
3430 switch (TREE_CODE (expr))
3432 case PLUS_EXPR:
3433 case MINUS_EXPR:
3434 case MULT_EXPR:
3435 op0 = TREE_OPERAND (expr, 0);
3436 op1 = TREE_OPERAND (expr, 1);
3437 STRIP_NOPS (op0);
3438 STRIP_NOPS (op1);
3440 if (is_gimple_val (op0))
3441 cost0 = 0;
3442 else
3443 cost0 = force_expr_to_var_cost (op0);
3445 if (is_gimple_val (op1))
3446 cost1 = 0;
3447 else
3448 cost1 = force_expr_to_var_cost (op1);
3450 break;
3452 default:
3453 /* Just an arbitrary value, FIXME. */
3454 return target_spill_cost;
3457 mode = TYPE_MODE (TREE_TYPE (expr));
3458 switch (TREE_CODE (expr))
3460 case PLUS_EXPR:
3461 case MINUS_EXPR:
3462 cost = add_cost (mode);
3463 break;
3465 case MULT_EXPR:
3466 if (cst_and_fits_in_hwi (op0))
3467 cost = multiply_by_cost (int_cst_value (op0), mode);
3468 else if (cst_and_fits_in_hwi (op1))
3469 cost = multiply_by_cost (int_cst_value (op1), mode);
3470 else
3471 return target_spill_cost;
3472 break;
3474 default:
3475 gcc_unreachable ();
3478 cost += cost0;
3479 cost += cost1;
3481 /* Bound the cost by target_spill_cost. The parts of complicated
3482 computations often are either loop invariant or at least can
3483 be shared between several iv uses, so letting this grow without
3484 limits would not give reasonable results. */
3485 return cost < target_spill_cost ? cost : target_spill_cost;
3488 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3489 invariants the computation depends on. */
3491 static unsigned
3492 force_var_cost (struct ivopts_data *data,
3493 tree expr, bitmap *depends_on)
3495 if (depends_on)
3497 fd_ivopts_data = data;
3498 walk_tree (&expr, find_depends, depends_on, NULL);
3501 return force_expr_to_var_cost (expr);
3504 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3505 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3506 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3507 invariants the computation depends on. */
3509 static unsigned
3510 split_address_cost (struct ivopts_data *data,
3511 tree addr, bool *symbol_present, bool *var_present,
3512 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3514 tree core;
3515 HOST_WIDE_INT bitsize;
3516 HOST_WIDE_INT bitpos;
3517 tree toffset;
3518 enum machine_mode mode;
3519 int unsignedp, volatilep;
3521 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3522 &unsignedp, &volatilep, false);
3524 if (toffset != 0
3525 || bitpos % BITS_PER_UNIT != 0
3526 || TREE_CODE (core) != VAR_DECL)
3528 *symbol_present = false;
3529 *var_present = true;
3530 fd_ivopts_data = data;
3531 walk_tree (&addr, find_depends, depends_on, NULL);
3532 return target_spill_cost;
3535 *offset += bitpos / BITS_PER_UNIT;
3536 if (TREE_STATIC (core)
3537 || DECL_EXTERNAL (core))
3539 *symbol_present = true;
3540 *var_present = false;
3541 return 0;
3544 *symbol_present = false;
3545 *var_present = true;
3546 return 0;
3549 /* Estimates cost of expressing difference of addresses E1 - E2 as
3550 var + symbol + offset. The value of offset is added to OFFSET,
3551 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3552 part is missing. DEPENDS_ON is a set of the invariants the computation
3553 depends on. */
3555 static unsigned
3556 ptr_difference_cost (struct ivopts_data *data,
3557 tree e1, tree e2, bool *symbol_present, bool *var_present,
3558 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3560 HOST_WIDE_INT diff = 0;
3561 unsigned cost;
3563 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3565 if (ptr_difference_const (e1, e2, &diff))
3567 *offset += diff;
3568 *symbol_present = false;
3569 *var_present = false;
3570 return 0;
3573 if (e2 == integer_zero_node)
3574 return split_address_cost (data, TREE_OPERAND (e1, 0),
3575 symbol_present, var_present, offset, depends_on);
3577 *symbol_present = false;
3578 *var_present = true;
3580 cost = force_var_cost (data, e1, depends_on);
3581 cost += force_var_cost (data, e2, depends_on);
3582 cost += add_cost (Pmode);
3584 return cost;
3587 /* Estimates cost of expressing difference E1 - E2 as
3588 var + symbol + offset. The value of offset is added to OFFSET,
3589 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3590 part is missing. DEPENDS_ON is a set of the invariants the computation
3591 depends on. */
3593 static unsigned
3594 difference_cost (struct ivopts_data *data,
3595 tree e1, tree e2, bool *symbol_present, bool *var_present,
3596 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3598 unsigned cost;
3599 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3600 unsigned HOST_WIDE_INT off1, off2;
3602 e1 = strip_offset (e1, &off1);
3603 e2 = strip_offset (e2, &off2);
3604 *offset += off1 - off2;
3606 STRIP_NOPS (e1);
3607 STRIP_NOPS (e2);
3609 if (TREE_CODE (e1) == ADDR_EXPR)
3610 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3611 depends_on);
3612 *symbol_present = false;
3614 if (operand_equal_p (e1, e2, 0))
3616 *var_present = false;
3617 return 0;
3619 *var_present = true;
3620 if (zero_p (e2))
3621 return force_var_cost (data, e1, depends_on);
3623 if (zero_p (e1))
3625 cost = force_var_cost (data, e2, depends_on);
3626 cost += multiply_by_cost (-1, mode);
3628 return cost;
3631 cost = force_var_cost (data, e1, depends_on);
3632 cost += force_var_cost (data, e2, depends_on);
3633 cost += add_cost (mode);
3635 return cost;
3638 /* Determines the cost of the computation by which USE is expressed
3639 from induction variable CAND. If ADDRESS_P is true, we just need
3640 to create an address from it, otherwise we want to get it into a
3641 register. A set of invariants we depend on is stored in
3642 DEPENDS_ON. AT is the statement at which the value is computed. */
3644 static unsigned
3645 get_computation_cost_at (struct ivopts_data *data,
3646 struct iv_use *use, struct iv_cand *cand,
3647 bool address_p, bitmap *depends_on, tree at)
3649 tree ubase = use->iv->base, ustep = use->iv->step;
3650 tree cbase, cstep;
3651 tree utype = TREE_TYPE (ubase), ctype;
3652 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3653 HOST_WIDE_INT ratio, aratio;
3654 bool var_present, symbol_present;
3655 unsigned cost = 0, n_sums;
3657 *depends_on = NULL;
3659 /* Only consider real candidates. */
3660 if (!cand->iv)
3661 return INFTY;
3663 cbase = cand->iv->base;
3664 cstep = cand->iv->step;
3665 ctype = TREE_TYPE (cbase);
3667 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3669 /* We do not have enough precision to express the values of the use. */
3670 return INFTY;
3673 if (address_p)
3675 /* Do not try to express the address of an object with a computation based
3676 on the address of a different object. This may cause problems in RTL-level
3677 alias analysis (which does not expect this to happen,
3678 as it is illegal in C), and would be unlikely to be useful
3679 anyway. */
3680 if (use->iv->base_object
3681 && cand->iv->base_object
3682 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3683 return INFTY;
3686 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3688 /* TODO -- add direct handling of this case. */
3689 goto fallback;
3692 /* CSTEPI is removed from the offset in case the statement is after the
3693 increment. If the step is not constant, we use zero instead.
3694 This is a bit imprecise (there is an extra addition), but
3695 redundancy elimination is likely to transform the code so that
3696 it uses the value of the variable before the increment anyway,
3697 so this is not very unrealistic. */
3698 if (cst_and_fits_in_hwi (cstep))
3699 cstepi = int_cst_value (cstep);
3700 else
3701 cstepi = 0;
3703 if (cst_and_fits_in_hwi (ustep)
3704 && cst_and_fits_in_hwi (cstep))
3706 ustepi = int_cst_value (ustep);
3708 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3709 return INFTY;
3711 else
3713 tree rat;
3715 rat = constant_multiple_of (utype, ustep, cstep);
3717 if (!rat)
3718 return INFTY;
3720 if (cst_and_fits_in_hwi (rat))
3721 ratio = int_cst_value (rat);
3722 else if (integer_onep (rat))
3723 ratio = 1;
3724 else if (integer_all_onesp (rat))
3725 ratio = -1;
3726 else
3727 return INFTY;
3730 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3731 or ratio == 1, it is better to handle this like
3733 ubase - ratio * cbase + ratio * var
3735 (this also holds in the case ratio == -1, TODO). */
3737 if (cst_and_fits_in_hwi (cbase))
3739 offset = - ratio * int_cst_value (cbase);
3740 cost += difference_cost (data,
3741 ubase, integer_zero_node,
3742 &symbol_present, &var_present, &offset,
3743 depends_on);
3745 else if (ratio == 1)
3747 cost += difference_cost (data,
3748 ubase, cbase,
3749 &symbol_present, &var_present, &offset,
3750 depends_on);
3752 else
3754 cost += force_var_cost (data, cbase, depends_on);
3755 cost += add_cost (TYPE_MODE (ctype));
3756 cost += difference_cost (data,
3757 ubase, integer_zero_node,
3758 &symbol_present, &var_present, &offset,
3759 depends_on);
3762 /* If we are after the increment, the value of the candidate is higher by
3763 one iteration. */
3764 if (stmt_after_increment (data->current_loop, cand, at))
3765 offset -= ratio * cstepi;
3767 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3768 (symbol/var/const parts may be omitted). If we are looking for an address,
3769 find the cost of addressing this. */
3770 if (address_p)
3771 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3773 /* Otherwise estimate the costs for computing the expression. */
3774 aratio = ratio > 0 ? ratio : -ratio;
3775 if (!symbol_present && !var_present && !offset)
3777 if (ratio != 1)
3778 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3780 return cost;
3783 if (aratio != 1)
3784 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3786 n_sums = 1;
3787 if (var_present
3788 /* Symbol + offset should be compile-time computable. */
3789 && (symbol_present || offset))
3790 n_sums++;
3792 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3794 fallback:
3796 /* Just get the expression, expand it and measure the cost. */
3797 tree comp = get_computation_at (data->current_loop, use, cand, at);
3799 if (!comp)
3800 return INFTY;
3802 if (address_p)
3803 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3805 return computation_cost (comp);
3809 /* Determines the cost of the computation by which USE is expressed
3810 from induction variable CAND. If ADDRESS_P is true, we just need
3811 to create an address from it, otherwise we want to get it into a
3812 register. A set of invariants we depend on is stored in
3813 DEPENDS_ON. */
3815 static unsigned
3816 get_computation_cost (struct ivopts_data *data,
3817 struct iv_use *use, struct iv_cand *cand,
3818 bool address_p, bitmap *depends_on)
3820 return get_computation_cost_at (data,
3821 use, cand, address_p, depends_on, use->stmt);
3824 /* Determines cost of basing replacement of USE on CAND in a generic
3825 expression. */
3827 static bool
3828 determine_use_iv_cost_generic (struct ivopts_data *data,
3829 struct iv_use *use, struct iv_cand *cand)
3831 bitmap depends_on;
3832 unsigned cost;
3834 /* The simple case first -- if we need to express the value of the preserved
3835 original biv, the cost is 0. This also prevents us from counting the
3836 cost of the increment twice -- once at this use and once in the cost of
3837 the candidate. */
3838 if (cand->pos == IP_ORIGINAL
3839 && cand->incremented_at == use->stmt)
3841 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3842 return true;
3845 cost = get_computation_cost (data, use, cand, false, &depends_on);
3846 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3848 return cost != INFTY;
3851 /* Determines cost of basing replacement of USE on CAND in an address. */
3853 static bool
3854 determine_use_iv_cost_address (struct ivopts_data *data,
3855 struct iv_use *use, struct iv_cand *cand)
3857 bitmap depends_on;
3858 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3860 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3862 return cost != INFTY;
3865 /* Computes value of induction variable IV in iteration NITER. */
3867 static tree
3868 iv_value (struct iv *iv, tree niter)
3870 tree val;
3871 tree type = TREE_TYPE (iv->base);
3873 niter = fold_convert (type, niter);
3874 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3876 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3879 /* Computes value of candidate CAND at position AT in iteration NITER. */
3881 static tree
3882 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3884 tree val = iv_value (cand->iv, niter);
3885 tree type = TREE_TYPE (cand->iv->base);
3887 if (stmt_after_increment (loop, cand, at))
3888 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3890 return val;
3893 /* Returns period of induction variable iv. */
3895 static tree
3896 iv_period (struct iv *iv)
3898 tree step = iv->step, period, type;
3899 tree pow2div;
3901 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3903 /* The period of the iv is the type range divided by gcd (step, type range).
3904 Since the type range is a power of two, it suffices to determine the
3905 maximum power of two that divides the step. */
3906 pow2div = num_ending_zeros (step);
3907 type = unsigned_type_for (TREE_TYPE (step));
3909 period = build_low_bits_mask (type,
3910 (TYPE_PRECISION (type)
3911 - tree_low_cst (pow2div, 1)));
3913 return period;
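/* Illustrative worked example, not part of this pass: for a 32-bit iv
   with step 12 == 4 * 3, num_ending_zeros gives pow2div == 2, so the
   result is build_low_bits_mask (type, 30) == 2^30 - 1.  The iv repeats
   modulo 2^32 only after 2^30 increments, so for up to 2^30 - 1
   iterations all its values are distinct.  */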
3916 /* Returns the comparison operator used when eliminating the iv USE. */
3918 static enum tree_code
3919 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3921 struct loop *loop = data->current_loop;
3922 basic_block ex_bb;
3923 edge exit;
3925 ex_bb = bb_for_stmt (use->stmt);
3926 exit = EDGE_SUCC (ex_bb, 0);
3927 if (flow_bb_inside_loop_p (loop, exit->dest))
3928 exit = EDGE_SUCC (ex_bb, 1);
3930 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3933 /* Checks whether it is possible to express the condition in USE by a comparison
3934 of candidate CAND. If so, stores the value compared against in *BOUND. */
3936 static bool
3937 may_eliminate_iv (struct ivopts_data *data,
3938 struct iv_use *use, struct iv_cand *cand, tree *bound)
3940 basic_block ex_bb;
3941 edge exit;
3942 struct tree_niter_desc *niter;
3943 tree nit, nit_type;
3944 tree wider_type, period, per_type;
3945 struct loop *loop = data->current_loop;
3947 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3948 return false;
3950 /* For now this works only for exits that dominate the loop latch. TODO --
3951 extend to other conditions inside the loop body. */
3952 ex_bb = bb_for_stmt (use->stmt);
3953 if (use->stmt != last_stmt (ex_bb)
3954 || TREE_CODE (use->stmt) != COND_EXPR)
3955 return false;
3956 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3957 return false;
3959 exit = EDGE_SUCC (ex_bb, 0);
3960 if (flow_bb_inside_loop_p (loop, exit->dest))
3961 exit = EDGE_SUCC (ex_bb, 1);
3962 if (flow_bb_inside_loop_p (loop, exit->dest))
3963 return false;
3965 niter = niter_for_exit (data, exit);
3966 if (!niter
3967 || !zero_p (niter->may_be_zero))
3968 return false;
3970 nit = niter->niter;
3971 nit_type = TREE_TYPE (nit);
3973 /* Determine whether we may use the variable to test whether niter iterations
3974 have elapsed. This is the case iff the period of the induction variable is
3975 at least the number of iterations. */
3976 period = iv_period (cand->iv);
3977 if (!period)
3978 return false;
3979 per_type = TREE_TYPE (period);
3981 wider_type = TREE_TYPE (period);
3982 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3983 wider_type = per_type;
3984 else
3985 wider_type = nit_type;
3987 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3988 fold_convert (wider_type, period),
3989 fold_convert (wider_type, nit))))
3990 return false;
3992 *bound = cand_value_at (loop, cand, use->stmt, nit);
3993 return true;
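/* Illustrative sketch, not part of this pass: for a candidate pointer p
   with base &a[0] and step 4 in a loop known to run n times, BOUND
   becomes &a[0] + 4 * n (shifted by one step if the test follows the
   increment), so an exit test such as  i < n  can later be rewritten as
   p != bound, allowing the counter i to be eliminated.  */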
3996 /* Determines cost of basing replacement of USE on CAND in a condition. */
3998 static bool
3999 determine_use_iv_cost_condition (struct ivopts_data *data,
4000 struct iv_use *use, struct iv_cand *cand)
4002 tree bound = NULL_TREE, op, cond;
4003 bitmap depends_on = NULL;
4004 unsigned cost;
4006 /* Only consider real candidates. */
4007 if (!cand->iv)
4009 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4010 return false;
4013 if (may_eliminate_iv (data, use, cand, &bound))
4015 cost = force_var_cost (data, bound, &depends_on);
4017 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4018 return cost != INFTY;
4021 /* The induction variable elimination failed; just express the original
4022 giv. If it is compared with an invariant, note that we cannot get
4023 rid of it. */
4024 cost = get_computation_cost (data, use, cand, false, &depends_on);
4026 cond = *use->op_p;
4027 if (TREE_CODE (cond) != SSA_NAME)
4029 op = TREE_OPERAND (cond, 0);
4030 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4031 op = TREE_OPERAND (cond, 1);
4032 if (TREE_CODE (op) == SSA_NAME)
4034 op = get_iv (data, op)->base;
4035 fd_ivopts_data = data;
4036 walk_tree (&op, find_depends, &depends_on, NULL);
4040 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4041 return cost != INFTY;
4044 /* Determines cost of basing replacement of USE on CAND. Returns false
4045 if USE cannot be based on CAND. */
4047 static bool
4048 determine_use_iv_cost (struct ivopts_data *data,
4049 struct iv_use *use, struct iv_cand *cand)
4051 switch (use->type)
4053 case USE_NONLINEAR_EXPR:
4054 return determine_use_iv_cost_generic (data, use, cand);
4056 case USE_ADDRESS:
4057 return determine_use_iv_cost_address (data, use, cand);
4059 case USE_COMPARE:
4060 return determine_use_iv_cost_condition (data, use, cand);
4062 default:
4063 gcc_unreachable ();
4067 /* Determines costs of basing the use of the iv on an iv candidate. */
4069 static void
4070 determine_use_iv_costs (struct ivopts_data *data)
4072 unsigned i, j;
4073 struct iv_use *use;
4074 struct iv_cand *cand;
4075 bitmap to_clear = BITMAP_ALLOC (NULL);
4077 alloc_use_cost_map (data);
4079 for (i = 0; i < n_iv_uses (data); i++)
4081 use = iv_use (data, i);
4083 if (data->consider_all_candidates)
4085 for (j = 0; j < n_iv_cands (data); j++)
4087 cand = iv_cand (data, j);
4088 determine_use_iv_cost (data, use, cand);
4091 else
4093 bitmap_iterator bi;
4095 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4097 cand = iv_cand (data, j);
4098 if (!determine_use_iv_cost (data, use, cand))
4099 bitmap_set_bit (to_clear, j);
4102 /* Remove the candidates for which the cost is infinite from
4103 the list of related candidates. */
4104 bitmap_and_compl_into (use->related_cands, to_clear);
4105 bitmap_clear (to_clear);
4109 BITMAP_FREE (to_clear);
4111 if (dump_file && (dump_flags & TDF_DETAILS))
4113 fprintf (dump_file, "Use-candidate costs:\n");
4115 for (i = 0; i < n_iv_uses (data); i++)
4117 use = iv_use (data, i);
4119 fprintf (dump_file, "Use %d:\n", i);
4120 fprintf (dump_file, " cand\tcost\tdepends on\n");
4121 for (j = 0; j < use->n_map_members; j++)
4123 if (!use->cost_map[j].cand
4124 || use->cost_map[j].cost == INFTY)
4125 continue;
4127 fprintf (dump_file, " %d\t%d\t",
4128 use->cost_map[j].cand->id,
4129 use->cost_map[j].cost);
4130 if (use->cost_map[j].depends_on)
4131 bitmap_print (dump_file,
4132 use->cost_map[j].depends_on, "","");
4133 fprintf (dump_file, "\n");
4136 fprintf (dump_file, "\n");
4138 fprintf (dump_file, "\n");
4142 /* Determines cost of the candidate CAND. */
4144 static void
4145 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4147 unsigned cost_base, cost_step;
4148 tree base;
4150 if (!cand->iv)
4152 cand->cost = 0;
4153 return;
4156 /* There are two costs associated with the candidate -- its increment
4157 and its initialization. The second is almost negligible for any loop
4158 that rolls enough, so we give it only a small weight. */
4160 base = cand->iv->base;
4161 cost_base = force_var_cost (data, base, NULL);
4162 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4164 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4166 /* Prefer the original iv unless we may gain something by replacing it;
4167 this is not really relevant for artificial ivs created by other
4168 passes. */
4169 if (cand->pos == IP_ORIGINAL
4170 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4171 cand->cost--;
4173 /* Prefer not to insert statements into latch unless there are some
4174 already (so that we do not create unnecessary jumps). */
4175 if (cand->pos == IP_END
4176 && empty_block_p (ip_end_pos (data->current_loop)))
4177 cand->cost++;
4180 /* Determines costs of computation of the candidates. */
4182 static void
4183 determine_iv_costs (struct ivopts_data *data)
4185 unsigned i;
4187 if (dump_file && (dump_flags & TDF_DETAILS))
4189 fprintf (dump_file, "Candidate costs:\n");
4190 fprintf (dump_file, " cand\tcost\n");
4193 for (i = 0; i < n_iv_cands (data); i++)
4195 struct iv_cand *cand = iv_cand (data, i);
4197 determine_iv_cost (data, cand);
4199 if (dump_file && (dump_flags & TDF_DETAILS))
4200 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4203 if (dump_file && (dump_flags & TDF_DETAILS))
4204 fprintf (dump_file, "\n");
4207 /* Calculates cost for having SIZE induction variables. */
4209 static unsigned
4210 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4212 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
4215 /* For each size of the induction variable set determine the penalty. */
4217 static void
4218 determine_set_costs (struct ivopts_data *data)
4220 unsigned j, n;
4221 tree phi, op;
4222 struct loop *loop = data->current_loop;
4223 bitmap_iterator bi;
4225 /* We use the following model (definitely improvable, especially the
4226 cost function -- TODO):
4228 We estimate the number of registers available (using MD data), name it A.
4230 We estimate the number of registers used by the loop, name it U. This
4231 number is obtained as the number of loop phi nodes (not counting virtual
4232 registers and bivs) + the number of variables from outside of the loop.
4234 We set a reserve R (free regs that are used for temporary computations,
4235 etc.). For now the reserve is a constant 3.
4237 Let I be the number of induction variables.
4239 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we are not
4240 encouraged to create a lot of ivs without a reason).
4241 -- if A - R < U + I <= A, the cost is I * PRES_COST
4242 -- if U + I > A, the cost is I * PRES_COST and
4243 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4245 if (dump_file && (dump_flags & TDF_DETAILS))
4247 fprintf (dump_file, "Global costs:\n");
4248 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4249 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4250 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4251 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4254 n = 0;
4255 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4257 op = PHI_RESULT (phi);
4259 if (!is_gimple_reg (op))
4260 continue;
4262 if (get_iv (data, op))
4263 continue;
4265 n++;
4268 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4270 struct version_info *info = ver_info (data, j);
4272 if (info->inv_id && info->has_nonlin_use)
4273 n++;
4276 data->regs_used = n;
4277 if (dump_file && (dump_flags & TDF_DETAILS))
4278 fprintf (dump_file, " regs_used %d\n", n);
4280 if (dump_file && (dump_flags & TDF_DETAILS))
4282 fprintf (dump_file, " cost for size:\n");
4283 fprintf (dump_file, " ivs\tcost\n");
4284 for (j = 0; j <= 2 * target_avail_regs; j++)
4285 fprintf (dump_file, " %d\t%d\n", j,
4286 ivopts_global_cost_for_size (data, j));
4287 fprintf (dump_file, "\n");
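/* Illustrative worked example, not part of this pass: with A == 10
   available registers, U == 6 registers already used by the loop and the
   constant reserve R == 3, one iv costs SMALL_COST (6 + 1 + 3 <= 10),
   two to four ivs cost I * PRES_COST (10 - 3 < 6 + I <= 10), and from
   five ivs on the spill term
   n_uses * SPILL_COST * (U + I - A) / (U + I) is added as well.  */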
4291 /* Returns true if A is a cheaper cost pair than B. */
4293 static bool
4294 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4296 if (!a)
4297 return false;
4299 if (!b)
4300 return true;
4302 if (a->cost < b->cost)
4303 return true;
4305 if (a->cost > b->cost)
4306 return false;
4308 /* In case the costs are the same, prefer the cheaper candidate. */
4309 if (a->cand->cost < b->cand->cost)
4310 return true;
4312 return false;
4315 /* Computes the cost field of IVS structure. */
4317 static void
4318 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4320 unsigned cost = 0;
4322 cost += ivs->cand_use_cost;
4323 cost += ivs->cand_cost;
4324 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4326 ivs->cost = cost;
4329 /* Removes the invariants in set INVS from set IVS. */
4331 static void
4332 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4334 bitmap_iterator bi;
4335 unsigned iid;
4337 if (!invs)
4338 return;
4340 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4342 ivs->n_invariant_uses[iid]--;
4343 if (ivs->n_invariant_uses[iid] == 0)
4344 ivs->n_regs--;
4348 /* Set USE not to be expressed by any candidate in IVS. */
4350 static void
4351 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4352 struct iv_use *use)
4354 unsigned uid = use->id, cid;
4355 struct cost_pair *cp;
4357 cp = ivs->cand_for_use[uid];
4358 if (!cp)
4359 return;
4360 cid = cp->cand->id;
4362 ivs->bad_uses++;
4363 ivs->cand_for_use[uid] = NULL;
4364 ivs->n_cand_uses[cid]--;
4366 if (ivs->n_cand_uses[cid] == 0)
4368 bitmap_clear_bit (ivs->cands, cid);
4369 /* Do not count the pseudocandidates. */
4370 if (cp->cand->iv)
4371 ivs->n_regs--;
4372 ivs->n_cands--;
4373 ivs->cand_cost -= cp->cand->cost;
4375 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4378 ivs->cand_use_cost -= cp->cost;
4380 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4381 iv_ca_recount_cost (data, ivs);
4384 /* Add invariants in set INVS to set IVS. */
4386 static void
4387 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4389 bitmap_iterator bi;
4390 unsigned iid;
4392 if (!invs)
4393 return;
4395 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4397 ivs->n_invariant_uses[iid]++;
4398 if (ivs->n_invariant_uses[iid] == 1)
4399 ivs->n_regs++;
4403 /* Set cost pair for USE in set IVS to CP. */
4405 static void
4406 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4407 struct iv_use *use, struct cost_pair *cp)
4409 unsigned uid = use->id, cid;
4411 if (ivs->cand_for_use[uid] == cp)
4412 return;
4414 if (ivs->cand_for_use[uid])
4415 iv_ca_set_no_cp (data, ivs, use);
4417 if (cp)
4419 cid = cp->cand->id;
4421 ivs->bad_uses--;
4422 ivs->cand_for_use[uid] = cp;
4423 ivs->n_cand_uses[cid]++;
4424 if (ivs->n_cand_uses[cid] == 1)
4426 bitmap_set_bit (ivs->cands, cid);
4427 /* Do not count the pseudocandidates. */
4428 if (cp->cand->iv)
4429 ivs->n_regs++;
4430 ivs->n_cands++;
4431 ivs->cand_cost += cp->cand->cost;
4433 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4436 ivs->cand_use_cost += cp->cost;
4437 iv_ca_set_add_invariants (ivs, cp->depends_on);
4438 iv_ca_recount_cost (data, ivs);
4442 /* Extend set IVS by expressing USE by some of the candidates in it
4443 if possible. */
4445 static void
4446 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4447 struct iv_use *use)
4449 struct cost_pair *best_cp = NULL, *cp;
4450 bitmap_iterator bi;
4451 unsigned i;
4453 gcc_assert (ivs->upto >= use->id);
4455 if (ivs->upto == use->id)
4457 ivs->upto++;
4458 ivs->bad_uses++;
4461 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4463 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4465 if (cheaper_cost_pair (cp, best_cp))
4466 best_cp = cp;
4469 iv_ca_set_cp (data, ivs, use, best_cp);
4472 /* Get cost for assignment IVS. */
4474 static unsigned
4475 iv_ca_cost (struct iv_ca *ivs)
4477 return (ivs->bad_uses ? INFTY : ivs->cost);
4480 /* Returns true if all dependences of CP are among invariants in IVS. */
4482 static bool
4483 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4485 unsigned i;
4486 bitmap_iterator bi;
4488 if (!cp->depends_on)
4489 return true;
4491 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4493 if (ivs->n_invariant_uses[i] == 0)
4494 return false;
4497 return true;
4500 /* Creates a record of the change that expresses USE by NEW_CP instead of
4501 OLD_CP, and chains it before NEXT_CHANGE. */
4503 static struct iv_ca_delta *
4504 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4505 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4507 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4509 change->use = use;
4510 change->old_cp = old_cp;
4511 change->new_cp = new_cp;
4512 change->next_change = next_change;
4514 return change;
4517 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4518 are rewritten. */
4520 static struct iv_ca_delta *
4521 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4523 struct iv_ca_delta *last;
4525 if (!l2)
4526 return l1;
4528 if (!l1)
4529 return l2;
4531 for (last = l1; last->next_change; last = last->next_change)
4532 continue;
4533 last->next_change = l2;
4535 return l1;
4538 /* Returns the cost pair through which USE is expressed in IVS. */
4540 static struct cost_pair *
4541 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4543 return ivs->cand_for_use[use->id];
4546 /* Reverses the list of changes DELTA, forming its inverse. */
4548 static struct iv_ca_delta *
4549 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4551 struct iv_ca_delta *act, *next, *prev = NULL;
4552 struct cost_pair *tmp;
4554 for (act = delta; act; act = next)
4556 next = act->next_change;
4557 act->next_change = prev;
4558 prev = act;
4560 tmp = act->old_cp;
4561 act->old_cp = act->new_cp;
4562 act->new_cp = tmp;
4565 return prev;
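/* Editor's note: reversing the order alone would not undo a delta; each
   change must also swap its OLD_CP and NEW_CP, since the inverse has to
   move every use back from its new cost pair to its old one.  For
   example, the delta (use1: a -> b), (use2: c -> d) has the inverse
   (use2: d -> c), (use1: b -> a), which is exactly what the loop above
   builds in place.  */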
4568 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4569 reverted instead. */
4571 static void
4572 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4573 struct iv_ca_delta *delta, bool forward)
4575 struct cost_pair *from, *to;
4576 struct iv_ca_delta *act;
4578 if (!forward)
4579 delta = iv_ca_delta_reverse (delta);
4581 for (act = delta; act; act = act->next_change)
4583 from = act->old_cp;
4584 to = act->new_cp;
4585 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4586 iv_ca_set_cp (data, ivs, act->use, to);
4589 if (!forward)
4590 iv_ca_delta_reverse (delta);
4593 /* Returns true if CAND is used in IVS. */
4595 static bool
4596 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4598 return ivs->n_cand_uses[cand->id] > 0;
4601 /* Returns number of induction variable candidates in the set IVS. */
4603 static unsigned
4604 iv_ca_n_cands (struct iv_ca *ivs)
4606 return ivs->n_cands;
4609 /* Free the list of changes DELTA. */
4611 static void
4612 iv_ca_delta_free (struct iv_ca_delta **delta)
4614 struct iv_ca_delta *act, *next;
4616 for (act = *delta; act; act = next)
4618 next = act->next_change;
4619 free (act);
4622 *delta = NULL;
4625 /* Allocates a new, empty assignment of iv candidates to uses. */
4627 static struct iv_ca *
4628 iv_ca_new (struct ivopts_data *data)
4630 struct iv_ca *nw = XNEW (struct iv_ca);
4632 nw->upto = 0;
4633 nw->bad_uses = 0;
4634 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4635 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4636 nw->cands = BITMAP_ALLOC (NULL);
4637 nw->n_cands = 0;
4638 nw->n_regs = 0;
4639 nw->cand_use_cost = 0;
4640 nw->cand_cost = 0;
4641 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4642 nw->cost = 0;
4644 return nw;
4647 /* Free memory occupied by the set IVS. */
4649 static void
4650 iv_ca_free (struct iv_ca **ivs)
4652 free ((*ivs)->cand_for_use);
4653 free ((*ivs)->n_cand_uses);
4654 BITMAP_FREE ((*ivs)->cands);
4655 free ((*ivs)->n_invariant_uses);
4656 free (*ivs);
4657 *ivs = NULL;
4660 /* Dumps IVS to FILE. */
4662 static void
4663 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4665 const char *pref = " invariants ";
4666 unsigned i;
4668 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4669 bitmap_print (file, ivs->cands, " candidates ","\n");
4671 for (i = 1; i <= data->max_inv_id; i++)
4672 if (ivs->n_invariant_uses[i])
4674 fprintf (file, "%s%d", pref, i);
4675 pref = ", ";
4677 fprintf (file, "\n");
4680 /* Tries changing the candidate for each use in IVS to CAND. Returns the cost
4681 of the new set, and stores the differences in DELTA. The number of
4682 induction variables in the new set is stored in N_IVS. */
4684 static unsigned
4685 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4686 struct iv_cand *cand, struct iv_ca_delta **delta,
4687 unsigned *n_ivs)
4689 unsigned i, cost;
4690 struct iv_use *use;
4691 struct cost_pair *old_cp, *new_cp;
4693 *delta = NULL;
4694 for (i = 0; i < ivs->upto; i++)
4696 use = iv_use (data, i);
4697 old_cp = iv_ca_cand_for_use (ivs, use);
4699 if (old_cp
4700 && old_cp->cand == cand)
4701 continue;
4703 new_cp = get_use_iv_cost (data, use, cand);
4704 if (!new_cp)
4705 continue;
4707 if (!iv_ca_has_deps (ivs, new_cp))
4708 continue;
4710 if (!cheaper_cost_pair (new_cp, old_cp))
4711 continue;
4713 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4716 iv_ca_delta_commit (data, ivs, *delta, true);
4717 cost = iv_ca_cost (ivs);
4718 if (n_ivs)
4719 *n_ivs = iv_ca_n_cands (ivs);
4720 iv_ca_delta_commit (data, ivs, *delta, false);
4722 return cost;
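/* Editor's note: iv_ca_extend is an instance of a trial-evaluation
   pattern that recurs below (iv_ca_narrow, iv_ca_prune,
   try_improve_iv_set): build a delta, commit it forward, read the cost
   off the updated set, then commit the same delta backward so that the
   set is unchanged on return.  In outline, with hypothetical names:

       delta = build_changes (...);
       commit (set, delta, true);      <- apply
       cost = current_cost (set);
       commit (set, delta, false);     <- revert

   Only COST and DELTA escape; the caller decides whether the change is
   worth re-committing.  */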
4725 /* Try narrowing set IVS by removing CAND. Return the cost of
4726 the new set and store the differences in DELTA. */
4728 static unsigned
4729 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4730 struct iv_cand *cand, struct iv_ca_delta **delta)
4732 unsigned i, ci;
4733 struct iv_use *use;
4734 struct cost_pair *old_cp, *new_cp, *cp;
4735 bitmap_iterator bi;
4736 struct iv_cand *cnd;
4737 unsigned cost;
4739 *delta = NULL;
4740 for (i = 0; i < n_iv_uses (data); i++)
4742 use = iv_use (data, i);
4744 old_cp = iv_ca_cand_for_use (ivs, use);
4745 if (old_cp->cand != cand)
4746 continue;
4748 new_cp = NULL;
4750 if (data->consider_all_candidates)
4752 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4754 if (ci == cand->id)
4755 continue;
4757 cnd = iv_cand (data, ci);
4759 cp = get_use_iv_cost (data, use, cnd);
4760 if (!cp)
4761 continue;
4762 if (!iv_ca_has_deps (ivs, cp))
4763 continue;
4765 if (!cheaper_cost_pair (cp, new_cp))
4766 continue;
4768 new_cp = cp;
4771 else
4773 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4775 if (ci == cand->id)
4776 continue;
4778 cnd = iv_cand (data, ci);
4780 cp = get_use_iv_cost (data, use, cnd);
4781 if (!cp)
4782 continue;
4783 if (!iv_ca_has_deps (ivs, cp))
4784 continue;
4786 if (!cheaper_cost_pair (cp, new_cp))
4787 continue;
4789 new_cp = cp;
4793 if (!new_cp)
4795 iv_ca_delta_free (delta);
4796 return INFTY;
4799 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4802 iv_ca_delta_commit (data, ivs, *delta, true);
4803 cost = iv_ca_cost (ivs);
4804 iv_ca_delta_commit (data, ivs, *delta, false);
4806 return cost;
4809 /* Tries optimizing the set of candidates IVS by removing candidates other
4810 than EXCEPT_CAND from it. Returns the cost of the new set, and stores the
4811 differences in DELTA. */
4813 static unsigned
4814 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4815 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4817 bitmap_iterator bi;
4818 struct iv_ca_delta *act_delta, *best_delta;
4819 unsigned i, best_cost, acost;
4820 struct iv_cand *cand;
4822 best_delta = NULL;
4823 best_cost = iv_ca_cost (ivs);
4825 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4827 cand = iv_cand (data, i);
4829 if (cand == except_cand)
4830 continue;
4832 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4834 if (acost < best_cost)
4836 best_cost = acost;
4837 iv_ca_delta_free (&best_delta);
4838 best_delta = act_delta;
4840 else
4841 iv_ca_delta_free (&act_delta);
4844 if (!best_delta)
4846 *delta = NULL;
4847 return best_cost;
4850 /* Recurse to possibly remove other unnecessary ivs. */
4851 iv_ca_delta_commit (data, ivs, best_delta, true);
4852 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4853 iv_ca_delta_commit (data, ivs, best_delta, false);
4854 *delta = iv_ca_delta_join (best_delta, *delta);
4855 return best_cost;
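/* Editor's note: the recursion above terminates because every committed
   narrowing removes at least one candidate from IVS (iv_ca_narrow moves
   all uses off CAND, so CAND drops out of the set), and each level is
   entered only when the cost strictly improved.  The joined delta then
   replays the removals in the order they were found.  */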
4858 /* Tries to extend the set IVS in the best possible way in order
4859 to express USE. */
4861 static bool
4862 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4863 struct iv_use *use)
4865 unsigned best_cost, act_cost;
4866 unsigned i;
4867 bitmap_iterator bi;
4868 struct iv_cand *cand;
4869 struct iv_ca_delta *best_delta = NULL, *act_delta;
4870 struct cost_pair *cp;
4872 iv_ca_add_use (data, ivs, use);
4873 best_cost = iv_ca_cost (ivs);
4875 cp = iv_ca_cand_for_use (ivs, use);
4876 if (cp)
4878 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4879 iv_ca_set_no_cp (data, ivs, use);
4882 /* First try important candidates; only if that fails, try the use-specific
4883 ones. Rationale -- in loops with many variables the best choice often is
4884 to use just one generic biv. If we added many ivs specific to the uses
4885 here, the optimization algorithm would later be likely to get stuck in a
4886 local minimum, thus causing us to create too many ivs. Going from few
4887 ivs to more seems more likely to be successful -- starting from few
4888 ivs, replacing an expensive use by a specific iv should always be a
4889 win. */
4890 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4892 cand = iv_cand (data, i);
4894 if (iv_ca_cand_used_p (ivs, cand))
4895 continue;
4897 cp = get_use_iv_cost (data, use, cand);
4898 if (!cp)
4899 continue;
4901 iv_ca_set_cp (data, ivs, use, cp);
4902 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4903 iv_ca_set_no_cp (data, ivs, use);
4904 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4906 if (act_cost < best_cost)
4908 best_cost = act_cost;
4910 iv_ca_delta_free (&best_delta);
4911 best_delta = act_delta;
4913 else
4914 iv_ca_delta_free (&act_delta);
4917 if (best_cost == INFTY)
4919 for (i = 0; i < use->n_map_members; i++)
4921 cp = use->cost_map + i;
4922 cand = cp->cand;
4923 if (!cand)
4924 continue;
4926 /* Already tried this. */
4927 if (cand->important)
4928 continue;
4930 if (iv_ca_cand_used_p (ivs, cand))
4931 continue;
4933 act_delta = NULL;
4934 iv_ca_set_cp (data, ivs, use, cp);
4935 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4936 iv_ca_set_no_cp (data, ivs, use);
4937 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4938 cp, act_delta);
4940 if (act_cost < best_cost)
4942 best_cost = act_cost;
4944 if (best_delta)
4945 iv_ca_delta_free (&best_delta);
4946 best_delta = act_delta;
4948 else
4949 iv_ca_delta_free (&act_delta);
4953 iv_ca_delta_commit (data, ivs, best_delta, true);
4954 iv_ca_delta_free (&best_delta);
4956 return (best_cost != INFTY);
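/* Editor's note: a self-contained sketch of the two-phase search above,
   under hypothetical names.  Phase one restricts attention to a small
   "important" subset of candidates; the full range is scanned only when
   phase one found no finite-cost choice, which keeps the greedy search
   biased toward few generic ivs.  SKETCH_INFTY stands in for INFTY, and
   *BEST_IDX is written only when a finite-cost candidate exists.  */

#define SKETCH_INFTY 1000000000u

static unsigned
two_phase_pick (const unsigned *cost, const int *important, unsigned n,
		unsigned *best_idx)
{
  unsigned i, best = SKETCH_INFTY;

  /* Phase one: important candidates only.  */
  for (i = 0; i < n; i++)
    if (important[i] && cost[i] < best)
      {
	best = cost[i];
	*best_idx = i;
      }

  if (best != SKETCH_INFTY)
    return best;

  /* Phase two: the remaining candidates, tried only on failure.  */
  for (i = 0; i < n; i++)
    if (!important[i] && cost[i] < best)
      {
	best = cost[i];
	*best_idx = i;
      }

  return best;
}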
4959 /* Finds an initial assignment of candidates to uses. */
4961 static struct iv_ca *
4962 get_initial_solution (struct ivopts_data *data)
4964 struct iv_ca *ivs = iv_ca_new (data);
4965 unsigned i;
4967 for (i = 0; i < n_iv_uses (data); i++)
4968 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4970 iv_ca_free (&ivs);
4971 return NULL;
4974 return ivs;
4977 /* Tries to improve the set of induction variables IVS. */
4979 static bool
4980 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
4982 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4983 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
4984 struct iv_cand *cand;
4986 /* Try extending the set of induction variables by one. */
4987 for (i = 0; i < n_iv_cands (data); i++)
4989 cand = iv_cand (data, i);
4991 if (iv_ca_cand_used_p (ivs, cand))
4992 continue;
4994 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4995 if (!act_delta)
4996 continue;
4998 /* If we successfully added the candidate and the set is small enough,
4999 try optimizing it by removing other candidates. */
5000 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5002 iv_ca_delta_commit (data, ivs, act_delta, true);
5003 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5004 iv_ca_delta_commit (data, ivs, act_delta, false);
5005 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5008 if (acost < best_cost)
5010 best_cost = acost;
5011 iv_ca_delta_free (&best_delta);
5012 best_delta = act_delta;
5014 else
5015 iv_ca_delta_free (&act_delta);
5018 if (!best_delta)
5020 /* Try removing the candidates from the set instead. */
5021 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5023 /* Nothing more we can do. */
5024 if (!best_delta)
5025 return false;
5028 iv_ca_delta_commit (data, ivs, best_delta, true);
5029 gcc_assert (best_cost == iv_ca_cost (ivs));
5030 iv_ca_delta_free (&best_delta);
5031 return true;
5034 /* Attempts to find the optimal set of induction variables. We do a simple
5035 greedy heuristic -- we try to replace at most one candidate in the selected
5036 solution and remove the unused ivs while this improves the cost. */
5038 static struct iv_ca *
5039 find_optimal_iv_set (struct ivopts_data *data)
5041 unsigned i;
5042 struct iv_ca *set;
5043 struct iv_use *use;
5045 /* Get the initial solution. */
5046 set = get_initial_solution (data);
5047 if (!set)
5049 if (dump_file && (dump_flags & TDF_DETAILS))
5050 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5051 return NULL;
5054 if (dump_file && (dump_flags & TDF_DETAILS))
5056 fprintf (dump_file, "Initial set of candidates:\n");
5057 iv_ca_dump (data, dump_file, set);
5060 while (try_improve_iv_set (data, set))
5062 if (dump_file && (dump_flags & TDF_DETAILS))
5064 fprintf (dump_file, "Improved to:\n");
5065 iv_ca_dump (data, dump_file, set);
5069 if (dump_file && (dump_flags & TDF_DETAILS))
5070 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5072 for (i = 0; i < n_iv_uses (data); i++)
5074 use = iv_use (data, i);
5075 use->selected = iv_ca_cand_for_use (set, use)->cand;
5078 return set;
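/* Editor's note: the driver above is plain greedy hill climbing.  In
   outline, assuming hypothetical get_initial (), improve () and cost ()
   with the same contracts as get_initial_solution, try_improve_iv_set
   and iv_ca_cost:

       sol = get_initial ();
       if (!sol)
         return NULL;
       while (improve (sol))
         ;
       return sol;

   Each accepted improvement strictly lowers cost (sol), and costs are
   nonnegative integers, so the loop terminates.  */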
5081 /* Creates a new induction variable corresponding to CAND. */
5083 static void
5084 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5086 block_stmt_iterator incr_pos;
5087 tree base;
5088 bool after = false;
5090 if (!cand->iv)
5091 return;
5093 switch (cand->pos)
5095 case IP_NORMAL:
5096 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5097 break;
5099 case IP_END:
5100 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5101 after = true;
5102 break;
5104 case IP_ORIGINAL:
5105 /* Mark that the iv is preserved. */
5106 name_info (data, cand->var_before)->preserve_biv = true;
5107 name_info (data, cand->var_after)->preserve_biv = true;
5109 /* Rewrite the increment so that it uses var_before directly. */
5110 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5112 return;
5115 gimple_add_tmp_var (cand->var_before);
5116 add_referenced_tmp_var (cand->var_before);
5118 base = unshare_expr (cand->iv->base);
5120 create_iv (base, unshare_expr (cand->iv->step),
5121 cand->var_before, data->current_loop,
5122 &incr_pos, after, &cand->var_before, &cand->var_after);
5125 /* Creates new induction variables described in SET. */
5127 static void
5128 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5130 unsigned i;
5131 struct iv_cand *cand;
5132 bitmap_iterator bi;
5134 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5136 cand = iv_cand (data, i);
5137 create_new_iv (data, cand);
5141 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5142 is true, also remove the ssa name defined by the statement. */
5144 static void
5145 remove_statement (tree stmt, bool including_defined_name)
5147 if (TREE_CODE (stmt) == PHI_NODE)
5149 if (!including_defined_name)
5151 /* Prevent the ssa name defined by the statement from being removed. */
5152 SET_PHI_RESULT (stmt, NULL);
5154 remove_phi_node (stmt, NULL_TREE);
5156 else
5158 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5160 bsi_remove (&bsi, true);
5164 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
5165 using candidate CAND. */
5167 static void
5168 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5169 struct iv_use *use, struct iv_cand *cand)
5171 tree comp;
5172 tree op, stmts, tgt, ass;
5173 block_stmt_iterator bsi, pbsi;
5175 /* An important special case -- if we are asked to express the value of
5176 the original iv by itself, just exit; there is no need to
5177 introduce a new computation (that might also need casting the
5178 variable to unsigned and back). */
5179 if (cand->pos == IP_ORIGINAL
5180 && cand->incremented_at == use->stmt)
5182 tree step, ctype, utype;
5183 enum tree_code incr_code = PLUS_EXPR;
5185 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5186 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5188 step = cand->iv->step;
5189 ctype = TREE_TYPE (step);
5190 utype = TREE_TYPE (cand->var_after);
5191 if (TREE_CODE (step) == NEGATE_EXPR)
5193 incr_code = MINUS_EXPR;
5194 step = TREE_OPERAND (step, 0);
5197 /* Check whether we may leave the computation unchanged.
5198 This is the case only if it does not rely on other
5199 computations in the loop -- otherwise, the computation
5200 we rely upon may be removed in remove_unused_ivs,
5201 thus leading to an ICE. */
5202 op = TREE_OPERAND (use->stmt, 1);
5203 if (TREE_CODE (op) == PLUS_EXPR
5204 || TREE_CODE (op) == MINUS_EXPR)
5206 if (TREE_OPERAND (op, 0) == cand->var_before)
5207 op = TREE_OPERAND (op, 1);
5208 else if (TREE_CODE (op) == PLUS_EXPR
5209 && TREE_OPERAND (op, 1) == cand->var_before)
5210 op = TREE_OPERAND (op, 0);
5211 else
5212 op = NULL_TREE;
5214 else
5215 op = NULL_TREE;
5217 if (op
5218 && (TREE_CODE (op) == INTEGER_CST
5219 || operand_equal_p (op, step, 0)))
5220 return;
5222 /* Otherwise, add the necessary computations to express
5223 the iv. */
5224 op = fold_convert (ctype, cand->var_before);
5225 comp = fold_convert (utype,
5226 build2 (incr_code, ctype, op,
5227 unshare_expr (step)));
5229 else
5230 comp = get_computation (data->current_loop, use, cand);
5232 switch (TREE_CODE (use->stmt))
5234 case PHI_NODE:
5235 tgt = PHI_RESULT (use->stmt);
5237 /* If we should keep the biv, do not replace it. */
5238 if (name_info (data, tgt)->preserve_biv)
5239 return;
5241 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5242 while (!bsi_end_p (pbsi)
5243 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5245 bsi = pbsi;
5246 bsi_next (&pbsi);
5248 break;
5250 case MODIFY_EXPR:
5251 tgt = TREE_OPERAND (use->stmt, 0);
5252 bsi = bsi_for_stmt (use->stmt);
5253 break;
5255 default:
5256 gcc_unreachable ();
5259 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5261 if (TREE_CODE (use->stmt) == PHI_NODE)
5263 if (stmts)
5264 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5265 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5266 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5267 remove_statement (use->stmt, false);
5268 SSA_NAME_DEF_STMT (tgt) = ass;
5270 else
5272 if (stmts)
5273 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5274 TREE_OPERAND (use->stmt, 1) = op;
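/* Editor's note: the IP_ORIGINAL early exit above covers increments such
   as

       i_next = i + 1;

   where USE is the increment of the original biv itself and CAND is that
   same biv: rewriting would only replace the statement with an
   equivalent one (possibly with casts through an unsigned type), so it
   is kept, provided its right-hand side does not depend on computations
   that remove_unused_ivs may delete.  The name i_next is only for
   illustration.  */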
5278 /* Replaces the ssa name in index IDX by its base variable. Callback for
5279 for_each_index. */
5281 static bool
5282 idx_remove_ssa_names (tree base, tree *idx,
5283 void *data ATTRIBUTE_UNUSED)
5285 tree *op;
5287 if (TREE_CODE (*idx) == SSA_NAME)
5288 *idx = SSA_NAME_VAR (*idx);
5290 if (TREE_CODE (base) == ARRAY_REF)
5292 op = &TREE_OPERAND (base, 2);
5293 if (*op
5294 && TREE_CODE (*op) == SSA_NAME)
5295 *op = SSA_NAME_VAR (*op);
5296 op = &TREE_OPERAND (base, 3);
5297 if (*op
5298 && TREE_CODE (*op) == SSA_NAME)
5299 *op = SSA_NAME_VAR (*op);
5302 return true;
5305 /* Unshares REF and replaces the ssa names inside it by their base variables. */
5307 static tree
5308 unshare_and_remove_ssa_names (tree ref)
5310 ref = unshare_expr (ref);
5311 for_each_index (&ref, idx_remove_ssa_names, NULL);
5313 return ref;
5316 /* Extracts the alias analysis info for the memory reference REF. There are
5317 several ways this information may be stored, and its precise semantics
5318 depend on the type of the reference, but there is always one _DECL node
5319 hidden somewhere that is used to determine the set of virtual operands
5320 for the reference. The code below deciphers this jungle and extracts
5321 that single useful piece of information. */
5323 static tree
5324 get_ref_tag (tree ref, tree orig)
5326 tree var = get_base_address (ref);
5327 tree aref = NULL_TREE, tag, sv;
5328 HOST_WIDE_INT offset, size, maxsize;
5330 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5332 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5333 if (ref)
5334 break;
5337 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5338 return unshare_expr (sv);
5340 if (!var)
5341 return NULL_TREE;
5343 if (TREE_CODE (var) == INDIRECT_REF)
5345 /* If the base is a dereference of a pointer, first check its name memory
5346 tag. If it does not have one, use its symbol memory tag. */
5347 var = TREE_OPERAND (var, 0);
5348 if (TREE_CODE (var) != SSA_NAME)
5349 return NULL_TREE;
5351 if (SSA_NAME_PTR_INFO (var))
5353 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5354 if (tag)
5355 return tag;
5358 var = SSA_NAME_VAR (var);
5359 tag = var_ann (var)->symbol_mem_tag;
5360 gcc_assert (tag != NULL_TREE);
5361 return tag;
5363 else
5365 if (!DECL_P (var))
5366 return NULL_TREE;
5368 tag = var_ann (var)->symbol_mem_tag;
5369 if (tag)
5370 return tag;
5372 return var;
5376 /* Copies the reference information from OLD_REF to NEW_REF. */
5378 static void
5379 copy_ref_info (tree new_ref, tree old_ref)
5381 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5382 copy_mem_ref_info (new_ref, old_ref);
5383 else
5385 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5386 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5390 /* Rewrites USE (address that is an iv) using candidate CAND. */
5392 static void
5393 rewrite_use_address (struct ivopts_data *data,
5394 struct iv_use *use, struct iv_cand *cand)
5396 struct affine_tree_combination aff;
5397 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5398 tree ref;
5400 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5401 unshare_aff_combination (&aff);
5403 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5404 copy_ref_info (ref, *use->op_p);
5405 *use->op_p = ref;
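/* Editor's note: for an address use such as a[i] in the loop body, the
   call above may produce a TARGET_MEM_REF of the general shape

       MEM[base: p_1, index: i_2, step: 4, offset: 0]

   with the parts chosen according to what the target's addressing modes
   can represent.  The exact shape is target-dependent; the form shown is
   only illustrative.  */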
5408 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5409 candidate CAND. */
5411 static void
5412 rewrite_use_compare (struct ivopts_data *data,
5413 struct iv_use *use, struct iv_cand *cand)
5415 tree comp;
5416 tree *op_p, cond, op, stmts, bound;
5417 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5418 enum tree_code compare;
5419 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5421 bound = cp->value;
5422 if (bound)
5424 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5425 tree var_type = TREE_TYPE (var);
5427 compare = iv_elimination_compare (data, use);
5428 bound = fold_convert (var_type, bound);
5429 op = force_gimple_operand (unshare_expr (bound), &stmts,
5430 true, NULL_TREE);
5432 if (stmts)
5433 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5435 *use->op_p = build2 (compare, boolean_type_node, var, op);
5436 update_stmt (use->stmt);
5437 return;
5440 /* The induction variable elimination failed; just express the original
5441 giv. */
5442 comp = get_computation (data->current_loop, use, cand);
5444 cond = *use->op_p;
5445 op_p = &TREE_OPERAND (cond, 0);
5446 if (TREE_CODE (*op_p) != SSA_NAME
5447 || zero_p (get_iv (data, *op_p)->step))
5448 op_p = &TREE_OPERAND (cond, 1);
5450 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5451 if (stmts)
5452 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5454 *op_p = op;
5457 /* Ensure that operand *OP_P may be used at the end of EXIT without
5458 violating loop closed ssa form. */
5460 static void
5461 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5463 basic_block def_bb;
5464 struct loop *def_loop;
5465 tree phi, use;
5467 use = USE_FROM_PTR (op_p);
5468 if (TREE_CODE (use) != SSA_NAME)
5469 return;
5471 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5472 if (!def_bb)
5473 return;
5475 def_loop = def_bb->loop_father;
5476 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5477 return;
5479 /* Try finding a phi node that copies the value out of the loop. */
5480 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5481 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5482 break;
5484 if (!phi)
5486 /* Create such a phi node. */
5487 tree new_name = duplicate_ssa_name (use, NULL);
5489 phi = create_phi_node (new_name, exit->dest);
5490 SSA_NAME_DEF_STMT (new_name) = phi;
5491 add_phi_arg (phi, use, exit);
5494 SET_USE (op_p, PHI_RESULT (phi));
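/* Editor's note: an illustration of the invariant maintained above.  In
   loop closed ssa form, a value defined in a loop and used after it must
   flow through a phi node in the block outside the loop, e.g.

       loop:
         x_1 = ...;
         ...
       exit block:
         x_2 = PHI <x_1 (loop)>;
         ... use of x_2 ...

   so the code above either finds such a phi node for USE or creates one,
   and redirects the operand to its result.  The names x_1 and x_2 are
   only for illustration.  */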
5497 /* Ensure that operands of STMT may be used at the end of EXIT without
5498 violating loop closed ssa form. */
5500 static void
5501 protect_loop_closed_ssa_form (edge exit, tree stmt)
5503 ssa_op_iter iter;
5504 use_operand_p use_p;
5506 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5507 protect_loop_closed_ssa_form_use (exit, use_p);
5510 /* STMTS compute the value of the phi argument OP on EXIT of a loop. Arrange
5511 things so that they are emitted in the correct place, and so that the loop
5512 closed ssa form is preserved. */
5514 void
5515 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5517 tree_stmt_iterator tsi;
5518 block_stmt_iterator bsi;
5519 tree phi, stmt, def, next;
5521 if (!single_pred_p (exit->dest))
5522 split_loop_exit_edge (exit);
5524 /* Ensure there is a label in exit->dest, so that we can
5525 insert after it. */
5526 tree_block_label (exit->dest);
5527 bsi = bsi_after_labels (exit->dest);
5529 if (TREE_CODE (stmts) == STATEMENT_LIST)
5531 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5533 tree stmt = tsi_stmt (tsi);
5534 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5535 protect_loop_closed_ssa_form (exit, stmt);
5538 else
5540 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5541 protect_loop_closed_ssa_form (exit, stmts);
5544 if (!op)
5545 return;
5547 for (phi = phi_nodes (exit->dest); phi; phi = next)
5549 next = PHI_CHAIN (phi);
5551 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5553 def = PHI_RESULT (phi);
5554 remove_statement (phi, false);
5555 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5556 def, op);
5557 SSA_NAME_DEF_STMT (def) = stmt;
5558 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5563 /* Rewrites USE using candidate CAND. */
5565 static void
5566 rewrite_use (struct ivopts_data *data,
5567 struct iv_use *use, struct iv_cand *cand)
5569 switch (use->type)
5571 case USE_NONLINEAR_EXPR:
5572 rewrite_use_nonlinear_expr (data, use, cand);
5573 break;
5575 case USE_ADDRESS:
5576 rewrite_use_address (data, use, cand);
5577 break;
5579 case USE_COMPARE:
5580 rewrite_use_compare (data, use, cand);
5581 break;
5583 default:
5584 gcc_unreachable ();
5586 mark_new_vars_to_rename (use->stmt);
5589 /* Rewrite the uses using the selected induction variables. */
5591 static void
5592 rewrite_uses (struct ivopts_data *data)
5594 unsigned i;
5595 struct iv_cand *cand;
5596 struct iv_use *use;
5598 for (i = 0; i < n_iv_uses (data); i++)
5600 use = iv_use (data, i);
5601 cand = use->selected;
5602 gcc_assert (cand);
5604 rewrite_use (data, use, cand);
5608 /* Removes the ivs that are not used after rewriting. */
5610 static void
5611 remove_unused_ivs (struct ivopts_data *data)
5613 unsigned j;
5614 bitmap_iterator bi;
5616 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5618 struct version_info *info;
5620 info = ver_info (data, j);
5621 if (info->iv
5622 && !zero_p (info->iv->step)
5623 && !info->inv_id
5624 && !info->iv->have_use_for
5625 && !info->preserve_biv)
5626 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5630 /* Frees data allocated by the optimization of a single loop. */
5632 static void
5633 free_loop_data (struct ivopts_data *data)
5635 unsigned i, j;
5636 bitmap_iterator bi;
5637 tree obj;
5639 htab_empty (data->niters);
5641 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5643 struct version_info *info;
5645 info = ver_info (data, i);
5646 if (info->iv)
5647 free (info->iv);
5648 info->iv = NULL;
5649 info->has_nonlin_use = false;
5650 info->preserve_biv = false;
5651 info->inv_id = 0;
5653 bitmap_clear (data->relevant);
5654 bitmap_clear (data->important_candidates);
5656 for (i = 0; i < n_iv_uses (data); i++)
5658 struct iv_use *use = iv_use (data, i);
5660 free (use->iv);
5661 BITMAP_FREE (use->related_cands);
5662 for (j = 0; j < use->n_map_members; j++)
5663 if (use->cost_map[j].depends_on)
5664 BITMAP_FREE (use->cost_map[j].depends_on);
5665 free (use->cost_map);
5666 free (use);
5668 VEC_truncate (iv_use_p, data->iv_uses, 0);
5670 for (i = 0; i < n_iv_cands (data); i++)
5672 struct iv_cand *cand = iv_cand (data, i);
5674 if (cand->iv)
5675 free (cand->iv);
5676 if (cand->depends_on)
5677 BITMAP_FREE (cand->depends_on);
5678 free (cand);
5680 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5682 if (data->version_info_size < num_ssa_names)
5684 data->version_info_size = 2 * num_ssa_names;
5685 free (data->version_info);
5686 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5689 data->max_inv_id = 0;
5691 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5692 SET_DECL_RTL (obj, NULL_RTX);
5694 VEC_truncate (tree, decl_rtl_to_reset, 0);
5697 /* Finalizes the data structures used by the iv optimization pass. */
5700 static void
5701 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5703 free_loop_data (data);
5704 free (data->version_info);
5705 BITMAP_FREE (data->relevant);
5706 BITMAP_FREE (data->important_candidates);
5707 htab_delete (data->niters);
5709 VEC_free (tree, heap, decl_rtl_to_reset);
5710 VEC_free (iv_use_p, heap, data->iv_uses);
5711 VEC_free (iv_cand_p, heap, data->iv_candidates);
5714 /* Optimizes the LOOP. Returns true if anything changed. */
5716 static bool
5717 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5719 bool changed = false;
5720 struct iv_ca *iv_ca;
5721 edge exit;
5723 data->current_loop = loop;
5725 if (dump_file && (dump_flags & TDF_DETAILS))
5727 fprintf (dump_file, "Processing loop %d\n", loop->num);
5729 exit = single_dom_exit (loop);
5730 if (exit)
5732 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5733 exit->src->index, exit->dest->index);
5734 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5735 fprintf (dump_file, "\n");
5738 fprintf (dump_file, "\n");
5741 /* Determine, for each ssa name, whether it behaves as an induction
5742 variable in some loop. */
5743 if (!find_induction_variables (data))
5744 goto finish;
5746 /* Finds interesting uses (item 1). */
5747 find_interesting_uses (data);
5748 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5749 goto finish;
5751 /* Finds candidates for the induction variables (item 2). */
5752 find_iv_candidates (data);
5754 /* Calculates the costs (item 3, part 1). */
5755 determine_use_iv_costs (data);
5756 determine_iv_costs (data);
5757 determine_set_costs (data);
5759 /* Find the optimal set of induction variables (item 3, part 2). */
5760 iv_ca = find_optimal_iv_set (data);
5761 if (!iv_ca)
5762 goto finish;
5763 changed = true;
5765 /* Create the new induction variables (item 4, part 1). */
5766 create_new_ivs (data, iv_ca);
5767 iv_ca_free (&iv_ca);
5769 /* Rewrite the uses (item 4, part 2). */
5770 rewrite_uses (data);
5772 /* Remove the ivs that are unused after rewriting. */
5773 remove_unused_ivs (data);
5775 /* We have changed the structure of induction variables; it might happen
5776 that definitions in the scev database refer to some of them that were
5777 eliminated. */
5778 scev_reset ();
5780 finish:
5781 free_loop_data (data);
5783 return changed;
5786 /* Main entry point. Optimizes induction variables in LOOPS. */
5788 void
5789 tree_ssa_iv_optimize (struct loops *loops)
5791 struct loop *loop;
5792 struct ivopts_data data;
5794 tree_ssa_iv_optimize_init (&data);
5796 /* Optimize the loops starting with the innermost ones. */
5797 loop = loops->tree_root;
5798 while (loop->inner)
5799 loop = loop->inner;
5801 /* Scan the loops, inner ones first. */
5802 while (loop != loops->tree_root)
5804 if (dump_file && (dump_flags & TDF_DETAILS))
5805 flow_loop_dump (loop, dump_file, NULL, 1);
5807 tree_ssa_iv_optimize_loop (&data, loop);
5809 if (loop->next)
5811 loop = loop->next;
5812 while (loop->inner)
5813 loop = loop->inner;
5815 else
5816 loop = loop->outer;
5819 tree_ssa_iv_optimize_finalize (&data);
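/* Editor's note: a self-contained sketch of the iterative inner-first
   traversal used above, on a hypothetical node type with the same
   INNER / NEXT / OUTER links as struct loop.  Starting from the deepest
   leftmost node, each step either moves to the next sibling (descending
   to its deepest first child) or climbs to the parent, so children are
   always visited before their parent, and the root itself -- like
   tree_root above -- is never visited.  */

struct walk_node
{
  struct walk_node *inner;	/* First child.  */
  struct walk_node *next;	/* Next sibling.  */
  struct walk_node *outer;	/* Parent.  */
};

static void
walk_inner_first (struct walk_node *root, void (*visit) (struct walk_node *))
{
  struct walk_node *node = root;

  /* Descend to the deepest leftmost node.  */
  while (node->inner)
    node = node->inner;

  while (node != root)
    {
      visit (node);

      if (node->next)
	{
	  node = node->next;
	  while (node->inner)
	    node = node->inner;
	}
      else
	node = node->outer;
    }
}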