gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does so in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variable's value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible; it might give better performance and might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
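/* As an illustration of these steps (a hypothetical example, not a
   transformation hard-wired below): for a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   step 1 finds the address use a[i] and the compare use i < n, step 2
   proposes candidates such as i itself and a pointer running over a,
   and steps 3 and 4 may rewrite the loop to

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   eliminating the counter i when it has no other uses.  */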
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to which the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier of that use, if have_use_for is true. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to a loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_ADDRESS, /* Use in an address. */
135 USE_COMPARE /* Use is a compare. */
138 /* The candidate - cost pair. */
139 struct cost_pair
141 struct iv_cand *cand; /* The candidate. */
142 unsigned cost; /* The cost. */
143 bitmap depends_on; /* The list of invariants that have to be
144 preserved. */
145 tree value; /* For final value elimination, the expression for
146 the final value of the iv. For iv elimination,
147 the new bound to compare with. */
150 /* Use. */
151 struct iv_use
153 unsigned id; /* The id of the use. */
154 enum use_type type; /* Type of the use. */
155 struct iv *iv; /* The induction variable it is based on. */
156 tree stmt; /* Statement in which it occurs. */
157 tree *op_p; /* The place where it occurs. */
158 bitmap related_cands; /* The set of "related" iv candidates, plus the common
159 important ones. */
161 unsigned n_map_members; /* Number of candidates in the cost_map list. */
162 struct cost_pair *cost_map;
163 /* The costs w.r.t. the iv candidates. */
165 struct iv_cand *selected;
166 /* The selected candidate. */
169 /* The position where the iv is computed. */
170 enum iv_position
172 IP_NORMAL, /* At the end, just before the exit condition. */
173 IP_END, /* At the end of the latch block. */
174 IP_ORIGINAL /* The original biv. */
177 /* The induction variable candidate. */
178 struct iv_cand
180 unsigned id; /* The number of the candidate. */
181 bool important; /* Whether this is an "important" candidate, i.e. such
182 that it should be considered by all uses. */
183 enum iv_position pos; /* Where it is computed. */
184 tree incremented_at; /* For original biv, the statement where it is
185 incremented. */
186 tree var_before; /* The variable used for it before increment. */
187 tree var_after; /* The variable used for it after increment. */
188 struct iv *iv; /* The value of the candidate. NULL for
189 "pseudocandidate" used to indicate the possibility
190 to replace the final value of an iv by direct
191 computation of the value. */
192 unsigned cost; /* Cost of the candidate. */
193 bitmap depends_on; /* The list of invariants that are used in step of the
194 biv. */
197 /* The data used by the induction variable optimizations. */
199 typedef struct iv_use *iv_use_p;
200 DEF_VEC_P(iv_use_p);
201 DEF_VEC_ALLOC_P(iv_use_p,heap);
203 typedef struct iv_cand *iv_cand_p;
204 DEF_VEC_P(iv_cand_p);
205 DEF_VEC_ALLOC_P(iv_cand_p,heap);
207 struct ivopts_data
209 /* The currently optimized loop. */
210 struct loop *current_loop;
212 /* Numbers of iterations for all exits of the current loop. */
213 htab_t niters;
215 /* The size of version_info array allocated. */
216 unsigned version_info_size;
218 /* The array of information for the ssa names. */
219 struct version_info *version_info;
221 /* The bitmap of indices in version_info whose value was changed. */
222 bitmap relevant;
224 /* The maximum invariant id. */
225 unsigned max_inv_id;
227 /* The uses of induction variables. */
228 VEC(iv_use_p,heap) *iv_uses;
230 /* The candidates. */
231 VEC(iv_cand_p,heap) *iv_candidates;
233 /* A bitmap of important candidates. */
234 bitmap important_candidates;
236 /* Whether to consider just related and important candidates when replacing a
237 use. */
238 bool consider_all_candidates;
241 /* An assignment of iv candidates to uses. */
243 struct iv_ca
245 /* The number of uses covered by the assignment. */
246 unsigned upto;
248 /* Number of uses that cannot be expressed by the candidates in the set. */
249 unsigned bad_uses;
251 /* Candidate assigned to a use, together with the related costs. */
252 struct cost_pair **cand_for_use;
254 /* Number of times each candidate is used. */
255 unsigned *n_cand_uses;
257 /* The candidates used. */
258 bitmap cands;
260 /* The number of candidates in the set. */
261 unsigned n_cands;
263 /* Total number of registers needed. */
264 unsigned n_regs;
266 /* Total cost of expressing uses. */
267 unsigned cand_use_cost;
269 /* Total cost of candidates. */
270 unsigned cand_cost;
272 /* Number of times each invariant is used. */
273 unsigned *n_invariant_uses;
275 /* Total cost of the assignment. */
276 unsigned cost;
279 /* Difference of two iv candidate assignments. */
281 struct iv_ca_delta
283 /* Changed use. */
284 struct iv_use *use;
286 /* An old assignment (for rollback purposes). */
287 struct cost_pair *old_cp;
289 /* A new assignment. */
290 struct cost_pair *new_cp;
292 /* Next change in the list. */
293 struct iv_ca_delta *next_change;
296 /* Bound on the number of candidates below which all candidates are considered. */
298 #define CONSIDER_ALL_CANDIDATES_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
301 /* If there are more iv occurrences than this, we just give up (it is quite
302 unlikely that optimizing such a loop would help, and it would take ages). */
304 #define MAX_CONSIDERED_USES \
305 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
307 /* If there are at most this number of ivs in the set, always try removing
308 unnecessary ivs from the set. */
310 #define ALWAYS_PRUNE_CAND_SET_BOUND \
311 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
313 /* The list of trees for which the decl_rtl field must be reset is stored
314 here. */
316 static VEC(tree,heap) *decl_rtl_to_reset;
318 /* Number of uses recorded in DATA. */
320 static inline unsigned
321 n_iv_uses (struct ivopts_data *data)
323 return VEC_length (iv_use_p, data->iv_uses);
326 /* Ith use recorded in DATA. */
328 static inline struct iv_use *
329 iv_use (struct ivopts_data *data, unsigned i)
331 return VEC_index (iv_use_p, data->iv_uses, i);
334 /* Number of candidates recorded in DATA. */
336 static inline unsigned
337 n_iv_cands (struct ivopts_data *data)
339 return VEC_length (iv_cand_p, data->iv_candidates);
342 /* Ith candidate recorded in DATA. */
344 static inline struct iv_cand *
345 iv_cand (struct ivopts_data *data, unsigned i)
347 return VEC_index (iv_cand_p, data->iv_candidates, i);
350 /* The data for LOOP. */
352 static inline struct loop_data *
353 loop_data (struct loop *loop)
355 return loop->aux;
358 /* The single loop exit if it dominates the latch, NULL otherwise. */
360 edge
361 single_dom_exit (struct loop *loop)
363 edge exit = loop->single_exit;
365 if (!exit)
366 return NULL;
368 if (!just_once_each_iteration_p (loop, exit->src))
369 return NULL;
371 return exit;
374 /* Dumps information about the induction variable IV to FILE. */
376 extern void dump_iv (FILE *, struct iv *);
377 void
378 dump_iv (FILE *file, struct iv *iv)
380 if (iv->ssa_name)
382 fprintf (file, "ssa name ");
383 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
384 fprintf (file, "\n");
387 fprintf (file, " type ");
388 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
389 fprintf (file, "\n");
391 if (iv->step)
393 fprintf (file, " base ");
394 print_generic_expr (file, iv->base, TDF_SLIM);
395 fprintf (file, "\n");
397 fprintf (file, " step ");
398 print_generic_expr (file, iv->step, TDF_SLIM);
399 fprintf (file, "\n");
401 else
403 fprintf (file, " invariant ");
404 print_generic_expr (file, iv->base, TDF_SLIM);
405 fprintf (file, "\n");
408 if (iv->base_object)
410 fprintf (file, " base object ");
411 print_generic_expr (file, iv->base_object, TDF_SLIM);
412 fprintf (file, "\n");
415 if (iv->biv_p)
416 fprintf (file, " is a biv\n");
419 /* Dumps information about the USE to FILE. */
421 extern void dump_use (FILE *, struct iv_use *);
422 void
423 dump_use (FILE *file, struct iv_use *use)
425 fprintf (file, "use %d\n", use->id);
427 switch (use->type)
429 case USE_NONLINEAR_EXPR:
430 fprintf (file, " generic\n");
431 break;
433 case USE_ADDRESS:
434 fprintf (file, " address\n");
435 break;
437 case USE_COMPARE:
438 fprintf (file, " compare\n");
439 break;
441 default:
442 gcc_unreachable ();
445 fprintf (file, " in statement ");
446 print_generic_expr (file, use->stmt, TDF_SLIM);
447 fprintf (file, "\n");
449 fprintf (file, " at position ");
450 if (use->op_p)
451 print_generic_expr (file, *use->op_p, TDF_SLIM);
452 fprintf (file, "\n");
454 dump_iv (file, use->iv);
456 if (use->related_cands)
458 fprintf (file, " related candidates ");
459 dump_bitmap (file, use->related_cands);
463 /* Dumps information about the uses to FILE. */
465 extern void dump_uses (FILE *, struct ivopts_data *);
466 void
467 dump_uses (FILE *file, struct ivopts_data *data)
469 unsigned i;
470 struct iv_use *use;
472 for (i = 0; i < n_iv_uses (data); i++)
474 use = iv_use (data, i);
476 dump_use (file, use);
477 fprintf (file, "\n");
481 /* Dumps information about induction variable candidate CAND to FILE. */
483 extern void dump_cand (FILE *, struct iv_cand *);
484 void
485 dump_cand (FILE *file, struct iv_cand *cand)
487 struct iv *iv = cand->iv;
489 fprintf (file, "candidate %d%s\n",
490 cand->id, cand->important ? " (important)" : "");
492 if (cand->depends_on)
494 fprintf (file, " depends on ");
495 dump_bitmap (file, cand->depends_on);
498 if (!iv)
500 fprintf (file, " final value replacement\n");
501 return;
504 switch (cand->pos)
506 case IP_NORMAL:
507 fprintf (file, " incremented before exit test\n");
508 break;
510 case IP_END:
511 fprintf (file, " incremented at end\n");
512 break;
514 case IP_ORIGINAL:
515 fprintf (file, " original biv\n");
516 break;
519 dump_iv (file, iv);
522 /* Returns the info for ssa version VER. */
524 static inline struct version_info *
525 ver_info (struct ivopts_data *data, unsigned ver)
527 return data->version_info + ver;
530 /* Returns the info for ssa name NAME. */
532 static inline struct version_info *
533 name_info (struct ivopts_data *data, tree name)
535 return ver_info (data, SSA_NAME_VERSION (name));
538 /* Checks whether there exists a number X such that X * B = A, counting modulo
539 2^BITS. */
541 static bool
542 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
543 HOST_WIDE_INT *x)
545 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
546 unsigned HOST_WIDE_INT inv, ex, val;
547 unsigned i;
549 a &= mask;
550 b &= mask;
552 /* First divide the whole equation by 2 as long as possible. */
553 while (!(a & 1) && !(b & 1))
555 a >>= 1;
556 b >>= 1;
557 bits--;
558 mask >>= 1;
561 if (!(b & 1))
563 /* If b is still even, a is odd and there is no such x. */
564 return false;
567 /* Find the inverse of b. We compute it as
568 b^(2^(bits - 1) - 1) (mod 2^bits). */
569 inv = 1;
570 ex = b;
571 for (i = 0; i < bits - 1; i++)
573 inv = (inv * ex) & mask;
574 ex = (ex * ex) & mask;
577 val = (a * inv) & mask;
579 gcc_assert (((val * b) & mask) == a);
581 if ((val >> (bits - 1)) & 1)
582 val |= ~mask;
584 *x = val;
586 return true;
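/* A worked example of the computation above (illustrative numbers): with
   BITS = 4, A = 3 and B = 5, the mask is 15 and b is already odd.  The
   inverse of b is b^(2^3 - 1) = 5^7 = 13 (mod 16), since 5 * 13 = 65
   = 4 * 16 + 1.  Then val = (3 * 13) & 15 = 7, and indeed
   (7 * 5) & 15 = 3, so *X is set to 7.  */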
589 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
590 emitted in LOOP. */
592 static bool
593 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
595 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
597 gcc_assert (bb);
599 if (sbb == loop->latch)
600 return true;
602 if (sbb != bb)
603 return false;
605 return stmt == last_stmt (bb);
608 /* Returns true if STMT is after the place where the original induction
609 variable CAND is incremented. */
611 static bool
612 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
614 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
615 basic_block stmt_bb = bb_for_stmt (stmt);
616 block_stmt_iterator bsi;
618 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
619 return false;
621 if (stmt_bb != cand_bb)
622 return true;
624 /* Scan the block from the end, since the original ivs are usually
625 incremented at the end of the loop body. */
626 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
628 if (bsi_stmt (bsi) == cand->incremented_at)
629 return false;
630 if (bsi_stmt (bsi) == stmt)
631 return true;
635 /* Returns true if STMT is after the place where the induction variable
636 CAND is incremented in LOOP. */
638 static bool
639 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
641 switch (cand->pos)
643 case IP_END:
644 return false;
646 case IP_NORMAL:
647 return stmt_after_ip_normal_pos (loop, stmt);
649 case IP_ORIGINAL:
650 return stmt_after_ip_original_pos (cand, stmt);
652 default:
653 gcc_unreachable ();
657 /* Element of the table in which we cache the numbers of iterations obtained
658 from exits of the loop. */
660 struct nfe_cache_elt
662 /* The edge for which the number of iterations is cached. */
663 edge exit;
665 /* True if the # of iterations was successfully determined. */
666 bool valid_p;
668 /* Description of # of iterations. */
669 struct tree_niter_desc niter;
672 /* Hash function for nfe_cache_elt E. */
674 static hashval_t
675 nfe_hash (const void *e)
677 const struct nfe_cache_elt *elt = e;
679 return htab_hash_pointer (elt->exit);
682 /* Equality function for nfe_cache_elt E1 and edge E2. */
684 static int
685 nfe_eq (const void *e1, const void *e2)
687 const struct nfe_cache_elt *elt1 = e1;
689 return elt1->exit == e2;
692 /* Returns structure describing number of iterations determined from
693 EXIT of DATA->current_loop, or NULL if something goes wrong. */
695 static struct tree_niter_desc *
696 niter_for_exit (struct ivopts_data *data, edge exit)
698 struct nfe_cache_elt *nfe_desc;
699 PTR *slot;
701 slot = htab_find_slot_with_hash (data->niters, exit,
702 htab_hash_pointer (exit),
703 INSERT);
705 if (!*slot)
707 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
708 nfe_desc->exit = exit;
709 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
710 exit, &nfe_desc->niter,
711 true);
712 *slot = nfe_desc;
714 else
715 nfe_desc = *slot;
717 if (!nfe_desc->valid_p)
718 return NULL;
720 return &nfe_desc->niter;
723 /* Returns structure describing number of iterations determined from
724 single dominating exit of DATA->current_loop, or NULL if something
725 goes wrong. */
727 static struct tree_niter_desc *
728 niter_for_single_dom_exit (struct ivopts_data *data)
730 edge exit = single_dom_exit (data->current_loop);
732 if (!exit)
733 return NULL;
735 return niter_for_exit (data, exit);
738 /* Initializes data structures used by the iv optimization pass, stored
739 in DATA. LOOPS is the loop tree. */
741 static void
742 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
744 unsigned i;
746 data->version_info_size = 2 * num_ssa_names;
747 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
748 data->relevant = BITMAP_ALLOC (NULL);
749 data->important_candidates = BITMAP_ALLOC (NULL);
750 data->max_inv_id = 0;
751 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
753 for (i = 1; i < loops->num; i++)
754 if (loops->parray[i])
755 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
757 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
758 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
759 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
762 /* Returns a memory object to which EXPR points. If we are able to
763 determine that it does not point to any such object, NULL is returned. */
765 static tree
766 determine_base_object (tree expr)
768 enum tree_code code = TREE_CODE (expr);
769 tree base, obj, op0, op1;
771 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
772 return NULL_TREE;
774 switch (code)
776 case INTEGER_CST:
777 return NULL_TREE;
779 case ADDR_EXPR:
780 obj = TREE_OPERAND (expr, 0);
781 base = get_base_address (obj);
783 if (!base)
784 return expr;
786 if (TREE_CODE (base) == INDIRECT_REF)
787 return determine_base_object (TREE_OPERAND (base, 0));
789 return fold_convert (ptr_type_node,
790 build_fold_addr_expr (base));
792 case PLUS_EXPR:
793 case MINUS_EXPR:
794 op0 = determine_base_object (TREE_OPERAND (expr, 0));
795 op1 = determine_base_object (TREE_OPERAND (expr, 1));
797 if (!op1)
798 return op0;
800 if (!op0)
801 return (code == PLUS_EXPR
802 ? op1
803 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
805 return fold_build2 (code, ptr_type_node, op0, op1);
807 case NOP_EXPR:
808 case CONVERT_EXPR:
809 return determine_base_object (TREE_OPERAND (expr, 0));
811 default:
812 return fold_convert (ptr_type_node, expr);
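/* For example (hypothetical inputs): for EXPR == &a[i] where a is a
   declared array, the ADDR_EXPR case returns &a (converted to void *);
   for EXPR == p + 4 * i where p is a pointer, only p yields a base, so
   p itself is returned; for an integer constant, NULL_TREE is
   returned.  */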
816 /* Allocates an induction variable with the given initial value BASE and
817 step STEP. */
819 static struct iv *
820 alloc_iv (tree base, tree step)
822 struct iv *iv = XCNEW (struct iv);
824 if (step && integer_zerop (step))
825 step = NULL_TREE;
827 iv->base = base;
828 iv->base_object = determine_base_object (base);
829 iv->step = step;
830 iv->biv_p = false;
831 iv->have_use_for = false;
832 iv->use_id = 0;
833 iv->ssa_name = NULL_TREE;
835 return iv;
838 /* Sets STEP and BASE for induction variable IV. */
840 static void
841 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
843 struct version_info *info = name_info (data, iv);
845 gcc_assert (!info->iv);
847 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
848 info->iv = alloc_iv (base, step);
849 info->iv->ssa_name = iv;
852 /* Finds induction variable declaration for VAR. */
854 static struct iv *
855 get_iv (struct ivopts_data *data, tree var)
857 basic_block bb;
859 if (!name_info (data, var)->iv)
861 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
863 if (!bb
864 || !flow_bb_inside_loop_p (data->current_loop, bb))
865 set_iv (data, var, var, NULL_TREE);
868 return name_info (data, var)->iv;
871 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
872 not define a simple affine biv with nonzero step. */
874 static tree
875 determine_biv_step (tree phi)
877 struct loop *loop = bb_for_stmt (phi)->loop_father;
878 tree name = PHI_RESULT (phi);
879 affine_iv iv;
881 if (!is_gimple_reg (name))
882 return NULL_TREE;
884 if (!simple_iv (loop, phi, name, &iv, true))
885 return NULL_TREE;
887 return (zero_p (iv.step) ? NULL_TREE : iv.step);
890 /* Returns true if EXP is an ssa name that occurs in an abnormal phi node. */
892 static bool
893 abnormal_ssa_name_p (tree exp)
895 if (!exp)
896 return false;
898 if (TREE_CODE (exp) != SSA_NAME)
899 return false;
901 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
904 /* Returns false if BASE or INDEX contains an ssa name that occurs in an
905 abnormal phi node. Callback for for_each_index. */
907 static bool
908 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
909 void *data ATTRIBUTE_UNUSED)
911 if (TREE_CODE (base) == ARRAY_REF)
913 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
914 return false;
915 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
916 return false;
919 return !abnormal_ssa_name_p (*index);
922 /* Returns true if EXPR contains an ssa name that occurs in an
923 abnormal phi node. */
925 static bool
926 contains_abnormal_ssa_name_p (tree expr)
928 enum tree_code code;
929 enum tree_code_class class;
931 if (!expr)
932 return false;
934 code = TREE_CODE (expr);
935 class = TREE_CODE_CLASS (code);
937 if (code == SSA_NAME)
938 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
940 if (code == INTEGER_CST
941 || is_gimple_min_invariant (expr))
942 return false;
944 if (code == ADDR_EXPR)
945 return !for_each_index (&TREE_OPERAND (expr, 0),
946 idx_contains_abnormal_ssa_name_p,
947 NULL);
949 switch (class)
951 case tcc_binary:
952 case tcc_comparison:
953 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
954 return true;
956 /* Fallthru. */
957 case tcc_unary:
958 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
959 return true;
961 break;
963 default:
964 gcc_unreachable ();
967 return false;
970 /* Finds basic ivs. */
972 static bool
973 find_bivs (struct ivopts_data *data)
975 tree phi, step, type, base;
976 bool found = false;
977 struct loop *loop = data->current_loop;
979 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
981 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
982 continue;
984 step = determine_biv_step (phi);
985 if (!step)
986 continue;
988 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
989 base = expand_simple_operations (base);
990 if (contains_abnormal_ssa_name_p (base)
991 || contains_abnormal_ssa_name_p (step))
992 continue;
994 type = TREE_TYPE (PHI_RESULT (phi));
995 base = fold_convert (type, base);
996 if (step)
997 step = fold_convert (type, step);
999 set_iv (data, PHI_RESULT (phi), base, step);
1000 found = true;
1003 return found;
1006 /* Marks basic ivs. */
1008 static void
1009 mark_bivs (struct ivopts_data *data)
1011 tree phi, var;
1012 struct iv *iv, *incr_iv;
1013 struct loop *loop = data->current_loop;
1014 basic_block incr_bb;
1016 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1018 iv = get_iv (data, PHI_RESULT (phi));
1019 if (!iv)
1020 continue;
1022 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1023 incr_iv = get_iv (data, var);
1024 if (!incr_iv)
1025 continue;
1027 /* If the increment is in a subloop, ignore it. */
1028 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1029 if (incr_bb->loop_father != data->current_loop
1030 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1031 continue;
1033 iv->biv_p = true;
1034 incr_iv->biv_p = true;
1038 /* Checks whether STMT defines a linear induction variable and stores its
1039 parameters to IV. */
1041 static bool
1042 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1044 tree lhs;
1045 struct loop *loop = data->current_loop;
1047 iv->base = NULL_TREE;
1048 iv->step = NULL_TREE;
1050 if (TREE_CODE (stmt) != MODIFY_EXPR)
1051 return false;
1053 lhs = TREE_OPERAND (stmt, 0);
1054 if (TREE_CODE (lhs) != SSA_NAME)
1055 return false;
1057 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1058 return false;
1059 iv->base = expand_simple_operations (iv->base);
1061 if (contains_abnormal_ssa_name_p (iv->base)
1062 || contains_abnormal_ssa_name_p (iv->step))
1063 return false;
1065 return true;
1068 /* Finds general ivs in statement STMT. */
1070 static void
1071 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1073 affine_iv iv;
1075 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1076 return;
1078 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1081 /* Finds general ivs in basic block BB. */
1083 static void
1084 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1086 block_stmt_iterator bsi;
1088 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1089 find_givs_in_stmt (data, bsi_stmt (bsi));
1092 /* Finds general ivs. */
1094 static void
1095 find_givs (struct ivopts_data *data)
1097 struct loop *loop = data->current_loop;
1098 basic_block *body = get_loop_body_in_dom_order (loop);
1099 unsigned i;
1101 for (i = 0; i < loop->num_nodes; i++)
1102 find_givs_in_bb (data, body[i]);
1103 free (body);
1106 /* For each ssa name defined in LOOP determines whether it is an induction
1107 variable and if so, its initial value and step. */
1109 static bool
1110 find_induction_variables (struct ivopts_data *data)
1112 unsigned i;
1113 bitmap_iterator bi;
1115 if (!find_bivs (data))
1116 return false;
1118 find_givs (data);
1119 mark_bivs (data);
1121 if (dump_file && (dump_flags & TDF_DETAILS))
1123 struct tree_niter_desc *niter;
1125 niter = niter_for_single_dom_exit (data);
1127 if (niter)
1129 fprintf (dump_file, " number of iterations ");
1130 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1131 fprintf (dump_file, "\n");
1133 fprintf (dump_file, " may be zero if ");
1134 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1135 fprintf (dump_file, "\n");
1136 fprintf (dump_file, "\n");
1139 fprintf (dump_file, "Induction variables:\n\n");
1141 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1143 if (ver_info (data, i)->iv)
1144 dump_iv (dump_file, ver_info (data, i)->iv);
1148 return true;
1151 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1153 static struct iv_use *
1154 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1155 tree stmt, enum use_type use_type)
1157 struct iv_use *use = XCNEW (struct iv_use);
1159 use->id = n_iv_uses (data);
1160 use->type = use_type;
1161 use->iv = iv;
1162 use->stmt = stmt;
1163 use->op_p = use_p;
1164 use->related_cands = BITMAP_ALLOC (NULL);
1166 /* Reset the ssa name, so that it is not shown in the dumps, if it
1167 was not reset by the caller. */
1168 iv->ssa_name = NULL_TREE;
1170 if (dump_file && (dump_flags & TDF_DETAILS))
1171 dump_use (dump_file, use);
1173 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1175 return use;
1178 /* Checks whether OP is a loop-level invariant and if so, records it.
1179 NONLINEAR_USE is true if the invariant is used in a way we do not
1180 handle specially. */
1182 static void
1183 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1185 basic_block bb;
1186 struct version_info *info;
1188 if (TREE_CODE (op) != SSA_NAME
1189 || !is_gimple_reg (op))
1190 return;
1192 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1193 if (bb
1194 && flow_bb_inside_loop_p (data->current_loop, bb))
1195 return;
1197 info = name_info (data, op);
1198 info->name = op;
1199 info->has_nonlin_use |= nonlinear_use;
1200 if (!info->inv_id)
1201 info->inv_id = ++data->max_inv_id;
1202 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1205 /* Checks whether the use OP is interesting and if so, records it. */
1207 static struct iv_use *
1208 find_interesting_uses_op (struct ivopts_data *data, tree op)
1210 struct iv *iv;
1211 struct iv *civ;
1212 tree stmt;
1213 struct iv_use *use;
1215 if (TREE_CODE (op) != SSA_NAME)
1216 return NULL;
1218 iv = get_iv (data, op);
1219 if (!iv)
1220 return NULL;
1222 if (iv->have_use_for)
1224 use = iv_use (data, iv->use_id);
1226 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1227 return use;
1230 if (zero_p (iv->step))
1232 record_invariant (data, op, true);
1233 return NULL;
1235 iv->have_use_for = true;
1237 civ = XNEW (struct iv);
1238 *civ = *iv;
1240 stmt = SSA_NAME_DEF_STMT (op);
1241 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1242 || TREE_CODE (stmt) == MODIFY_EXPR);
1244 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1245 iv->use_id = use->id;
1247 return use;
1250 /* Checks whether the condition *COND_P in STMT is interesting
1251 and if so, records it. */
1253 static void
1254 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1256 tree *op0_p;
1257 tree *op1_p;
1258 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1259 struct iv const_iv;
1260 tree zero = integer_zero_node;
1262 const_iv.step = NULL_TREE;
1264 if (TREE_CODE (*cond_p) != SSA_NAME
1265 && !COMPARISON_CLASS_P (*cond_p))
1266 return;
1268 if (TREE_CODE (*cond_p) == SSA_NAME)
1270 op0_p = cond_p;
1271 op1_p = &zero;
1273 else
1275 op0_p = &TREE_OPERAND (*cond_p, 0);
1276 op1_p = &TREE_OPERAND (*cond_p, 1);
1279 if (TREE_CODE (*op0_p) == SSA_NAME)
1280 iv0 = get_iv (data, *op0_p);
1281 else
1282 iv0 = &const_iv;
1284 if (TREE_CODE (*op1_p) == SSA_NAME)
1285 iv1 = get_iv (data, *op1_p);
1286 else
1287 iv1 = &const_iv;
1289 if (/* When comparing with a non-invariant value, we cannot do any sensible
1290 induction variable elimination. */
1291 (!iv0 || !iv1)
1292 /* Eliminating condition based on two ivs would be nontrivial.
1293 ??? TODO -- it is not really important to handle this case. */
1294 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1296 find_interesting_uses_op (data, *op0_p);
1297 find_interesting_uses_op (data, *op1_p);
1298 return;
1301 if (zero_p (iv0->step) && zero_p (iv1->step))
1303 /* If both are invariants, this is a work for unswitching. */
1304 return;
1307 civ = XNEW (struct iv);
1308 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1309 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1312 /* Returns true if expression EXPR is obviously invariant in LOOP,
1313 i.e. if all its operands are defined outside of the LOOP. */
1315 bool
1316 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1318 basic_block def_bb;
1319 unsigned i, len;
1321 if (is_gimple_min_invariant (expr))
1322 return true;
1324 if (TREE_CODE (expr) == SSA_NAME)
1326 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1327 if (def_bb
1328 && flow_bb_inside_loop_p (loop, def_bb))
1329 return false;
1331 return true;
1334 if (!EXPR_P (expr))
1335 return false;
1337 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1338 for (i = 0; i < len; i++)
1339 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1340 return false;
1342 return true;
1345 /* Accumulates the steps of indices into DATA and replaces their values with the
1346 initial ones. Returns false when the value of the index cannot be determined.
1347 Callback for for_each_index. */
1349 struct ifs_ivopts_data
1351 struct ivopts_data *ivopts_data;
1352 tree stmt;
1353 tree *step_p;
1356 static bool
1357 idx_find_step (tree base, tree *idx, void *data)
1359 struct ifs_ivopts_data *dta = data;
1360 struct iv *iv;
1361 tree step, iv_step, lbound, off;
1362 struct loop *loop = dta->ivopts_data->current_loop;
1364 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1365 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1366 return false;
1368 /* If base is a component ref, require that the offset of the reference
1369 be invariant. */
1370 if (TREE_CODE (base) == COMPONENT_REF)
1372 off = component_ref_field_offset (base);
1373 return expr_invariant_in_loop_p (loop, off);
1376 /* If base is an array, first check whether we will be able to move the
1377 reference out of the loop (in order to take its address in strength
1378 reduction). In order for this to work we need both lower bound
1379 and step to be loop invariants. */
1380 if (TREE_CODE (base) == ARRAY_REF)
1382 step = array_ref_element_size (base);
1383 lbound = array_ref_low_bound (base);
1385 if (!expr_invariant_in_loop_p (loop, step)
1386 || !expr_invariant_in_loop_p (loop, lbound))
1387 return false;
1390 if (TREE_CODE (*idx) != SSA_NAME)
1391 return true;
1393 iv = get_iv (dta->ivopts_data, *idx);
1394 if (!iv)
1395 return false;
1397 *idx = iv->base;
1399 if (!iv->step)
1400 return true;
1402 if (TREE_CODE (base) == ARRAY_REF)
1404 step = array_ref_element_size (base);
1406 /* We only handle addresses whose step is an integer constant. */
1407 if (TREE_CODE (step) != INTEGER_CST)
1408 return false;
1410 else
1411 /* The step for pointer arithmetic is already 1 byte. */
1412 step = build_int_cst (sizetype, 1);
1414 /* FIXME: convert_step should not be used outside chrec_convert: fix
1415 this by calling chrec_convert. */
1416 iv_step = convert_step (dta->ivopts_data->current_loop,
1417 sizetype, iv->base, iv->step, dta->stmt);
1419 if (!iv_step)
1421 /* The index might wrap. */
1422 return false;
1425 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1427 if (!*dta->step_p)
1428 *dta->step_p = step;
1429 else
1430 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1432 return true;
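/* A sketch of the effect (hypothetical input): for a reference a[i]
   where i is an iv with base 0 and step 1 and the array elements are
   4 bytes wide, *IDX is replaced by 0 and 4 * 1 = 4 is added to
   *DTA->step_p, i.e. the reference is treated as a[0] whose address
   advances by 4 bytes per iteration.  */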
1435 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1436 object is passed to it in DATA. */
1438 static bool
1439 idx_record_use (tree base, tree *idx,
1440 void *data)
1442 find_interesting_uses_op (data, *idx);
1443 if (TREE_CODE (base) == ARRAY_REF)
1445 find_interesting_uses_op (data, array_ref_element_size (base));
1446 find_interesting_uses_op (data, array_ref_low_bound (base));
1448 return true;
1451 /* Returns true if memory reference REF may be unaligned. */
1453 static bool
1454 may_be_unaligned_p (tree ref)
1456 tree base;
1457 tree base_type;
1458 HOST_WIDE_INT bitsize;
1459 HOST_WIDE_INT bitpos;
1460 tree toffset;
1461 enum machine_mode mode;
1462 int unsignedp, volatilep;
1463 unsigned base_align;
1465 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1466 thus they are not misaligned. */
1467 if (TREE_CODE (ref) == TARGET_MEM_REF)
1468 return false;
1470 /* The test below is basically a copy of what expr.c:normal_inner_ref
1471 does to check whether the object must be loaded by parts when
1472 STRICT_ALIGNMENT is true. */
1473 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1474 &unsignedp, &volatilep, true);
1475 base_type = TREE_TYPE (base);
1476 base_align = TYPE_ALIGN (base_type);
1478 if (mode != BLKmode
1479 && (base_align < GET_MODE_ALIGNMENT (mode)
1480 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1481 || bitpos % BITS_PER_UNIT != 0))
1482 return true;
1484 return false;
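/* For instance (an illustrative case on a strict-alignment target):
   an int field at byte offset 1 in a packed structure yields
   bitpos == 8, and 8 % GET_MODE_ALIGNMENT (SImode) != 0 when SImode
   requires 32-bit alignment, so the reference is reported as possibly
   unaligned.  */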
1487 /* Finds addresses in *OP_P inside STMT. */
1489 static void
1490 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1492 tree base = *op_p, step = NULL;
1493 struct iv *civ;
1494 struct ifs_ivopts_data ifs_ivopts_data;
1496 /* Do not play with volatile memory references. A bit too conservative,
1497 perhaps, but safe. */
1498 if (stmt_ann (stmt)->has_volatile_ops)
1499 goto fail;
1501 /* Ignore bitfields for now. Not really something terribly complicated
1502 to handle. TODO. */
1503 if (TREE_CODE (base) == COMPONENT_REF
1504 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1505 goto fail;
1507 if (STRICT_ALIGNMENT
1508 && may_be_unaligned_p (base))
1509 goto fail;
1511 base = unshare_expr (base);
1513 if (TREE_CODE (base) == TARGET_MEM_REF)
1515 tree type = build_pointer_type (TREE_TYPE (base));
1516 tree astep;
1518 if (TMR_BASE (base)
1519 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1521 civ = get_iv (data, TMR_BASE (base));
1522 if (!civ)
1523 goto fail;
1525 TMR_BASE (base) = civ->base;
1526 step = civ->step;
1528 if (TMR_INDEX (base)
1529 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1531 civ = get_iv (data, TMR_INDEX (base));
1532 if (!civ)
1533 goto fail;
1535 TMR_INDEX (base) = civ->base;
1536 astep = civ->step;
1538 if (astep)
1540 if (TMR_STEP (base))
1541 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1543 if (step)
1544 step = fold_build2 (PLUS_EXPR, type, step, astep);
1545 else
1546 step = astep;
1550 if (zero_p (step))
1551 goto fail;
1552 base = tree_mem_ref_addr (type, base);
1554 else
1556 ifs_ivopts_data.ivopts_data = data;
1557 ifs_ivopts_data.stmt = stmt;
1558 ifs_ivopts_data.step_p = &step;
1559 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1560 || zero_p (step))
1561 goto fail;
1563 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1564 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1566 base = build_fold_addr_expr (base);
1569 civ = alloc_iv (base, step);
1570 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1571 return;
1573 fail:
1574 for_each_index (op_p, idx_record_use, data);
1577 /* Finds and records invariants used in STMT. */
1579 static void
1580 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1582 ssa_op_iter iter;
1583 use_operand_p use_p;
1584 tree op;
1586 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1588 op = USE_FROM_PTR (use_p);
1589 record_invariant (data, op, false);
1593 /* Finds interesting uses of induction variables in the statement STMT. */
1595 static void
1596 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1598 struct iv *iv;
1599 tree op, lhs, rhs;
1600 ssa_op_iter iter;
1601 use_operand_p use_p;
1603 find_invariants_stmt (data, stmt);
1605 if (TREE_CODE (stmt) == COND_EXPR)
1607 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1608 return;
1611 if (TREE_CODE (stmt) == MODIFY_EXPR)
1613 lhs = TREE_OPERAND (stmt, 0);
1614 rhs = TREE_OPERAND (stmt, 1);
1616 if (TREE_CODE (lhs) == SSA_NAME)
1618 /* If the statement defines an induction variable, the uses are not
1619 interesting by themselves. */
1621 iv = get_iv (data, lhs);
1623 if (iv && !zero_p (iv->step))
1624 return;
1627 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1629 case tcc_comparison:
1630 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1631 return;
1633 case tcc_reference:
1634 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1635 if (REFERENCE_CLASS_P (lhs))
1636 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1637 return;
1639 default: ;
1642 if (REFERENCE_CLASS_P (lhs)
1643 && is_gimple_val (rhs))
1645 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1646 find_interesting_uses_op (data, rhs);
1647 return;
1650 /* TODO -- we should also handle address uses of type
1652 memory = call (whatever);
1656 call (memory). */
1659 if (TREE_CODE (stmt) == PHI_NODE
1660 && bb_for_stmt (stmt) == data->current_loop->header)
1662 lhs = PHI_RESULT (stmt);
1663 iv = get_iv (data, lhs);
1665 if (iv && !zero_p (iv->step))
1666 return;
1669 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1671 op = USE_FROM_PTR (use_p);
1673 if (TREE_CODE (op) != SSA_NAME)
1674 continue;
1676 iv = get_iv (data, op);
1677 if (!iv)
1678 continue;
1680 find_interesting_uses_op (data, op);
1684 /* Finds interesting uses of induction variables outside of loops
1685 on loop exit edge EXIT. */
1687 static void
1688 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1690 tree phi, def;
1692 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1694 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1695 find_interesting_uses_op (data, def);
1699 /* Finds uses of the induction variables that are interesting. */
1701 static void
1702 find_interesting_uses (struct ivopts_data *data)
1704 basic_block bb;
1705 block_stmt_iterator bsi;
1706 tree phi;
1707 basic_block *body = get_loop_body (data->current_loop);
1708 unsigned i;
1709 struct version_info *info;
1710 edge e;
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "Uses:\n\n");
1715 for (i = 0; i < data->current_loop->num_nodes; i++)
1717 edge_iterator ei;
1718 bb = body[i];
1720 FOR_EACH_EDGE (e, ei, bb->succs)
1721 if (e->dest != EXIT_BLOCK_PTR
1722 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1723 find_interesting_uses_outside (data, e);
1725 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1726 find_interesting_uses_stmt (data, phi);
1727 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1728 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1731 if (dump_file && (dump_flags & TDF_DETAILS))
1733 bitmap_iterator bi;
1735 fprintf (dump_file, "\n");
1737 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1739 info = ver_info (data, i);
1740 if (info->inv_id)
1742 fprintf (dump_file, " ");
1743 print_generic_expr (dump_file, info->name, TDF_SLIM);
1744 fprintf (dump_file, " is invariant (%d)%s\n",
1745 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1749 fprintf (dump_file, "\n");
1752 free (body);
1755 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1756 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1757 we are at the top level of the processed address. */
1759 static tree
1760 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1761 unsigned HOST_WIDE_INT *offset)
1763 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1764 enum tree_code code;
1765 tree type, orig_type = TREE_TYPE (expr);
1766 unsigned HOST_WIDE_INT off0, off1, st;
1767 tree orig_expr = expr;
1769 STRIP_NOPS (expr);
1771 type = TREE_TYPE (expr);
1772 code = TREE_CODE (expr);
1773 *offset = 0;
1775 switch (code)
1777 case INTEGER_CST:
1778 if (!cst_and_fits_in_hwi (expr)
1779 || zero_p (expr))
1780 return orig_expr;
1782 *offset = int_cst_value (expr);
1783 return build_int_cst_type (orig_type, 0);
1785 case PLUS_EXPR:
1786 case MINUS_EXPR:
1787 op0 = TREE_OPERAND (expr, 0);
1788 op1 = TREE_OPERAND (expr, 1);
1790 op0 = strip_offset_1 (op0, false, false, &off0);
1791 op1 = strip_offset_1 (op1, false, false, &off1);
1793 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1794 if (op0 == TREE_OPERAND (expr, 0)
1795 && op1 == TREE_OPERAND (expr, 1))
1796 return orig_expr;
1798 if (zero_p (op1))
1799 expr = op0;
1800 else if (zero_p (op0))
1802 if (code == PLUS_EXPR)
1803 expr = op1;
1804 else
1805 expr = fold_build1 (NEGATE_EXPR, type, op1);
1807 else
1808 expr = fold_build2 (code, type, op0, op1);
1810 return fold_convert (orig_type, expr);
1812 case ARRAY_REF:
1813 if (!inside_addr)
1814 return orig_expr;
1816 step = array_ref_element_size (expr);
1817 if (!cst_and_fits_in_hwi (step))
1818 break;
1820 st = int_cst_value (step);
1821 op1 = TREE_OPERAND (expr, 1);
1822 op1 = strip_offset_1 (op1, false, false, &off1);
1823 *offset = off1 * st;
1825 if (top_compref
1826 && zero_p (op1))
1828 /* Strip the array reference completely. */
1829 op0 = TREE_OPERAND (expr, 0);
1830 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1831 *offset += off0;
1832 return op0;
1834 break;
1836 case COMPONENT_REF:
1837 if (!inside_addr)
1838 return orig_expr;
1840 tmp = component_ref_field_offset (expr);
1841 if (top_compref
1842 && cst_and_fits_in_hwi (tmp))
1844 /* Strip the component reference completely. */
1845 op0 = TREE_OPERAND (expr, 0);
1846 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1847 *offset = off0 + int_cst_value (tmp);
1848 return op0;
1850 break;
1852 case ADDR_EXPR:
1853 op0 = TREE_OPERAND (expr, 0);
1854 op0 = strip_offset_1 (op0, true, true, &off0);
1855 *offset += off0;
1857 if (op0 == TREE_OPERAND (expr, 0))
1858 return orig_expr;
1860 expr = build_fold_addr_expr (op0);
1861 return fold_convert (orig_type, expr);
1863 case INDIRECT_REF:
1864 inside_addr = false;
1865 break;
1867 default:
1868 return orig_expr;
1871 /* Default handling of expressions for which we want to recurse into
1872 the first operand. */
1873 op0 = TREE_OPERAND (expr, 0);
1874 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1875 *offset += off0;
1877 if (op0 == TREE_OPERAND (expr, 0)
1878 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1879 return orig_expr;
1881 expr = copy_node (expr);
1882 TREE_OPERAND (expr, 0) = op0;
1883 if (op1)
1884 TREE_OPERAND (expr, 1) = op1;
1886 /* Inside an address, we might strip top-level component references,
1887 thus changing the type of the expression. Handling of ADDR_EXPR
1888 will fix that. */
1889 expr = fold_convert (orig_type, expr);
1891 return expr;
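/* A worked example (hypothetical input): for EXPR == &a[3] with 4-byte
   array elements, the ARRAY_REF case computes st = 4 and off1 = 3, the
   index becomes zero so the reference is stripped down to a, and the
   ADDR_EXPR case then returns &a with *OFFSET == 12.  */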
1894 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1896 static tree
1897 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1899 return strip_offset_1 (expr, false, false, offset);
1902 /* Returns a variant of TYPE that can be used as a base for different uses.
1903 For integer types, we return the unsigned variant of the type, which
1904 avoids problems with overflows. For pointer types, we return void *. */
1906 static tree
1907 generic_type_for (tree type)
1909 if (POINTER_TYPE_P (type))
1910 return ptr_type_node;
1912 if (TYPE_UNSIGNED (type))
1913 return type;
1915 return unsigned_type_for (type);
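/* E.g. generic_type_for (int) is unsigned int and generic_type_for of
   any pointer type is void *; unsigned types are returned unchanged.  */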
1918 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1919 the bitmap in which we should store them. */
1921 static struct ivopts_data *fd_ivopts_data;
1922 static tree
1923 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1925 bitmap *depends_on = data;
1926 struct version_info *info;
1928 if (TREE_CODE (*expr_p) != SSA_NAME)
1929 return NULL_TREE;
1930 info = name_info (fd_ivopts_data, *expr_p);
1932 if (!info->inv_id || info->has_nonlin_use)
1933 return NULL_TREE;
1935 if (!*depends_on)
1936 *depends_on = BITMAP_ALLOC (NULL);
1937 bitmap_set_bit (*depends_on, info->inv_id);
1939 return NULL_TREE;
1942 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1943 position to POS. If USE is not NULL, the candidate is set as related to
1944 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1945 replacement of the final value of the iv by a direct computation. */
1947 static struct iv_cand *
1948 add_candidate_1 (struct ivopts_data *data,
1949 tree base, tree step, bool important, enum iv_position pos,
1950 struct iv_use *use, tree incremented_at)
1952 unsigned i;
1953 struct iv_cand *cand = NULL;
1954 tree type, orig_type;
1956 if (base)
1958 orig_type = TREE_TYPE (base);
1959 type = generic_type_for (orig_type);
1960 if (type != orig_type)
1962 base = fold_convert (type, base);
1963 if (step)
1964 step = fold_convert (type, step);
1968 for (i = 0; i < n_iv_cands (data); i++)
1970 cand = iv_cand (data, i);
1972 if (cand->pos != pos)
1973 continue;
1975 if (cand->incremented_at != incremented_at)
1976 continue;
1978 if (!cand->iv)
1980 if (!base && !step)
1981 break;
1983 continue;
1986 if (!base && !step)
1987 continue;
1989 if (!operand_equal_p (base, cand->iv->base, 0))
1990 continue;
1992 if (zero_p (cand->iv->step))
1994 if (zero_p (step))
1995 break;
1997 else
1999 if (step && operand_equal_p (step, cand->iv->step, 0))
2000 break;
2004 if (i == n_iv_cands (data))
2006 cand = XCNEW (struct iv_cand);
2007 cand->id = i;
2009 if (!base && !step)
2010 cand->iv = NULL;
2011 else
2012 cand->iv = alloc_iv (base, step);
2014 cand->pos = pos;
2015 if (pos != IP_ORIGINAL && cand->iv)
2017 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2018 cand->var_after = cand->var_before;
2020 cand->important = important;
2021 cand->incremented_at = incremented_at;
2022 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2024 if (step
2025 && TREE_CODE (step) != INTEGER_CST)
2027 fd_ivopts_data = data;
2028 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2031 if (dump_file && (dump_flags & TDF_DETAILS))
2032 dump_cand (dump_file, cand);
2035 if (important && !cand->important)
2037 cand->important = true;
2038 if (dump_file && (dump_flags & TDF_DETAILS))
2039 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2042 if (use)
2044 bitmap_set_bit (use->related_cands, i);
2045 if (dump_file && (dump_flags & TDF_DETAILS))
2046 fprintf (dump_file, "Candidate %d is related to use %d\n",
2047 cand->id, use->id);
2050 return cand;
2053 /* Returns true if incrementing the induction variable at the end of the LOOP
2054 is allowed.
2056 The purpose is to avoid splitting the latch edge with a biv increment, thus
2057 creating a jump, possibly confusing other optimization passes and leaving
2058 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2059 is not available (so we do not have a better alternative), or if the latch
2060 edge is already nonempty. */
2062 static bool
2063 allow_ip_end_pos_p (struct loop *loop)
2065 if (!ip_normal_pos (loop))
2066 return true;
2068 if (!empty_block_p (ip_end_pos (loop)))
2069 return true;
2071 return false;
2074 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2075 position to POS. If USE is not NULL, the candidate is set as related to
2076 it. The candidate computation is scheduled on all available positions. */
2078 static void
2079 add_candidate (struct ivopts_data *data,
2080 tree base, tree step, bool important, struct iv_use *use)
2082 if (ip_normal_pos (data->current_loop))
2083 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2084 if (ip_end_pos (data->current_loop)
2085 && allow_ip_end_pos_p (data->current_loop))
2086 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2089 /* Add a standard "0 + 1 * iteration" iv candidate for a
2090 type with SIZE bits. */
2092 static void
2093 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2094 unsigned int size)
2096 tree type = lang_hooks.types.type_for_size (size, true);
2097 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2098 true, NULL);
2101 /* Adds standard iv candidates. */
2103 static void
2104 add_standard_iv_candidates (struct ivopts_data *data)
2106 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2108 /* The same for a double-integer type if it is still fast enough. */
2109 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2110 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2114 /* Adds candidates based on the old induction variable IV. */
2116 static void
2117 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2119 tree phi, def;
2120 struct iv_cand *cand;
2122 add_candidate (data, iv->base, iv->step, true, NULL);
2124 /* The same, but with initial value zero. */
2125 add_candidate (data,
2126 build_int_cst (TREE_TYPE (iv->base), 0),
2127 iv->step, true, NULL);
2129 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2130 if (TREE_CODE (phi) == PHI_NODE)
2132 /* Additionally record the possibility of leaving the original iv
2133 untouched. */
2134 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2135 cand = add_candidate_1 (data,
2136 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2137 SSA_NAME_DEF_STMT (def));
2138 cand->var_before = iv->ssa_name;
2139 cand->var_after = def;
2143 /* Adds candidates based on the old induction variables. */
2145 static void
2146 add_old_ivs_candidates (struct ivopts_data *data)
2148 unsigned i;
2149 struct iv *iv;
2150 bitmap_iterator bi;
2152 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2154 iv = ver_info (data, i)->iv;
2155 if (iv && iv->biv_p && !zero_p (iv->step))
2156 add_old_iv_candidates (data, iv);
2160 /* Adds candidates based on the value of the induction variable IV and USE. */
2162 static void
2163 add_iv_value_candidates (struct ivopts_data *data,
2164 struct iv *iv, struct iv_use *use)
2166 unsigned HOST_WIDE_INT offset;
2167 tree base;
2169 add_candidate (data, iv->base, iv->step, false, use);
2171 /* The same, but with initial value zero. Make such a variable important,
2172 since it is generic enough that possibly many uses may be based
2173 on it. */
2174 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2175 iv->step, true, use);
2177 /* Third, try removing the constant offset. */
2178 base = strip_offset (iv->base, &offset);
2179 if (offset)
2180 add_candidate (data, base, iv->step, false, use);
2183 /* Adds candidates based on the uses. */
2185 static void
2186 add_derived_ivs_candidates (struct ivopts_data *data)
2188 unsigned i;
2190 for (i = 0; i < n_iv_uses (data); i++)
2192 struct iv_use *use = iv_use (data, i);
2194 if (!use)
2195 continue;
2197 switch (use->type)
2199 case USE_NONLINEAR_EXPR:
2200 case USE_COMPARE:
2201 case USE_ADDRESS:
2202 /* Just add the ivs based on the value of the iv used here. */
2203 add_iv_value_candidates (data, use->iv, use);
2204 break;
2206 default:
2207 gcc_unreachable ();
2212 /* Record important candidates and add them to related_cands bitmaps
2213 if needed. */
2215 static void
2216 record_important_candidates (struct ivopts_data *data)
2218 unsigned i;
2219 struct iv_use *use;
2221 for (i = 0; i < n_iv_cands (data); i++)
2223 struct iv_cand *cand = iv_cand (data, i);
2225 if (cand->important)
2226 bitmap_set_bit (data->important_candidates, i);
2229 data->consider_all_candidates = (n_iv_cands (data)
2230 <= CONSIDER_ALL_CANDIDATES_BOUND);
2232 if (data->consider_all_candidates)
2234 /* We will not need "related_cands" bitmaps in this case,
2235 so release them to decrease peak memory consumption. */
2236 for (i = 0; i < n_iv_uses (data); i++)
2238 use = iv_use (data, i);
2239 BITMAP_FREE (use->related_cands);
2242 else
2244 /* Add important candidates to the related_cands bitmaps. */
2245 for (i = 0; i < n_iv_uses (data); i++)
2246 bitmap_ior_into (iv_use (data, i)->related_cands,
2247 data->important_candidates);
2251 /* Finds the candidates for the induction variables. */
2253 static void
2254 find_iv_candidates (struct ivopts_data *data)
2256 /* Add commonly used ivs. */
2257 add_standard_iv_candidates (data);
2259 /* Add old induction variables. */
2260 add_old_ivs_candidates (data);
2262 /* Add induction variables derived from uses. */
2263 add_derived_ivs_candidates (data);
2265 /* Record the important candidates. */
2266 record_important_candidates (data);
2269 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2270 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2271 we allocate a simple list for every use. */
2273 static void
2274 alloc_use_cost_map (struct ivopts_data *data)
2276 unsigned i, size, s, j;
2278 for (i = 0; i < n_iv_uses (data); i++)
2280 struct iv_use *use = iv_use (data, i);
2281 bitmap_iterator bi;
2283 if (data->consider_all_candidates)
2284 size = n_iv_cands (data);
2285 else
2287 s = 0;
2288 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2290 s++;
2293 /* Round up to a power of two, so that computing the modulo by it is fast. */
2294 for (size = 1; size < s; size <<= 1)
2295 continue;
2298 use->n_map_members = size;
2299 use->cost_map = XCNEWVEC (struct cost_pair, size);
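/* Illustrative sketch, not part of GCC: the size computation above rounds
   the table size up to a power of two so that an index can later be reduced
   with a bit mask instead of a division. The function names here are
   hypothetical. */

#include <assert.h>

static unsigned
example_round_up_pow2 (unsigned s)
{
  unsigned size;

  /* Smallest power of two >= S; mirrors the loop in alloc_use_cost_map. */
  for (size = 1; size < s; size <<= 1)
    continue;
  return size;
}

/* With SIZE a power of two, ID % SIZE equals ID & (SIZE - 1). */
static unsigned
example_mod_pow2 (unsigned id, unsigned size)
{
  assert ((size & (size - 1)) == 0);
  return id & (size - 1);
}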
2303 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2304 depends on the invariants DEPENDS_ON and that the value used in expressing
2305 it is VALUE. */
2307 static void
2308 set_use_iv_cost (struct ivopts_data *data,
2309 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2310 bitmap depends_on, tree value)
2312 unsigned i, s;
2314 if (cost == INFTY)
2316 BITMAP_FREE (depends_on);
2317 return;
2320 if (data->consider_all_candidates)
2322 use->cost_map[cand->id].cand = cand;
2323 use->cost_map[cand->id].cost = cost;
2324 use->cost_map[cand->id].depends_on = depends_on;
2325 use->cost_map[cand->id].value = value;
2326 return;
2329 /* n_map_members is a power of two, so this computes modulo. */
2330 s = cand->id & (use->n_map_members - 1);
2331 for (i = s; i < use->n_map_members; i++)
2332 if (!use->cost_map[i].cand)
2333 goto found;
2334 for (i = 0; i < s; i++)
2335 if (!use->cost_map[i].cand)
2336 goto found;
2338 gcc_unreachable ();
2340 found:
2341 use->cost_map[i].cand = cand;
2342 use->cost_map[i].cost = cost;
2343 use->cost_map[i].depends_on = depends_on;
2344 use->cost_map[i].value = value;
2347 /* Gets cost of (USE, CANDIDATE) pair. */
2349 static struct cost_pair *
2350 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2351 struct iv_cand *cand)
2353 unsigned i, s;
2354 struct cost_pair *ret;
2356 if (!cand)
2357 return NULL;
2359 if (data->consider_all_candidates)
2361 ret = use->cost_map + cand->id;
2362 if (!ret->cand)
2363 return NULL;
2365 return ret;
2368 /* n_map_members is a power of two, so this computes modulo. */
2369 s = cand->id & (use->n_map_members - 1);
2370 for (i = s; i < use->n_map_members; i++)
2371 if (use->cost_map[i].cand == cand)
2372 return use->cost_map + i;
2374 for (i = 0; i < s; i++)
2375 if (use->cost_map[i].cand == cand)
2376 return use->cost_map + i;
2378 return NULL;
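/* Illustrative sketch, not part of GCC: the open-addressing scheme that
   set_use_iv_cost and get_use_iv_cost implement above -- probe linearly
   from the hashed slot to the end of the table, then wrap around to the
   beginning. A hypothetical integer-keyed table stands in for the
   cost_pair array; a zero slot plays the role of a NULL cand. */

#include <stddef.h>

static int *
example_probe (int *table, unsigned n, int key)
{
  unsigned i, s = (unsigned) key & (n - 1);  /* N is a power of two. */

  for (i = s; i < n; i++)
    if (table[i] == key)
      return &table[i];
  for (i = 0; i < s; i++)
    if (table[i] == key)
      return &table[i];
  return NULL;
}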
2381 /* Returns an estimate of the cost of computing SEQ. */
2383 static unsigned
2384 seq_cost (rtx seq)
2386 unsigned cost = 0;
2387 rtx set;
2389 for (; seq; seq = NEXT_INSN (seq))
2391 set = single_set (seq);
2392 if (set)
2393 cost += rtx_cost (set, SET);
2394 else
2395 cost++;
2398 return cost;
2401 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2402 static rtx
2403 produce_memory_decl_rtl (tree obj, int *regno)
2405 rtx x;
2407 gcc_assert (obj);
2408 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2410 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2411 x = gen_rtx_SYMBOL_REF (Pmode, name);
2413 else
2414 x = gen_raw_REG (Pmode, (*regno)++);
2416 return gen_rtx_MEM (DECL_MODE (obj), x);
2419 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2420 walk_tree. DATA contains the current fake register number. */
2422 static tree
2423 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2425 tree obj = NULL_TREE;
2426 rtx x = NULL_RTX;
2427 int *regno = data;
2429 switch (TREE_CODE (*expr_p))
2431 case ADDR_EXPR:
2432 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2433 handled_component_p (*expr_p);
2434 expr_p = &TREE_OPERAND (*expr_p, 0))
2435 continue;
2436 obj = *expr_p;
2437 if (DECL_P (obj))
2438 x = produce_memory_decl_rtl (obj, regno);
2439 break;
2441 case SSA_NAME:
2442 *ws = 0;
2443 obj = SSA_NAME_VAR (*expr_p);
2444 if (!DECL_RTL_SET_P (obj))
2445 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2446 break;
2448 case VAR_DECL:
2449 case PARM_DECL:
2450 case RESULT_DECL:
2451 *ws = 0;
2452 obj = *expr_p;
2454 if (DECL_RTL_SET_P (obj))
2455 break;
2457 if (DECL_MODE (obj) == BLKmode)
2458 x = produce_memory_decl_rtl (obj, regno);
2459 else
2460 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2462 break;
2464 default:
2465 break;
2468 if (x)
2470 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2471 SET_DECL_RTL (obj, x);
2474 return NULL_TREE;
2477 /* Determines cost of the computation of EXPR. */
2479 static unsigned
2480 computation_cost (tree expr)
2482 rtx seq, rslt;
2483 tree type = TREE_TYPE (expr);
2484 unsigned cost;
2485 /* Avoid using hard regs in ways which may be unsupported. */
2486 int regno = LAST_VIRTUAL_REGISTER + 1;
2488 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2489 start_sequence ();
2490 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2491 seq = get_insns ();
2492 end_sequence ();
2494 cost = seq_cost (seq);
2495 if (MEM_P (rslt))
2496 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2498 return cost;
2501 /* Returns variable containing the value of candidate CAND at statement AT. */
2503 static tree
2504 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2506 if (stmt_after_increment (loop, cand, stmt))
2507 return cand->var_after;
2508 else
2509 return cand->var_before;
2512 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2513 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2515 int
2516 tree_int_cst_sign_bit (tree t)
2518 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2519 unsigned HOST_WIDE_INT w;
2521 if (bitno < HOST_BITS_PER_WIDE_INT)
2522 w = TREE_INT_CST_LOW (t);
2523 else
2525 w = TREE_INT_CST_HIGH (t);
2526 bitno -= HOST_BITS_PER_WIDE_INT;
2529 return (w >> bitno) & 1;
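/* Illustrative sketch, not part of GCC: the same sign-bit extraction on a
   value stored in two 64-bit halves, where the type precision PREC (>= 1),
   not the storage width, decides which bit is the sign bit. */

#include <stdint.h>

#define EX_BITS_PER_WORD 64

static int
example_sign_bit (uint64_t low, uint64_t high, unsigned prec)
{
  unsigned bitno = prec - 1;
  uint64_t w;

  if (bitno < EX_BITS_PER_WORD)
    w = low;
  else
    {
      w = high;
      bitno -= EX_BITS_PER_WORD;
    }
  return (w >> bitno) & 1;
}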
2532 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2533 return cst. Otherwise return NULL_TREE. */
2535 static tree
2536 constant_multiple_of (tree type, tree top, tree bot)
2538 tree res, mby, p0, p1;
2539 enum tree_code code;
2540 bool negate;
2542 STRIP_NOPS (top);
2543 STRIP_NOPS (bot);
2545 if (operand_equal_p (top, bot, 0))
2546 return build_int_cst (type, 1);
2548 code = TREE_CODE (top);
2549 switch (code)
2551 case MULT_EXPR:
2552 mby = TREE_OPERAND (top, 1);
2553 if (TREE_CODE (mby) != INTEGER_CST)
2554 return NULL_TREE;
2556 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2557 if (!res)
2558 return NULL_TREE;
2560 return fold_binary_to_constant (MULT_EXPR, type, res,
2561 fold_convert (type, mby));
2563 case PLUS_EXPR:
2564 case MINUS_EXPR:
2565 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2566 if (!p0)
2567 return NULL_TREE;
2568 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2569 if (!p1)
2570 return NULL_TREE;
2572 return fold_binary_to_constant (code, type, p0, p1);
2574 case INTEGER_CST:
2575 if (TREE_CODE (bot) != INTEGER_CST)
2576 return NULL_TREE;
2578 bot = fold_convert (type, bot);
2579 top = fold_convert (type, top);
2581 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2582 the result afterwards. */
2583 if (tree_int_cst_sign_bit (bot))
2585 negate = true;
2586 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2588 else
2589 negate = false;
2591 /* Ditto for TOP. */
2592 if (tree_int_cst_sign_bit (top))
2594 negate = !negate;
2595 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2598 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2599 return NULL_TREE;
2601 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2602 if (negate)
2603 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2604 return res;
2606 default:
2607 return NULL_TREE;
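/* Illustrative sketch, not part of GCC: the INTEGER_CST leg of
   constant_multiple_of restated on plain integers -- divide the absolute
   values and fix up the sign afterwards, failing unless the division is
   exact. The function name is hypothetical. */

#include <stdbool.h>

static bool
example_constant_multiple_of (long top, long bot, long *cst)
{
  bool negate = false;

  if (bot == 0)
    return false;
  if (bot < 0)
    {
      negate = true;
      bot = -bot;
    }
  if (top < 0)
    {
      negate = !negate;
      top = -top;
    }
  if (top % bot != 0)
    return false;

  *cst = negate ? -(top / bot) : top / bot;
  return true;
}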
2611 /* Sets COMB to CST. */
2613 static void
2614 aff_combination_const (struct affine_tree_combination *comb, tree type,
2615 unsigned HOST_WIDE_INT cst)
2617 unsigned prec = TYPE_PRECISION (type);
2619 comb->type = type;
2620 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2622 comb->n = 0;
2623 comb->rest = NULL_TREE;
2624 comb->offset = cst & comb->mask;
2627 /* Sets COMB to single element ELT. */
2629 static void
2630 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2632 unsigned prec = TYPE_PRECISION (type);
2634 comb->type = type;
2635 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2637 comb->n = 1;
2638 comb->elts[0] = elt;
2639 comb->coefs[0] = 1;
2640 comb->rest = NULL_TREE;
2641 comb->offset = 0;
2644 /* Scales COMB by SCALE. */
2646 static void
2647 aff_combination_scale (struct affine_tree_combination *comb,
2648 unsigned HOST_WIDE_INT scale)
2650 unsigned i, j;
2652 if (scale == 1)
2653 return;
2655 if (scale == 0)
2657 aff_combination_const (comb, comb->type, 0);
2658 return;
2661 comb->offset = (scale * comb->offset) & comb->mask;
2662 for (i = 0, j = 0; i < comb->n; i++)
2664 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2665 comb->elts[j] = comb->elts[i];
2666 if (comb->coefs[j] != 0)
2667 j++;
2669 comb->n = j;
2671 if (comb->rest)
2673 if (comb->n < MAX_AFF_ELTS)
2675 comb->coefs[comb->n] = scale;
2676 comb->elts[comb->n] = comb->rest;
2677 comb->rest = NULL_TREE;
2678 comb->n++;
2680 else
2681 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2682 build_int_cst_type (comb->type, scale));
2686 /* Adds ELT * SCALE to COMB. */
2688 static void
2689 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2690 unsigned HOST_WIDE_INT scale)
2692 unsigned i;
2694 if (scale == 0)
2695 return;
2697 for (i = 0; i < comb->n; i++)
2698 if (operand_equal_p (comb->elts[i], elt, 0))
2700 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2701 if (comb->coefs[i])
2702 return;
2704 comb->n--;
2705 comb->coefs[i] = comb->coefs[comb->n];
2706 comb->elts[i] = comb->elts[comb->n];
2708 if (comb->rest)
2710 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2711 comb->coefs[comb->n] = 1;
2712 comb->elts[comb->n] = comb->rest;
2713 comb->rest = NULL_TREE;
2714 comb->n++;
2716 return;
2718 if (comb->n < MAX_AFF_ELTS)
2720 comb->coefs[comb->n] = scale;
2721 comb->elts[comb->n] = elt;
2722 comb->n++;
2723 return;
2726 if (scale == 1)
2727 elt = fold_convert (comb->type, elt);
2728 else
2729 elt = fold_build2 (MULT_EXPR, comb->type,
2730 fold_convert (comb->type, elt),
2731 build_int_cst_type (comb->type, scale));
2733 if (comb->rest)
2734 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2735 else
2736 comb->rest = elt;
2739 /* Adds COMB2 to COMB1. */
2741 static void
2742 aff_combination_add (struct affine_tree_combination *comb1,
2743 struct affine_tree_combination *comb2)
2745 unsigned i;
2747 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2748 for (i = 0; i < comb2->n; i++)
2749 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2750 if (comb2->rest)
2751 aff_combination_add_elt (comb1, comb2->rest, 1);
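/* Illustrative sketch, not part of GCC: the affine-combination algebra
   above on a toy structure without the mask arithmetic. Scaling multiplies
   the offset and every coefficient; adding merges the coefficients of equal
   elements, so (x + 3) + (-1) * (x + 1) leaves only the offset 2 (the real
   code additionally drops entries whose coefficient becomes zero). */

#define EX_MAX_ELTS 4

struct ex_aff
{
  long offset;
  unsigned n;
  long coefs[EX_MAX_ELTS];
  const char *elts[EX_MAX_ELTS];  /* Element identity, e.g. a variable name. */
};

static void
ex_aff_scale (struct ex_aff *c, long scale)
{
  unsigned i;

  c->offset *= scale;
  for (i = 0; i < c->n; i++)
    c->coefs[i] *= scale;
}

static void
ex_aff_add_elt (struct ex_aff *c, const char *elt, long scale)
{
  unsigned i;

  for (i = 0; i < c->n; i++)
    if (c->elts[i] == elt)
      {
        c->coefs[i] += scale;
        return;
      }
  c->elts[c->n] = elt;
  c->coefs[c->n++] = scale;
}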
2754 /* Splits EXPR into an affine combination of parts. */
2756 static void
2757 tree_to_aff_combination (tree expr, tree type,
2758 struct affine_tree_combination *comb)
2760 struct affine_tree_combination tmp;
2761 enum tree_code code;
2762 tree cst, core, toffset;
2763 HOST_WIDE_INT bitpos, bitsize;
2764 enum machine_mode mode;
2765 int unsignedp, volatilep;
2767 STRIP_NOPS (expr);
2769 code = TREE_CODE (expr);
2770 switch (code)
2772 case INTEGER_CST:
2773 aff_combination_const (comb, type, int_cst_value (expr));
2774 return;
2776 case PLUS_EXPR:
2777 case MINUS_EXPR:
2778 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2779 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2780 if (code == MINUS_EXPR)
2781 aff_combination_scale (&tmp, -1);
2782 aff_combination_add (comb, &tmp);
2783 return;
2785 case MULT_EXPR:
2786 cst = TREE_OPERAND (expr, 1);
2787 if (TREE_CODE (cst) != INTEGER_CST)
2788 break;
2789 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2790 aff_combination_scale (comb, int_cst_value (cst));
2791 return;
2793 case NEGATE_EXPR:
2794 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2795 aff_combination_scale (comb, -1);
2796 return;
2798 case ADDR_EXPR:
2799 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2800 &toffset, &mode, &unsignedp, &volatilep,
2801 false);
2802 if (bitpos % BITS_PER_UNIT != 0)
2803 break;
2804 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2805 core = build_fold_addr_expr (core);
2806 if (TREE_CODE (core) == ADDR_EXPR)
2807 aff_combination_add_elt (comb, core, 1);
2808 else
2810 tree_to_aff_combination (core, type, &tmp);
2811 aff_combination_add (comb, &tmp);
2813 if (toffset)
2815 tree_to_aff_combination (toffset, type, &tmp);
2816 aff_combination_add (comb, &tmp);
2818 return;
2820 default:
2821 break;
2824 aff_combination_elt (comb, type, expr);
2827 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for the width of TYPE. */
2829 static tree
2830 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2831 unsigned HOST_WIDE_INT mask)
2833 enum tree_code code;
2835 scale &= mask;
2836 elt = fold_convert (type, elt);
2838 if (scale == 1)
2840 if (!expr)
2841 return elt;
2843 return fold_build2 (PLUS_EXPR, type, expr, elt);
2846 if (scale == mask)
2848 if (!expr)
2849 return fold_build1 (NEGATE_EXPR, type, elt);
2851 return fold_build2 (MINUS_EXPR, type, expr, elt);
2854 if (!expr)
2855 return fold_build2 (MULT_EXPR, type, elt,
2856 build_int_cst_type (type, scale));
2858 if ((scale | (mask >> 1)) == mask)
2860 /* Scale is negative. */
2861 code = MINUS_EXPR;
2862 scale = (-scale) & mask;
2864 else
2865 code = PLUS_EXPR;
2867 elt = fold_build2 (MULT_EXPR, type, elt,
2868 build_int_cst_type (type, scale));
2869 return fold_build2 (code, type, expr, elt);
2872 /* Copies the tree elements of COMB to ensure that they are not shared. */
2874 static void
2875 unshare_aff_combination (struct affine_tree_combination *comb)
2877 unsigned i;
2879 for (i = 0; i < comb->n; i++)
2880 comb->elts[i] = unshare_expr (comb->elts[i]);
2881 if (comb->rest)
2882 comb->rest = unshare_expr (comb->rest);
2885 /* Makes a tree from the affine combination COMB. */
2887 static tree
2888 aff_combination_to_tree (struct affine_tree_combination *comb)
2890 tree type = comb->type;
2891 tree expr = comb->rest;
2892 unsigned i;
2893 unsigned HOST_WIDE_INT off, sgn;
2895 /* Handle the special case produced by get_computation_aff when
2896 the type does not fit in HOST_WIDE_INT. */
2897 if (comb->n == 0 && comb->offset == 0)
2898 return fold_convert (type, expr);
2900 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2902 for (i = 0; i < comb->n; i++)
2903 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2904 comb->mask);
2906 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2908 /* Offset is negative. */
2909 off = (-comb->offset) & comb->mask;
2910 sgn = comb->mask;
2912 else
2914 off = comb->offset;
2915 sgn = 1;
2917 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2918 comb->mask);
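/* Illustrative sketch, not part of GCC: the test (scale | (mask >> 1)) == mask
   used in add_elt_to_tree and aff_combination_to_tree. MASK has the low PREC
   bits set, so the test asks whether bit PREC - 1 of SCALE is set, i.e.
   whether SCALE is negative in PREC-bit two's complement; if so, emitting
   MINUS of -SCALE gives nicer trees than PLUS of a huge unsigned constant. */

#include <stdbool.h>

static bool
example_scale_is_negative (unsigned long scale, unsigned prec)
{
  unsigned long mask = ((unsigned long) 2 << (prec - 1)) - 1;  /* Low PREC bits. */

  scale &= mask;
  return (scale | (mask >> 1)) == mask;
}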
2921 /* Determines the expression by which USE is expressed from induction variable
2922 CAND at statement AT in LOOP. The expression is stored in the decomposed
2923 form AFF. Returns false if USE cannot be expressed using CAND. */
2925 static bool
2926 get_computation_aff (struct loop *loop,
2927 struct iv_use *use, struct iv_cand *cand, tree at,
2928 struct affine_tree_combination *aff)
2930 tree ubase = use->iv->base;
2931 tree ustep = use->iv->step;
2932 tree cbase = cand->iv->base;
2933 tree cstep = cand->iv->step;
2934 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2935 tree uutype;
2936 tree expr, delta;
2937 tree ratio;
2938 unsigned HOST_WIDE_INT ustepi, cstepi;
2939 HOST_WIDE_INT ratioi;
2940 struct affine_tree_combination cbase_aff, expr_aff;
2941 tree cstep_orig = cstep, ustep_orig = ustep;
2943 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2945 /* We do not have enough precision to express the values of the use. */
2946 return false;
2949 expr = var_at_stmt (loop, cand, at);
2951 if (TREE_TYPE (expr) != ctype)
2953 /* This may happen with the original ivs. */
2954 expr = fold_convert (ctype, expr);
2957 if (TYPE_UNSIGNED (utype))
2958 uutype = utype;
2959 else
2961 uutype = unsigned_type_for (utype);
2962 ubase = fold_convert (uutype, ubase);
2963 ustep = fold_convert (uutype, ustep);
2966 if (uutype != ctype)
2968 expr = fold_convert (uutype, expr);
2969 cbase = fold_convert (uutype, cbase);
2970 cstep = fold_convert (uutype, cstep);
2972 /* If the conversion is not a noop, we must take it into account when
2973 considering the value of the step. */
2974 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2975 cstep_orig = cstep;
2978 if (cst_and_fits_in_hwi (cstep_orig)
2979 && cst_and_fits_in_hwi (ustep_orig))
2981 ustepi = int_cst_value (ustep_orig);
2982 cstepi = int_cst_value (cstep_orig);
2984 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2986 /* TODO maybe consider case when ustep divides cstep and the ratio is
2987 a power of 2 (so that the division is fast to execute)? We would
2988 need to be much more careful with overflows etc. then. */
2989 return false;
2992 ratio = build_int_cst_type (uutype, ratioi);
2994 else
2996 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
2997 if (!ratio)
2998 return false;
3000 /* Ratioi is only used to detect special cases when the multiplicative
3001 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3002 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3003 to integer_onep/integer_all_onesp, since the former ignores
3004 TREE_OVERFLOW. */
3005 if (cst_and_fits_in_hwi (ratio))
3006 ratioi = int_cst_value (ratio);
3007 else if (integer_onep (ratio))
3008 ratioi = 1;
3009 else if (integer_all_onesp (ratio))
3010 ratioi = -1;
3011 else
3012 ratioi = 0;
3015 /* We may need to shift the value if we are after the increment. */
3016 if (stmt_after_increment (loop, cand, at))
3017 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3019 /* use = ubase - ratio * cbase + ratio * var.
3021 In the general case ubase + ratio * (var - cbase) could be better (one less
3022 multiplication), but often it is possible to eliminate redundant parts
3023 of the computation from the (ubase - ratio * cbase) term, and if that does
3024 not happen, fold is able to apply the distributive law to obtain this form
3025 anyway. */
3027 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3029 /* Let's compute in trees and just return the result in AFF. This case
3030 should not be very common, and fold itself is not that bad either,
3031 so making the aff. functions more complicated to handle this case
3032 is not that urgent. */
3033 if (ratioi == 1)
3035 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3036 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3038 else if (ratioi == -1)
3040 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3041 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3043 else
3045 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3046 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3047 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3048 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3051 aff->type = uutype;
3052 aff->n = 0;
3053 aff->offset = 0;
3054 aff->mask = 0;
3055 aff->rest = expr;
3056 return true;
3059 /* If we got here, the type fits in HOST_WIDE_INT, thus it must be
3060 possible to compute ratioi. */
3061 gcc_assert (ratioi);
3063 tree_to_aff_combination (ubase, uutype, aff);
3064 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3065 tree_to_aff_combination (expr, uutype, &expr_aff);
3066 aff_combination_scale (&cbase_aff, -ratioi);
3067 aff_combination_scale (&expr_aff, ratioi);
3068 aff_combination_add (aff, &cbase_aff);
3069 aff_combination_add (aff, &expr_aff);
3071 return true;
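/* Illustrative sketch, not part of GCC: the rewrite that get_computation_aff
   performs, checked on plain integers. A use with value ubase + i * ustep is
   recovered from a candidate with value var = cbase + i * cstep as
   ubase - ratio * cbase + ratio * var, where ratio = ustep / cstep is
   assumed to divide exactly. */

#include <assert.h>

static void
example_check_rewrite (long ubase, long ustep, long cbase, long cstep, long i)
{
  long ratio = ustep / cstep;
  long var = cbase + i * cstep;   /* Candidate value in iteration I. */
  long use = ubase + i * ustep;   /* Use value in iteration I. */

  assert (ustep == ratio * cstep);
  assert (use == ubase - ratio * cbase + ratio * var);
}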
3074 /* Determines the expression by which USE is expressed from induction variable
3075 CAND at statement AT in LOOP. The computation is unshared. */
3077 static tree
3078 get_computation_at (struct loop *loop,
3079 struct iv_use *use, struct iv_cand *cand, tree at)
3081 struct affine_tree_combination aff;
3082 tree type = TREE_TYPE (use->iv->base);
3084 if (!get_computation_aff (loop, use, cand, at, &aff))
3085 return NULL_TREE;
3086 unshare_aff_combination (&aff);
3087 return fold_convert (type, aff_combination_to_tree (&aff));
3090 /* Determines the expression by which USE is expressed from induction variable
3091 CAND in LOOP. The computation is unshared. */
3093 static tree
3094 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3096 return get_computation_at (loop, use, cand, use->stmt);
3099 /* Returns cost of addition in MODE. */
3101 static unsigned
3102 add_cost (enum machine_mode mode)
3104 static unsigned costs[NUM_MACHINE_MODES];
3105 rtx seq;
3106 unsigned cost;
3108 if (costs[mode])
3109 return costs[mode];
3111 start_sequence ();
3112 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3113 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3114 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3115 NULL_RTX);
3116 seq = get_insns ();
3117 end_sequence ();
3119 cost = seq_cost (seq);
3120 if (!cost)
3121 cost = 1;
3123 costs[mode] = cost;
3125 if (dump_file && (dump_flags & TDF_DETAILS))
3126 fprintf (dump_file, "Addition in %s costs %d\n",
3127 GET_MODE_NAME (mode), cost);
3128 return cost;
3131 /* Entry in a hashtable of already known costs for multiplication. */
3132 struct mbc_entry
3134 HOST_WIDE_INT cst; /* The constant to multiply by. */
3135 enum machine_mode mode; /* In mode. */
3136 unsigned cost; /* The cost. */
3139 /* Computes the hash value for ENTRY. */
3141 static hashval_t
3142 mbc_entry_hash (const void *entry)
3144 const struct mbc_entry *e = entry;
3146 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3149 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3151 static int
3152 mbc_entry_eq (const void *entry1, const void *entry2)
3154 const struct mbc_entry *e1 = entry1;
3155 const struct mbc_entry *e2 = entry2;
3157 return (e1->mode == e2->mode
3158 && e1->cst == e2->cst);
3161 /* Returns cost of multiplication by constant CST in MODE. */
3163 unsigned
3164 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3166 static htab_t costs;
3167 struct mbc_entry **cached, act;
3168 rtx seq;
3169 unsigned cost;
3171 if (!costs)
3172 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3174 act.mode = mode;
3175 act.cst = cst;
3176 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3177 if (*cached)
3178 return (*cached)->cost;
3180 *cached = XNEW (struct mbc_entry);
3181 (*cached)->mode = mode;
3182 (*cached)->cst = cst;
3184 start_sequence ();
3185 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3186 gen_int_mode (cst, mode), NULL_RTX, 0);
3187 seq = get_insns ();
3188 end_sequence ();
3190 cost = seq_cost (seq);
3192 if (dump_file && (dump_flags & TDF_DETAILS))
3193 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3194 (int) cst, GET_MODE_NAME (mode), cost);
3196 (*cached)->cost = cost;
3198 return cost;
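/* Illustrative sketch, not part of GCC: memoizing a cost keyed by
   (mode, constant) as multiply_by_cost does, but with a small direct-mapped
   cache instead of libiberty's htab. The hash mixing mirrors mbc_entry_hash;
   everything else here is hypothetical. */

#include <stdbool.h>

#define EX_CACHE_SIZE 64

struct ex_mbc { int mode; long cst; unsigned cost; bool valid; };

static struct ex_mbc ex_cache[EX_CACHE_SIZE];

static unsigned
example_multiply_cost (int mode, long cst, unsigned (*compute) (int, long))
{
  unsigned h = (57u * (unsigned) mode + (unsigned) (cst % 877)) % EX_CACHE_SIZE;
  struct ex_mbc *e = &ex_cache[h];

  if (e->valid && e->mode == mode && e->cst == cst)
    return e->cost;

  e->mode = mode;
  e->cst = cst;
  e->cost = compute (mode, cst);
  e->valid = true;
  return e->cost;
}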
3201 /* Returns true if multiplying by RATIO is allowed in an address. */
3203 bool
3204 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3206 #define MAX_RATIO 128
3207 static sbitmap valid_mult;
3209 if (!valid_mult)
3211 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3212 rtx addr;
3213 HOST_WIDE_INT i;
3215 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3216 sbitmap_zero (valid_mult);
3217 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3218 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3220 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3221 if (memory_address_p (Pmode, addr))
3222 SET_BIT (valid_mult, i + MAX_RATIO);
3225 if (dump_file && (dump_flags & TDF_DETAILS))
3227 fprintf (dump_file, " allowed multipliers:");
3228 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3229 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3230 fprintf (dump_file, " %d", (int) i);
3231 fprintf (dump_file, "\n");
3232 fprintf (dump_file, "\n");
3236 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3237 return false;
3239 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3242 /* Returns the cost of an address in the shape symbol + var + OFFSET + RATIO * index.
3243 If SYMBOL_PRESENT is false, the symbol is omitted. If VAR_PRESENT is false,
3244 the variable is omitted. The address is built and costed in Pmode.
3246 TODO -- there must be some better way. This all is quite crude. */
3248 static unsigned
3249 get_address_cost (bool symbol_present, bool var_present,
3250 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3252 static bool initialized = false;
3253 static HOST_WIDE_INT rat, off;
3254 static HOST_WIDE_INT min_offset, max_offset;
3255 static unsigned costs[2][2][2][2];
3256 unsigned cost, acost;
3257 rtx seq, addr, base;
3258 bool offset_p, ratio_p;
3259 rtx reg1;
3260 HOST_WIDE_INT s_offset;
3261 unsigned HOST_WIDE_INT mask;
3262 unsigned bits;
3264 if (!initialized)
3266 HOST_WIDE_INT i;
3267 initialized = true;
3269 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3271 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3272 for (i = 1; i <= 1 << 20; i <<= 1)
3274 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3275 if (!memory_address_p (Pmode, addr))
3276 break;
3278 max_offset = i >> 1;
3279 off = max_offset;
3281 for (i = 1; i <= 1 << 20; i <<= 1)
3283 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3284 if (!memory_address_p (Pmode, addr))
3285 break;
3287 min_offset = -(i >> 1);
3289 if (dump_file && (dump_flags & TDF_DETAILS))
3291 fprintf (dump_file, "get_address_cost:\n");
3292 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3293 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3296 rat = 1;
3297 for (i = 2; i <= MAX_RATIO; i++)
3298 if (multiplier_allowed_in_address_p (i))
3300 rat = i;
3301 break;
3305 bits = GET_MODE_BITSIZE (Pmode);
3306 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3307 offset &= mask;
3308 if ((offset >> (bits - 1) & 1))
3309 offset |= ~mask;
3310 s_offset = offset;
3312 cost = 0;
3313 offset_p = (s_offset != 0
3314 && min_offset <= s_offset && s_offset <= max_offset);
3315 ratio_p = (ratio != 1
3316 && multiplier_allowed_in_address_p (ratio));
3318 if (ratio != 1 && !ratio_p)
3319 cost += multiply_by_cost (ratio, Pmode);
3321 if (s_offset && !offset_p && !symbol_present)
3323 cost += add_cost (Pmode);
3324 var_present = true;
3327 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3328 if (!acost)
3330 int old_cse_not_expected;
3331 acost = 0;
3333 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3334 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3335 if (ratio_p)
3336 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3338 if (var_present)
3339 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3341 if (symbol_present)
3343 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3344 if (offset_p)
3345 base = gen_rtx_fmt_e (CONST, Pmode,
3346 gen_rtx_fmt_ee (PLUS, Pmode,
3347 base,
3348 gen_int_mode (off, Pmode)));
3350 else if (offset_p)
3351 base = gen_int_mode (off, Pmode);
3352 else
3353 base = NULL_RTX;
3355 if (base)
3356 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3358 start_sequence ();
3359 /* To avoid splitting addressing modes, pretend that no cse will
3360 follow. */
3361 old_cse_not_expected = cse_not_expected;
3362 cse_not_expected = true;
3363 addr = memory_address (Pmode, addr);
3364 cse_not_expected = old_cse_not_expected;
3365 seq = get_insns ();
3366 end_sequence ();
3368 acost = seq_cost (seq);
3369 acost += address_cost (addr, Pmode);
3371 if (!acost)
3372 acost = 1;
3373 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3376 return cost + acost;
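/* Illustrative sketch, not part of GCC: the sign extension applied to OFFSET
   above -- keep the low BITS bits and, if the topmost kept bit is set, fill
   the rest with ones so the value reads as negative. The double shift
   << (bits - 1) << 1 avoids undefined behavior when BITS equals the word
   width. */

#include <stdint.h>

static int64_t
example_sign_extend (uint64_t offset, unsigned bits)
{
  uint64_t mask = ~(~(uint64_t) 0 << (bits - 1) << 1);

  offset &= mask;
  if ((offset >> (bits - 1)) & 1)
    offset |= ~mask;
  return (int64_t) offset;
}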
3379 /* Estimates cost of forcing expression EXPR into a variable. */
3381 unsigned
3382 force_expr_to_var_cost (tree expr)
3384 static bool costs_initialized = false;
3385 static unsigned integer_cost;
3386 static unsigned symbol_cost;
3387 static unsigned address_cost;
3388 tree op0, op1;
3389 unsigned cost0, cost1, cost;
3390 enum machine_mode mode;
3392 if (!costs_initialized)
3394 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3395 rtx x = gen_rtx_MEM (DECL_MODE (var),
3396 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3397 tree addr;
3398 tree type = build_pointer_type (integer_type_node);
3400 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3401 2000));
3403 SET_DECL_RTL (var, x);
3404 TREE_STATIC (var) = 1;
3405 addr = build1 (ADDR_EXPR, type, var);
3406 symbol_cost = computation_cost (addr) + 1;
3408 address_cost
3409 = computation_cost (build2 (PLUS_EXPR, type,
3410 addr,
3411 build_int_cst_type (type, 2000))) + 1;
3412 if (dump_file && (dump_flags & TDF_DETAILS))
3414 fprintf (dump_file, "force_expr_to_var_cost:\n");
3415 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3416 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3417 fprintf (dump_file, " address %d\n", (int) address_cost);
3418 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3419 fprintf (dump_file, "\n");
3422 costs_initialized = true;
3425 STRIP_NOPS (expr);
3427 if (SSA_VAR_P (expr))
3428 return 0;
3430 if (TREE_INVARIANT (expr))
3432 if (TREE_CODE (expr) == INTEGER_CST)
3433 return integer_cost;
3435 if (TREE_CODE (expr) == ADDR_EXPR)
3437 tree obj = TREE_OPERAND (expr, 0);
3439 if (TREE_CODE (obj) == VAR_DECL
3440 || TREE_CODE (obj) == PARM_DECL
3441 || TREE_CODE (obj) == RESULT_DECL)
3442 return symbol_cost;
3445 return address_cost;
3448 switch (TREE_CODE (expr))
3450 case PLUS_EXPR:
3451 case MINUS_EXPR:
3452 case MULT_EXPR:
3453 op0 = TREE_OPERAND (expr, 0);
3454 op1 = TREE_OPERAND (expr, 1);
3455 STRIP_NOPS (op0);
3456 STRIP_NOPS (op1);
3458 if (is_gimple_val (op0))
3459 cost0 = 0;
3460 else
3461 cost0 = force_expr_to_var_cost (op0);
3463 if (is_gimple_val (op1))
3464 cost1 = 0;
3465 else
3466 cost1 = force_expr_to_var_cost (op1);
3468 break;
3470 default:
3471 /* Just an arbitrary value, FIXME. */
3472 return target_spill_cost;
3475 mode = TYPE_MODE (TREE_TYPE (expr));
3476 switch (TREE_CODE (expr))
3478 case PLUS_EXPR:
3479 case MINUS_EXPR:
3480 cost = add_cost (mode);
3481 break;
3483 case MULT_EXPR:
3484 if (cst_and_fits_in_hwi (op0))
3485 cost = multiply_by_cost (int_cst_value (op0), mode);
3486 else if (cst_and_fits_in_hwi (op1))
3487 cost = multiply_by_cost (int_cst_value (op1), mode);
3488 else
3489 return target_spill_cost;
3490 break;
3492 default:
3493 gcc_unreachable ();
3496 cost += cost0;
3497 cost += cost1;
3499 /* Bound the cost by target_spill_cost. The parts of complicated
3500 computations often are either loop invariant or at least can
3501 be shared between several iv uses, so letting this grow without
3502 limits would not give reasonable results. */
3503 return cost < target_spill_cost ? cost : target_spill_cost;
3506 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3507 invariants the computation depends on. */
3509 static unsigned
3510 force_var_cost (struct ivopts_data *data,
3511 tree expr, bitmap *depends_on)
3513 if (depends_on)
3515 fd_ivopts_data = data;
3516 walk_tree (&expr, find_depends, depends_on, NULL);
3519 return force_expr_to_var_cost (expr);
3522 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3523 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3524 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3525 invariants the computation depends on. */
3527 static unsigned
3528 split_address_cost (struct ivopts_data *data,
3529 tree addr, bool *symbol_present, bool *var_present,
3530 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3532 tree core;
3533 HOST_WIDE_INT bitsize;
3534 HOST_WIDE_INT bitpos;
3535 tree toffset;
3536 enum machine_mode mode;
3537 int unsignedp, volatilep;
3539 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3540 &unsignedp, &volatilep, false);
3542 if (toffset != 0
3543 || bitpos % BITS_PER_UNIT != 0
3544 || TREE_CODE (core) != VAR_DECL)
3546 *symbol_present = false;
3547 *var_present = true;
3548 fd_ivopts_data = data;
3549 walk_tree (&addr, find_depends, depends_on, NULL);
3550 return target_spill_cost;
3553 *offset += bitpos / BITS_PER_UNIT;
3554 if (TREE_STATIC (core)
3555 || DECL_EXTERNAL (core))
3557 *symbol_present = true;
3558 *var_present = false;
3559 return 0;
3562 *symbol_present = false;
3563 *var_present = true;
3564 return 0;
3567 /* Estimates cost of expressing difference of addresses E1 - E2 as
3568 var + symbol + offset. The value of offset is added to OFFSET,
3569 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3570 part is missing. DEPENDS_ON is a set of the invariants the computation
3571 depends on. */
3573 static unsigned
3574 ptr_difference_cost (struct ivopts_data *data,
3575 tree e1, tree e2, bool *symbol_present, bool *var_present,
3576 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3578 HOST_WIDE_INT diff = 0;
3579 unsigned cost;
3581 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3583 if (ptr_difference_const (e1, e2, &diff))
3585 *offset += diff;
3586 *symbol_present = false;
3587 *var_present = false;
3588 return 0;
3591 if (e2 == integer_zero_node)
3592 return split_address_cost (data, TREE_OPERAND (e1, 0),
3593 symbol_present, var_present, offset, depends_on);
3595 *symbol_present = false;
3596 *var_present = true;
3598 cost = force_var_cost (data, e1, depends_on);
3599 cost += force_var_cost (data, e2, depends_on);
3600 cost += add_cost (Pmode);
3602 return cost;
3605 /* Estimates cost of expressing difference E1 - E2 as
3606 var + symbol + offset. The value of offset is added to OFFSET,
3607 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3608 part is missing. DEPENDS_ON is a set of the invariants the computation
3609 depends on. */
3611 static unsigned
3612 difference_cost (struct ivopts_data *data,
3613 tree e1, tree e2, bool *symbol_present, bool *var_present,
3614 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3616 unsigned cost;
3617 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3618 unsigned HOST_WIDE_INT off1, off2;
3620 e1 = strip_offset (e1, &off1);
3621 e2 = strip_offset (e2, &off2);
3622 *offset += off1 - off2;
3624 STRIP_NOPS (e1);
3625 STRIP_NOPS (e2);
3627 if (TREE_CODE (e1) == ADDR_EXPR)
3628 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3629 depends_on);
3630 *symbol_present = false;
3632 if (operand_equal_p (e1, e2, 0))
3634 *var_present = false;
3635 return 0;
3637 *var_present = true;
3638 if (zero_p (e2))
3639 return force_var_cost (data, e1, depends_on);
3641 if (zero_p (e1))
3643 cost = force_var_cost (data, e2, depends_on);
3644 cost += multiply_by_cost (-1, mode);
3646 return cost;
3649 cost = force_var_cost (data, e1, depends_on);
3650 cost += force_var_cost (data, e2, depends_on);
3651 cost += add_cost (mode);
3653 return cost;
3656 /* Determines the cost of the computation by which USE is expressed
3657 from induction variable CAND. If ADDRESS_P is true, we just need
3658 to create an address from it, otherwise we want to get it into a
3659 register. A set of invariants we depend on is stored in
3660 DEPENDS_ON. AT is the statement at which the value is computed. */
3662 static unsigned
3663 get_computation_cost_at (struct ivopts_data *data,
3664 struct iv_use *use, struct iv_cand *cand,
3665 bool address_p, bitmap *depends_on, tree at)
3667 tree ubase = use->iv->base, ustep = use->iv->step;
3668 tree cbase, cstep;
3669 tree utype = TREE_TYPE (ubase), ctype;
3670 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3671 HOST_WIDE_INT ratio, aratio;
3672 bool var_present, symbol_present;
3673 unsigned cost = 0, n_sums;
3675 *depends_on = NULL;
3677 /* Only consider real candidates. */
3678 if (!cand->iv)
3679 return INFTY;
3681 cbase = cand->iv->base;
3682 cstep = cand->iv->step;
3683 ctype = TREE_TYPE (cbase);
3685 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3687 /* We do not have a precision to express the values of use. */
3688 return INFTY;
3691 if (address_p)
3693 /* Do not try to express the address of an object with a computation based
3694 on the address of a different object. This may cause problems in RTL-level
3695 alias analysis (which does not expect this to happen, as it is
3696 illegal in C), and would be unlikely to be useful
3697 anyway. */
3698 if (use->iv->base_object
3699 && cand->iv->base_object
3700 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3701 return INFTY;
3704 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3706 /* TODO -- add direct handling of this case. */
3707 goto fallback;
3710 /* CSTEPI is removed from the offset in case the statement is after the
3711 increment. If the step is not constant, we use zero instead.
3712 This is a bit imprecise (there is the extra addition), but
3713 redundancy elimination is likely to transform the code so that
3714 it uses the value of the variable before the increment anyway,
3715 so it is not that unrealistic. */
3716 if (cst_and_fits_in_hwi (cstep))
3717 cstepi = int_cst_value (cstep);
3718 else
3719 cstepi = 0;
3721 if (cst_and_fits_in_hwi (ustep)
3722 && cst_and_fits_in_hwi (cstep))
3724 ustepi = int_cst_value (ustep);
3726 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3727 return INFTY;
3729 else
3731 tree rat;
3733 rat = constant_multiple_of (utype, ustep, cstep);
3735 if (!rat)
3736 return INFTY;
3738 if (cst_and_fits_in_hwi (rat))
3739 ratio = int_cst_value (rat);
3740 else if (integer_onep (rat))
3741 ratio = 1;
3742 else if (integer_all_onesp (rat))
3743 ratio = -1;
3744 else
3745 return INFTY;
3748 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3749 or ratio == 1, it is better to handle this like
3751 ubase - ratio * cbase + ratio * var
3753 (this also holds in the case ratio == -1, TODO). */
3755 if (cst_and_fits_in_hwi (cbase))
3757 offset = - ratio * int_cst_value (cbase);
3758 cost += difference_cost (data,
3759 ubase, integer_zero_node,
3760 &symbol_present, &var_present, &offset,
3761 depends_on);
3763 else if (ratio == 1)
3765 cost += difference_cost (data,
3766 ubase, cbase,
3767 &symbol_present, &var_present, &offset,
3768 depends_on);
3770 else
3772 cost += force_var_cost (data, cbase, depends_on);
3773 cost += add_cost (TYPE_MODE (ctype));
3774 cost += difference_cost (data,
3775 ubase, integer_zero_node,
3776 &symbol_present, &var_present, &offset,
3777 depends_on);
3780 /* If we are after the increment, the value of the candidate is higher by
3781 one iteration. */
3782 if (stmt_after_increment (data->current_loop, cand, at))
3783 offset -= ratio * cstepi;
3785 /* Now the computation is in the shape symbol + var1 + const + ratio * var2
3786 (the symbol/var/const parts may be omitted). If we are looking for an
3787 address, find the cost of addressing this. */
3788 if (address_p)
3789 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3791 /* Otherwise estimate the costs for computing the expression. */
3792 aratio = ratio > 0 ? ratio : -ratio;
3793 if (!symbol_present && !var_present && !offset)
3795 if (ratio != 1)
3796 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3798 return cost;
3801 if (aratio != 1)
3802 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3804 n_sums = 1;
3805 if (var_present
3806 /* Symbol + offset should be compile-time computable. */
3807 && (symbol_present || offset))
3808 n_sums++;
3810 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3812 fallback:
3814 /* Just get the expression, expand it and measure the cost. */
3815 tree comp = get_computation_at (data->current_loop, use, cand, at);
3817 if (!comp)
3818 return INFTY;
3820 if (address_p)
3821 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3823 return computation_cost (comp);
3827 /* Determines the cost of the computation by which USE is expressed
3828 from induction variable CAND. If ADDRESS_P is true, we just need
3829 to create an address from it, otherwise we want to get it into a
3830 register. A set of invariants we depend on is stored in
3831 DEPENDS_ON. */
3833 static unsigned
3834 get_computation_cost (struct ivopts_data *data,
3835 struct iv_use *use, struct iv_cand *cand,
3836 bool address_p, bitmap *depends_on)
3838 return get_computation_cost_at (data,
3839 use, cand, address_p, depends_on, use->stmt);
3842 /* Determines cost of basing replacement of USE on CAND in a generic
3843 expression. */
3845 static bool
3846 determine_use_iv_cost_generic (struct ivopts_data *data,
3847 struct iv_use *use, struct iv_cand *cand)
3849 bitmap depends_on;
3850 unsigned cost;
3852 /* The simple case first -- if we need to express value of the preserved
3853 original biv, the cost is 0. This also prevents us from counting the
3854 cost of increment twice -- once at this use and once in the cost of
3855 the candidate. */
3856 if (cand->pos == IP_ORIGINAL
3857 && cand->incremented_at == use->stmt)
3859 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3860 return true;
3863 cost = get_computation_cost (data, use, cand, false, &depends_on);
3864 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3866 return cost != INFTY;
3869 /* Determines cost of basing replacement of USE on CAND in an address. */
3871 static bool
3872 determine_use_iv_cost_address (struct ivopts_data *data,
3873 struct iv_use *use, struct iv_cand *cand)
3875 bitmap depends_on;
3876 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3878 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3880 return cost != INFTY;
3883 /* Computes value of induction variable IV in iteration NITER. */
3885 static tree
3886 iv_value (struct iv *iv, tree niter)
3888 tree val;
3889 tree type = TREE_TYPE (iv->base);
3891 niter = fold_convert (type, niter);
3892 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3894 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3897 /* Computes value of candidate CAND at position AT in iteration NITER. */
3899 static tree
3900 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3902 tree val = iv_value (cand->iv, niter);
3903 tree type = TREE_TYPE (cand->iv->base);
3905 if (stmt_after_increment (loop, cand, at))
3906 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3908 return val;
3911 /* Returns the period of induction variable IV. */
3913 static tree
3914 iv_period (struct iv *iv)
3916 tree step = iv->step, period, type;
3917 tree pow2div;
3919 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3921 /* The period of the iv is the type range divided by gcd (step, type range).
3922 Since the type range is a power of two, it suffices to determine the
3923 maximum power of two that divides the step. */
3924 pow2div = num_ending_zeros (step);
3925 type = unsigned_type_for (TREE_TYPE (step));
3927 period = build_low_bits_mask (type,
3928 (TYPE_PRECISION (type)
3929 - tree_low_cst (pow2div, 1)));
3931 return period;
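/* Illustrative sketch, not part of GCC: iv_period for an unsigned PREC-bit
   type (PREC <= 64) on plain integers. Only the maximal power of two
   dividing STEP matters, since the type range is itself a power of two;
   the result, like build_low_bits_mask above, is the largest N such that
   the iv values in iterations 0 .. N are pairwise distinct. */

#include <stdint.h>

static uint64_t
example_iv_period (uint64_t step, unsigned prec)
{
  unsigned zeros = 0;

  /* Maximum power of two dividing STEP (num_ending_zeros above). */
  while (zeros < prec && !((step >> zeros) & 1))
    zeros++;

  if (prec - zeros >= 64)
    return ~(uint64_t) 0;
  return ((uint64_t) 1 << (prec - zeros)) - 1;
}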
3934 /* Returns the comparison operator used when eliminating the iv USE. */
3936 static enum tree_code
3937 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3939 struct loop *loop = data->current_loop;
3940 basic_block ex_bb;
3941 edge exit;
3943 ex_bb = bb_for_stmt (use->stmt);
3944 exit = EDGE_SUCC (ex_bb, 0);
3945 if (flow_bb_inside_loop_p (loop, exit->dest))
3946 exit = EDGE_SUCC (ex_bb, 1);
3948 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3951 /* Check whether it is possible to express the condition in USE by comparison
3952 of candidate CAND. If so, store the value compared with to BOUND. */
3954 static bool
3955 may_eliminate_iv (struct ivopts_data *data,
3956 struct iv_use *use, struct iv_cand *cand, tree *bound)
3958 basic_block ex_bb;
3959 edge exit;
3960 struct tree_niter_desc *niter;
3961 tree nit, nit_type;
3962 tree wider_type, period, per_type;
3963 struct loop *loop = data->current_loop;
3965 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3966 return false;
3968 /* For now this works only for exits that dominate the loop latch. TODO --
3969 extend it for other conditions inside the loop body. */
3970 ex_bb = bb_for_stmt (use->stmt);
3971 if (use->stmt != last_stmt (ex_bb)
3972 || TREE_CODE (use->stmt) != COND_EXPR)
3973 return false;
3974 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3975 return false;
3977 exit = EDGE_SUCC (ex_bb, 0);
3978 if (flow_bb_inside_loop_p (loop, exit->dest))
3979 exit = EDGE_SUCC (ex_bb, 1);
3980 if (flow_bb_inside_loop_p (loop, exit->dest))
3981 return false;
3983 niter = niter_for_exit (data, exit);
3984 if (!niter
3985 || !zero_p (niter->may_be_zero))
3986 return false;
3988 nit = niter->niter;
3989 nit_type = TREE_TYPE (nit);
3991 /* Determine whether we may use the variable to test whether niter iterations
3992 elapsed. This is the case iff the period of the induction variable is
3993 at least the number of iterations. */
3994 period = iv_period (cand->iv);
3995 if (!period)
3996 return false;
3997 per_type = TREE_TYPE (period);
3999 wider_type = TREE_TYPE (period);
4000 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4001 wider_type = per_type;
4002 else
4003 wider_type = nit_type;
4005 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4006 fold_convert (wider_type, period),
4007 fold_convert (wider_type, nit))))
4008 return false;
4010 *bound = cand_value_at (loop, cand, use->stmt, nit);
4011 return true;
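/* Illustrative sketch, not part of GCC: why the period test above is needed.
   Replacing the exit condition by CAND == BOUND is only sound if the
   candidate does not hit BOUND before iteration NITER; the check below
   verifies this by brute force for an 8-bit candidate BASE + i * STEP,
   and agrees with the rule NITER <= period. */

#include <stdbool.h>
#include <stdint.h>

static bool
example_elimination_sound (uint8_t base, uint8_t step, unsigned niter)
{
  uint8_t bound = (uint8_t) (base + step * niter);
  unsigned i;

  for (i = 0; i < niter; i++)
    if ((uint8_t) (base + step * i) == bound)
      return false;  /* Wrapped early; the loop would exit too soon. */
  return true;
}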
4014 /* Determines cost of basing replacement of USE on CAND in a condition. */
4016 static bool
4017 determine_use_iv_cost_condition (struct ivopts_data *data,
4018 struct iv_use *use, struct iv_cand *cand)
4020 tree bound = NULL_TREE, op, cond;
4021 bitmap depends_on = NULL;
4022 unsigned cost;
4024 /* Only consider real candidates. */
4025 if (!cand->iv)
4027 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4028 return false;
4031 if (may_eliminate_iv (data, use, cand, &bound))
4033 cost = force_var_cost (data, bound, &depends_on);
4035 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4036 return cost != INFTY;
4039 /* The induction variable elimination failed; just express the original
4040 giv. If it is compared with an invariant, note that we cannot get
4041 rid of it. */
4042 cost = get_computation_cost (data, use, cand, false, &depends_on);
4044 cond = *use->op_p;
4045 if (TREE_CODE (cond) != SSA_NAME)
4047 op = TREE_OPERAND (cond, 0);
4048 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4049 op = TREE_OPERAND (cond, 1);
4050 if (TREE_CODE (op) == SSA_NAME)
4052 op = get_iv (data, op)->base;
4053 fd_ivopts_data = data;
4054 walk_tree (&op, find_depends, &depends_on, NULL);
4058 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4059 return cost != INFTY;
4062 /* Determines cost of basing replacement of USE on CAND. Returns false
4063 if USE cannot be based on CAND. */
4065 static bool
4066 determine_use_iv_cost (struct ivopts_data *data,
4067 struct iv_use *use, struct iv_cand *cand)
4069 switch (use->type)
4071 case USE_NONLINEAR_EXPR:
4072 return determine_use_iv_cost_generic (data, use, cand);
4074 case USE_ADDRESS:
4075 return determine_use_iv_cost_address (data, use, cand);
4077 case USE_COMPARE:
4078 return determine_use_iv_cost_condition (data, use, cand);
4080 default:
4081 gcc_unreachable ();
4085 /* Determines costs of basing the use of the iv on an iv candidate. */
4087 static void
4088 determine_use_iv_costs (struct ivopts_data *data)
4090 unsigned i, j;
4091 struct iv_use *use;
4092 struct iv_cand *cand;
4093 bitmap to_clear = BITMAP_ALLOC (NULL);
4095 alloc_use_cost_map (data);
4097 for (i = 0; i < n_iv_uses (data); i++)
4099 use = iv_use (data, i);
4101 if (data->consider_all_candidates)
4103 for (j = 0; j < n_iv_cands (data); j++)
4105 cand = iv_cand (data, j);
4106 determine_use_iv_cost (data, use, cand);
4109 else
4111 bitmap_iterator bi;
4113 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4115 cand = iv_cand (data, j);
4116 if (!determine_use_iv_cost (data, use, cand))
4117 bitmap_set_bit (to_clear, j);
4120 /* Remove the candidates for which the cost is infinite from
4121 the list of related candidates. */
4122 bitmap_and_compl_into (use->related_cands, to_clear);
4123 bitmap_clear (to_clear);
4127 BITMAP_FREE (to_clear);
4129 if (dump_file && (dump_flags & TDF_DETAILS))
4131 fprintf (dump_file, "Use-candidate costs:\n");
4133 for (i = 0; i < n_iv_uses (data); i++)
4135 use = iv_use (data, i);
4137 fprintf (dump_file, "Use %d:\n", i);
4138 fprintf (dump_file, " cand\tcost\tdepends on\n");
4139 for (j = 0; j < use->n_map_members; j++)
4141 if (!use->cost_map[j].cand
4142 || use->cost_map[j].cost == INFTY)
4143 continue;
4145 fprintf (dump_file, " %d\t%d\t",
4146 use->cost_map[j].cand->id,
4147 use->cost_map[j].cost);
4148 if (use->cost_map[j].depends_on)
4149 bitmap_print (dump_file,
4150 use->cost_map[j].depends_on, "","");
4151 fprintf (dump_file, "\n");
4154 fprintf (dump_file, "\n");
4156 fprintf (dump_file, "\n");
4160 /* Determines cost of the candidate CAND. */
4162 static void
4163 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4165 unsigned cost_base, cost_step;
4166 tree base;
4168 if (!cand->iv)
4170 cand->cost = 0;
4171 return;
4174 /* There are two costs associated with the candidate -- its increment
4175 and its initialization. The second is almost negligible for any loop
4176 that rolls enough, so we only take it into account a little. */
4178 base = cand->iv->base;
4179 cost_base = force_var_cost (data, base, NULL);
4180 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4182 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4184 /* Prefer the original iv unless we may gain something by replacing it;
4185 this is not really relevant for artificial ivs created by other
4186 passes. */
4187 if (cand->pos == IP_ORIGINAL
4188 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4189 cand->cost--;
4191 /* Prefer not to insert statements into latch unless there are some
4192 already (so that we do not create unnecessary jumps). */
4193 if (cand->pos == IP_END
4194 && empty_block_p (ip_end_pos (data->current_loop)))
4195 cand->cost++;
4198 /* Determines costs of computation of the candidates. */
4200 static void
4201 determine_iv_costs (struct ivopts_data *data)
4203 unsigned i;
4205 if (dump_file && (dump_flags & TDF_DETAILS))
4207 fprintf (dump_file, "Candidate costs:\n");
4208 fprintf (dump_file, " cand\tcost\n");
4211 for (i = 0; i < n_iv_cands (data); i++)
4213 struct iv_cand *cand = iv_cand (data, i);
4215 determine_iv_cost (data, cand);
4217 if (dump_file && (dump_flags & TDF_DETAILS))
4218 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4221 if (dump_file && (dump_flags & TDF_DETAILS))
4222 fprintf (dump_file, "\n");
4225 /* Calculates cost for having SIZE induction variables. */
4227 static unsigned
4228 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4230 return global_cost_for_size (size,
4231 loop_data (data->current_loop)->regs_used,
4232 n_iv_uses (data));
4235 /* For each size of the induction variable set determine the penalty. */
4237 static void
4238 determine_set_costs (struct ivopts_data *data)
4240 unsigned j, n;
4241 tree phi, op;
4242 struct loop *loop = data->current_loop;
4243 bitmap_iterator bi;
4245 /* We use the following model (definitely improvable, especially the
4246 cost function -- TODO):
4248 We estimate the number of registers available (using MD data), name it A.
4250 We estimate the number of registers used by the loop, name it U. This
4251 number is obtained as the number of loop phi nodes (not counting virtual
4252 registers and bivs) + the number of variables from outside of the loop.
4254 We set a reserve R (free regs that are used for temporary computations,
4255 etc.). For now the reserve is a constant 3.
4257 Let I be the number of induction variables.
4259 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4260 encourage making a lot of ivs without a reason).
4261 -- if A - R < U + I <= A, the cost is I * PRES_COST
4262 -- if U + I > A, the cost is I * PRES_COST and
4263 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4265 if (dump_file && (dump_flags & TDF_DETAILS))
4267 fprintf (dump_file, "Global costs:\n");
4268 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4269 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4270 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4271 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4274 n = 0;
4275 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4277 op = PHI_RESULT (phi);
4279 if (!is_gimple_reg (op))
4280 continue;
4282 if (get_iv (data, op))
4283 continue;
4285 n++;
4288 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4290 struct version_info *info = ver_info (data, j);
4292 if (info->inv_id && info->has_nonlin_use)
4293 n++;
4296 loop_data (loop)->regs_used = n;
4297 if (dump_file && (dump_flags & TDF_DETAILS))
4298 fprintf (dump_file, " regs_used %d\n", n);
4300 if (dump_file && (dump_flags & TDF_DETAILS))
4302 fprintf (dump_file, " cost for size:\n");
4303 fprintf (dump_file, " ivs\tcost\n");
4304 for (j = 0; j <= 2 * target_avail_regs; j++)
4305 fprintf (dump_file, " %d\t%d\n", j,
4306 ivopts_global_cost_for_size (data, j));
4307 fprintf (dump_file, "\n");
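/* Illustrative sketch, not part of GCC: the register-pressure model described
   above as a standalone function. AVAIL is A, REGS_USED is U, N_IVS is I,
   EX_RESERVE is R; the three cost parameters are hypothetical stand-ins for
   the target_*_cost values printed above. */

#define EX_RESERVE 3

static unsigned
example_global_cost (unsigned n_ivs, unsigned regs_used, unsigned n_uses,
                     unsigned avail, unsigned small_cost, unsigned pres_cost,
                     unsigned spill_cost)
{
  unsigned used = regs_used + n_ivs;

  if (used + EX_RESERVE <= avail)
    return n_ivs * small_cost;
  if (used <= avail)
    return n_ivs * pres_cost;
  return n_ivs * pres_cost
         + n_uses * spill_cost * (used - avail) / used;
}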
4311 /* Returns true if A is a cheaper cost pair than B. */
4313 static bool
4314 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4316 if (!a)
4317 return false;
4319 if (!b)
4320 return true;
4322 if (a->cost < b->cost)
4323 return true;
4325 if (a->cost > b->cost)
4326 return false;
4328 /* In case the costs are the same, prefer the cheaper candidate. */
4329 if (a->cand->cost < b->cand->cost)
4330 return true;
4332 return false;
4335 /* Computes the cost field of IVS structure. */
4337 static void
4338 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4340 unsigned cost = 0;
4342 cost += ivs->cand_use_cost;
4343 cost += ivs->cand_cost;
4344 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4346 ivs->cost = cost;
4349 /* Removes the invariants in set INVS from set IVS. */
4351 static void
4352 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4354 bitmap_iterator bi;
4355 unsigned iid;
4357 if (!invs)
4358 return;
4360 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4362 ivs->n_invariant_uses[iid]--;
4363 if (ivs->n_invariant_uses[iid] == 0)
4364 ivs->n_regs--;
4368 /* Set USE not to be expressed by any candidate in IVS. */
4370 static void
4371 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4372 struct iv_use *use)
4374 unsigned uid = use->id, cid;
4375 struct cost_pair *cp;
4377 cp = ivs->cand_for_use[uid];
4378 if (!cp)
4379 return;
4380 cid = cp->cand->id;
4382 ivs->bad_uses++;
4383 ivs->cand_for_use[uid] = NULL;
4384 ivs->n_cand_uses[cid]--;
4386 if (ivs->n_cand_uses[cid] == 0)
4388 bitmap_clear_bit (ivs->cands, cid);
4389 /* Do not count the pseudocandidates. */
4390 if (cp->cand->iv)
4391 ivs->n_regs--;
4392 ivs->n_cands--;
4393 ivs->cand_cost -= cp->cand->cost;
4395 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4398 ivs->cand_use_cost -= cp->cost;
4400 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4401 iv_ca_recount_cost (data, ivs);
4404 /* Adds the invariants in set INVS to set IVS. */
4406 static void
4407 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4409 bitmap_iterator bi;
4410 unsigned iid;
4412 if (!invs)
4413 return;
4415 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4417 ivs->n_invariant_uses[iid]++;
4418 if (ivs->n_invariant_uses[iid] == 1)
4419 ivs->n_regs++;
4423 /* Set cost pair for USE in set IVS to CP. */
4425 static void
4426 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4427 struct iv_use *use, struct cost_pair *cp)
4429 unsigned uid = use->id, cid;
4431 if (ivs->cand_for_use[uid] == cp)
4432 return;
4434 if (ivs->cand_for_use[uid])
4435 iv_ca_set_no_cp (data, ivs, use);
4437 if (cp)
4439 cid = cp->cand->id;
4441 ivs->bad_uses--;
4442 ivs->cand_for_use[uid] = cp;
4443 ivs->n_cand_uses[cid]++;
4444 if (ivs->n_cand_uses[cid] == 1)
4446 bitmap_set_bit (ivs->cands, cid);
4447 /* Do not count the pseudocandidates. */
4448 if (cp->cand->iv)
4449 ivs->n_regs++;
4450 ivs->n_cands++;
4451 ivs->cand_cost += cp->cand->cost;
4453 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4456 ivs->cand_use_cost += cp->cost;
4457 iv_ca_set_add_invariants (ivs, cp->depends_on);
4458 iv_ca_recount_cost (data, ivs);
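/* Illustrative sketch, not part of GCC: the reference counting behind
   iv_ca_set_add_invariants and iv_ca_set_remove_invariants above. An
   invariant occupies a register only while at least one selected cost pair
   depends on it, so n_regs changes only on the 0 <-> 1 transitions of the
   per-invariant use count. */

#define EX_MAX_INVS 32

struct ex_pressure
{
  unsigned n_regs;
  unsigned n_uses[EX_MAX_INVS];  /* Reference count per invariant id. */
};

static void
ex_inv_add (struct ex_pressure *p, unsigned iid)
{
  if (++p->n_uses[iid] == 1)
    p->n_regs++;
}

static void
ex_inv_remove (struct ex_pressure *p, unsigned iid)
{
  if (--p->n_uses[iid] == 0)
    p->n_regs--;
}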
4462 /* Extends the set IVS by expressing USE by one of the candidates in it
4463 if possible. */
4465 static void
4466 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4467 struct iv_use *use)
4469 struct cost_pair *best_cp = NULL, *cp;
4470 bitmap_iterator bi;
4471 unsigned i;
4473 gcc_assert (ivs->upto >= use->id);
4475 if (ivs->upto == use->id)
4477 ivs->upto++;
4478 ivs->bad_uses++;
4481 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4483 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4485 if (cheaper_cost_pair (cp, best_cp))
4486 best_cp = cp;
4489 iv_ca_set_cp (data, ivs, use, best_cp);
4492 /* Get the cost of the candidate assignment IVS.  */
4494 static unsigned
4495 iv_ca_cost (struct iv_ca *ivs)
4497 return (ivs->bad_uses ? INFTY : ivs->cost);
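/* While some use still has no candidate assigned (ivs->bad_uses > 0), the
   whole assignment is priced at INFTY above, so a partial assignment can
   never compare as cheaper than a complete one during the search below.  */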
4500 /* Returns true if all dependences of CP are among invariants in IVS. */
4502 static bool
4503 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4505 unsigned i;
4506 bitmap_iterator bi;
4508 if (!cp->depends_on)
4509 return true;
4511 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4513 if (ivs->n_invariant_uses[i] == 0)
4514 return false;
4517 return true;
4520 /* Creates a record of the change of expressing USE by NEW_CP instead of
4521 OLD_CP, and chains it before NEXT_CHANGE.  */
4523 static struct iv_ca_delta *
4524 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4525 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4527 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4529 change->use = use;
4530 change->old_cp = old_cp;
4531 change->new_cp = new_cp;
4532 change->next_change = next_change;
4534 return change;
4537 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4538 are rewritten. */
4540 static struct iv_ca_delta *
4541 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4543 struct iv_ca_delta *last;
4545 if (!l2)
4546 return l1;
4548 if (!l1)
4549 return l2;
4551 for (last = l1; last->next_change; last = last->next_change)
4552 continue;
4553 last->next_change = l2;
4555 return l1;
4558 /* Returns the cost pair for the candidate by which USE is expressed in IVS.  */
4560 static struct cost_pair *
4561 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4563 return ivs->cand_for_use[use->id];
4566 /* Reverses the list of changes DELTA, forming its inverse.  */
4568 static struct iv_ca_delta *
4569 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4571 struct iv_ca_delta *act, *next, *prev = NULL;
4572 struct cost_pair *tmp;
4574 for (act = delta; act; act = next)
4576 next = act->next_change;
4577 act->next_change = prev;
4578 prev = act;
4580 tmp = act->old_cp;
4581 act->old_cp = act->new_cp;
4582 act->new_cp = tmp;
4585 return prev;
4588 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4589 reverted instead. */
4591 static void
4592 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4593 struct iv_ca_delta *delta, bool forward)
4595 struct cost_pair *from, *to;
4596 struct iv_ca_delta *act;
4598 if (!forward)
4599 delta = iv_ca_delta_reverse (delta);
4601 for (act = delta; act; act = act->next_change)
4603 from = act->old_cp;
4604 to = act->new_cp;
4605 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4606 iv_ca_set_cp (data, ivs, act->use, to);
4609 if (!forward)
4610 iv_ca_delta_reverse (delta);
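/* The delta lists above act as an undo log.  A typical trial -- the pattern
   that iv_ca_extend and iv_ca_narrow below follow -- looks like

       iv_ca_delta_commit (data, ivs, delta, true);
       cost = iv_ca_cost (ivs);
       iv_ca_delta_commit (data, ivs, delta, false);

   i.e. apply the changes tentatively, measure the resulting cost, then roll
   the changes back.  Reverting reverses the list and swaps old_cp/new_cp in
   each entry, then restores both afterwards, so DELTA can still be
   committed later.  */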
4613 /* Returns true if CAND is used in IVS. */
4615 static bool
4616 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4618 return ivs->n_cand_uses[cand->id] > 0;
4621 /* Returns number of induction variable candidates in the set IVS. */
4623 static unsigned
4624 iv_ca_n_cands (struct iv_ca *ivs)
4626 return ivs->n_cands;
4629 /* Free the list of changes DELTA. */
4631 static void
4632 iv_ca_delta_free (struct iv_ca_delta **delta)
4634 struct iv_ca_delta *act, *next;
4636 for (act = *delta; act; act = next)
4638 next = act->next_change;
4639 free (act);
4642 *delta = NULL;
4645 /* Allocates a new iv candidate assignment.  */
4647 static struct iv_ca *
4648 iv_ca_new (struct ivopts_data *data)
4650 struct iv_ca *nw = XNEW (struct iv_ca);
4652 nw->upto = 0;
4653 nw->bad_uses = 0;
4654 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4655 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4656 nw->cands = BITMAP_ALLOC (NULL);
4657 nw->n_cands = 0;
4658 nw->n_regs = 0;
4659 nw->cand_use_cost = 0;
4660 nw->cand_cost = 0;
4661 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4662 nw->cost = 0;
4664 return nw;
4667 /* Free memory occupied by the set IVS. */
4669 static void
4670 iv_ca_free (struct iv_ca **ivs)
4672 free ((*ivs)->cand_for_use);
4673 free ((*ivs)->n_cand_uses);
4674 BITMAP_FREE ((*ivs)->cands);
4675 free ((*ivs)->n_invariant_uses);
4676 free (*ivs);
4677 *ivs = NULL;
4680 /* Dumps IVS to FILE. */
4682 static void
4683 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4685 const char *pref = " invariants ";
4686 unsigned i;
4688 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4689 bitmap_print (file, ivs->cands, " candidates ","\n");
4691 for (i = 1; i <= data->max_inv_id; i++)
4692 if (ivs->n_invariant_uses[i])
4694 fprintf (file, "%s%d", pref, i);
4695 pref = ", ";
4697 fprintf (file, "\n");
4700 /* For each use, try replacing the candidate chosen in IVS by CAND.  Return
4701 the cost of the new set, and store the differences in DELTA.  The number
4702 of induction variables in the new set is stored in N_IVS.  */
4704 static unsigned
4705 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4706 struct iv_cand *cand, struct iv_ca_delta **delta,
4707 unsigned *n_ivs)
4709 unsigned i, cost;
4710 struct iv_use *use;
4711 struct cost_pair *old_cp, *new_cp;
4713 *delta = NULL;
4714 for (i = 0; i < ivs->upto; i++)
4716 use = iv_use (data, i);
4717 old_cp = iv_ca_cand_for_use (ivs, use);
4719 if (old_cp
4720 && old_cp->cand == cand)
4721 continue;
4723 new_cp = get_use_iv_cost (data, use, cand);
4724 if (!new_cp)
4725 continue;
4727 if (!iv_ca_has_deps (ivs, new_cp))
4728 continue;
4730 if (!cheaper_cost_pair (new_cp, old_cp))
4731 continue;
4733 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4736 iv_ca_delta_commit (data, ivs, *delta, true);
4737 cost = iv_ca_cost (ivs);
4738 if (n_ivs)
4739 *n_ivs = iv_ca_n_cands (ivs);
4740 iv_ca_delta_commit (data, ivs, *delta, false);
4742 return cost;
4745 /* Try narrowing set IVS by removing CAND. Return the cost of
4746 the new set and store the differences in DELTA. */
4748 static unsigned
4749 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4750 struct iv_cand *cand, struct iv_ca_delta **delta)
4752 unsigned i, ci;
4753 struct iv_use *use;
4754 struct cost_pair *old_cp, *new_cp, *cp;
4755 bitmap_iterator bi;
4756 struct iv_cand *cnd;
4757 unsigned cost;
4759 *delta = NULL;
4760 for (i = 0; i < n_iv_uses (data); i++)
4762 use = iv_use (data, i);
4764 old_cp = iv_ca_cand_for_use (ivs, use);
4765 if (old_cp->cand != cand)
4766 continue;
4768 new_cp = NULL;
4770 if (data->consider_all_candidates)
4772 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4774 if (ci == cand->id)
4775 continue;
4777 cnd = iv_cand (data, ci);
4779 cp = get_use_iv_cost (data, use, cnd);
4780 if (!cp)
4781 continue;
4782 if (!iv_ca_has_deps (ivs, cp))
4783 continue;
4785 if (!cheaper_cost_pair (cp, new_cp))
4786 continue;
4788 new_cp = cp;
4791 else
4793 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4795 if (ci == cand->id)
4796 continue;
4798 cnd = iv_cand (data, ci);
4800 cp = get_use_iv_cost (data, use, cnd);
4801 if (!cp)
4802 continue;
4803 if (!iv_ca_has_deps (ivs, cp))
4804 continue;
4806 if (!cheaper_cost_pair (cp, new_cp))
4807 continue;
4809 new_cp = cp;
4813 if (!new_cp)
4815 iv_ca_delta_free (delta);
4816 return INFTY;
4819 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4822 iv_ca_delta_commit (data, ivs, *delta, true);
4823 cost = iv_ca_cost (ivs);
4824 iv_ca_delta_commit (data, ivs, *delta, false);
4826 return cost;
4829 /* Try optimizing the set of candidates IVS by removing candidates other
4830 than EXCEPT_CAND from it.  Return the cost of the new set, and store the
4831 differences in DELTA.  */
4833 static unsigned
4834 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4835 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4837 bitmap_iterator bi;
4838 struct iv_ca_delta *act_delta, *best_delta;
4839 unsigned i, best_cost, acost;
4840 struct iv_cand *cand;
4842 best_delta = NULL;
4843 best_cost = iv_ca_cost (ivs);
4845 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4847 cand = iv_cand (data, i);
4849 if (cand == except_cand)
4850 continue;
4852 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4854 if (acost < best_cost)
4856 best_cost = acost;
4857 iv_ca_delta_free (&best_delta);
4858 best_delta = act_delta;
4860 else
4861 iv_ca_delta_free (&act_delta);
4864 if (!best_delta)
4866 *delta = NULL;
4867 return best_cost;
4870 /* Recurse to possibly remove other unnecessary ivs. */
4871 iv_ca_delta_commit (data, ivs, best_delta, true);
4872 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4873 iv_ca_delta_commit (data, ivs, best_delta, false);
4874 *delta = iv_ca_delta_join (best_delta, *delta);
4875 return best_cost;
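/* Each successful narrowing above removes at least one candidate from IVS,
   so the recursion in iv_ca_prune terminates after at most n_cands steps;
   the deltas of the nested calls are joined into the single change list
   handed back to the caller.  */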
4878 /* Tries to extend the set IVS in the best possible way in order
4879 to express USE.  */
4881 static bool
4882 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
4883 struct iv_use *use)
4885 unsigned best_cost, act_cost;
4886 unsigned i;
4887 bitmap_iterator bi;
4888 struct iv_cand *cand;
4889 struct iv_ca_delta *best_delta = NULL, *act_delta;
4890 struct cost_pair *cp;
4892 iv_ca_add_use (data, ivs, use);
4893 best_cost = iv_ca_cost (ivs);
4895 cp = iv_ca_cand_for_use (ivs, use);
4896 if (cp)
4898 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4899 iv_ca_set_no_cp (data, ivs, use);
4902 /* First try important candidates; only if that fails, try the use-specific
4903 ones.  Rationale -- in loops with many variables the best choice often
4904 is to use just one generic biv.  If we added many ivs specific to the
4905 uses here, the optimization algorithm later would be likely to get
4906 stuck in a local minimum, thus causing us to create too many ivs.  Going
4907 from few ivs to more seems more likely to be successful -- starting
4908 from few ivs, replacing an expensive use by a specific iv should always
4909 be a win.  */
4910 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
4912 cand = iv_cand (data, i);
4914 if (iv_ca_cand_used_p (ivs, cand))
4915 continue;
4917 cp = get_use_iv_cost (data, use, cand);
4918 if (!cp)
4919 continue;
4921 iv_ca_set_cp (data, ivs, use, cp);
4922 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4923 iv_ca_set_no_cp (data, ivs, use);
4924 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
4926 if (act_cost < best_cost)
4928 best_cost = act_cost;
4930 iv_ca_delta_free (&best_delta);
4931 best_delta = act_delta;
4933 else
4934 iv_ca_delta_free (&act_delta);
4937 if (best_cost == INFTY)
4939 for (i = 0; i < use->n_map_members; i++)
4941 cp = use->cost_map + i;
4942 cand = cp->cand;
4943 if (!cand)
4944 continue;
4946 /* Already tried this. */
4947 if (cand->important)
4948 continue;
4950 if (iv_ca_cand_used_p (ivs, cand))
4951 continue;
4953 act_delta = NULL;
4954 iv_ca_set_cp (data, ivs, use, cp);
4955 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
4956 iv_ca_set_no_cp (data, ivs, use);
4957 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4958 cp, act_delta);
4960 if (act_cost < best_cost)
4962 best_cost = act_cost;
4964 if (best_delta)
4965 iv_ca_delta_free (&best_delta);
4966 best_delta = act_delta;
4968 else
4969 iv_ca_delta_free (&act_delta);
4973 iv_ca_delta_commit (data, ivs, best_delta, true);
4974 iv_ca_delta_free (&best_delta);
4976 return (best_cost != INFTY);
4979 /* Finds an initial assignment of candidates to uses. */
4981 static struct iv_ca *
4982 get_initial_solution (struct ivopts_data *data)
4984 struct iv_ca *ivs = iv_ca_new (data);
4985 unsigned i;
4987 for (i = 0; i < n_iv_uses (data); i++)
4988 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4990 iv_ca_free (&ivs);
4991 return NULL;
4994 return ivs;
4997 /* Tries to improve set of induction variables IVS. */
4999 static bool
5000 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5002 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5003 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5004 struct iv_cand *cand;
5006 /* Try extending the set of induction variables by one. */
5007 for (i = 0; i < n_iv_cands (data); i++)
5009 cand = iv_cand (data, i);
5011 if (iv_ca_cand_used_p (ivs, cand))
5012 continue;
5014 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5015 if (!act_delta)
5016 continue;
5018 /* If we successfully added the candidate and the set is small enough,
5019 try optimizing it by removing other candidates. */
5020 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5022 iv_ca_delta_commit (data, ivs, act_delta, true);
5023 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5024 iv_ca_delta_commit (data, ivs, act_delta, false);
5025 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5028 if (acost < best_cost)
5030 best_cost = acost;
5031 iv_ca_delta_free (&best_delta);
5032 best_delta = act_delta;
5034 else
5035 iv_ca_delta_free (&act_delta);
5038 if (!best_delta)
5040 /* Try removing the candidates from the set instead. */
5041 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5043 /* Nothing more we can do. */
5044 if (!best_delta)
5045 return false;
5048 iv_ca_delta_commit (data, ivs, best_delta, true);
5049 gcc_assert (best_cost == iv_ca_cost (ivs));
5050 iv_ca_delta_free (&best_delta);
5051 return true;
5054 /* Attempts to find the optimal set of induction variables.  We use a simple
5055 greedy heuristic -- we try to replace at most one candidate in the selected
5056 solution, and remove the unused ivs, as long as this improves the cost.  */
5058 static struct iv_ca *
5059 find_optimal_iv_set (struct ivopts_data *data)
5061 unsigned i;
5062 struct iv_ca *set;
5063 struct iv_use *use;
5065 /* Get the initial solution. */
5066 set = get_initial_solution (data);
5067 if (!set)
5069 if (dump_file && (dump_flags & TDF_DETAILS))
5070 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5071 return NULL;
5074 if (dump_file && (dump_flags & TDF_DETAILS))
5076 fprintf (dump_file, "Initial set of candidates:\n");
5077 iv_ca_dump (data, dump_file, set);
5080 while (try_improve_iv_set (data, set))
5082 if (dump_file && (dump_flags & TDF_DETAILS))
5084 fprintf (dump_file, "Improved to:\n");
5085 iv_ca_dump (data, dump_file, set);
5089 if (dump_file && (dump_flags & TDF_DETAILS))
5090 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5092 for (i = 0; i < n_iv_uses (data); i++)
5094 use = iv_use (data, i);
5095 use->selected = iv_ca_cand_for_use (set, use)->cand;
5098 return set;
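/* With TDF_DETAILS enabled (for instance via -fdump-tree-ivopts-details) the
   progress of the search can be followed in the dump file: the initial set
   printed by iv_ca_dump, one "Improved to:" snapshot per successful
   iteration of try_improve_iv_set, and the final cost.  */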
5101 /* Creates a new induction variable corresponding to CAND. */
5103 static void
5104 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5106 block_stmt_iterator incr_pos;
5107 tree base;
5108 bool after = false;
5110 if (!cand->iv)
5111 return;
5113 switch (cand->pos)
5115 case IP_NORMAL:
5116 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5117 break;
5119 case IP_END:
5120 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5121 after = true;
5122 break;
5124 case IP_ORIGINAL:
5125 /* Mark that the iv is preserved. */
5126 name_info (data, cand->var_before)->preserve_biv = true;
5127 name_info (data, cand->var_after)->preserve_biv = true;
5129 /* Rewrite the increment so that it uses var_before directly. */
5130 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5132 return;
5135 gimple_add_tmp_var (cand->var_before);
5136 add_referenced_tmp_var (cand->var_before);
5138 base = unshare_expr (cand->iv->base);
5140 create_iv (base, unshare_expr (cand->iv->step),
5141 cand->var_before, data->current_loop,
5142 &incr_pos, after, &cand->var_before, &cand->var_after);
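/* Of the increment positions handled above, IP_NORMAL emits the increment
   right before the exit test of the loop and IP_END at the end of the latch
   block (the AFTER flag is set so that create_iv inserts after the last
   statement there), while IP_ORIGINAL reuses an existing biv and merely
   marks it as preserved.  */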
5145 /* Creates new induction variables described in SET. */
5147 static void
5148 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5150 unsigned i;
5151 struct iv_cand *cand;
5152 bitmap_iterator bi;
5154 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5156 cand = iv_cand (data, i);
5157 create_new_iv (data, cand);
5161 /* Removes statement STMT (a real statement or a phi node).  If
5162 INCLUDING_DEFINED_NAME is true, also remove the ssa name it defines.  */
5164 static void
5165 remove_statement (tree stmt, bool including_defined_name)
5167 if (TREE_CODE (stmt) == PHI_NODE)
5169 if (!including_defined_name)
5171 /* Prevent the ssa name defined by the statement from being removed. */
5172 SET_PHI_RESULT (stmt, NULL);
5174 remove_phi_node (stmt, NULL_TREE);
5176 else
5178 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5180 bsi_remove (&bsi, true);
5184 /* Rewrites USE (definition of iv used in a nonlinear expression)
5185 using candidate CAND. */
5187 static void
5188 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5189 struct iv_use *use, struct iv_cand *cand)
5191 tree comp;
5192 tree op, stmts, tgt, ass;
5193 block_stmt_iterator bsi, pbsi;
5195   /* An important special case -- if we are asked to express the value of
5196 the original iv by itself, just exit; there is no need to
5197 introduce a new computation (that might also need casting the
5198 variable to unsigned and back). */
5199 if (cand->pos == IP_ORIGINAL
5200 && cand->incremented_at == use->stmt)
5202 tree step, ctype, utype;
5203 enum tree_code incr_code = PLUS_EXPR;
5205 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5206 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5208 step = cand->iv->step;
5209 ctype = TREE_TYPE (step);
5210 utype = TREE_TYPE (cand->var_after);
5211 if (TREE_CODE (step) == NEGATE_EXPR)
5213 incr_code = MINUS_EXPR;
5214 step = TREE_OPERAND (step, 0);
5217 /* Check whether we may leave the computation unchanged.
5218 This is the case only if it does not rely on other
5219 computations in the loop -- otherwise, the computation
5220 we rely upon may be removed in remove_unused_ivs,
5221 thus leading to ICE. */
5222 op = TREE_OPERAND (use->stmt, 1);
5223 if (TREE_CODE (op) == PLUS_EXPR
5224 || TREE_CODE (op) == MINUS_EXPR)
5226 if (TREE_OPERAND (op, 0) == cand->var_before)
5227 op = TREE_OPERAND (op, 1);
5228 else if (TREE_CODE (op) == PLUS_EXPR
5229 && TREE_OPERAND (op, 1) == cand->var_before)
5230 op = TREE_OPERAND (op, 0);
5231 else
5232 op = NULL_TREE;
5234 else
5235 op = NULL_TREE;
5237 if (op
5238 && (TREE_CODE (op) == INTEGER_CST
5239 || operand_equal_p (op, step, 0)))
5240 return;
5242 /* Otherwise, add the necessary computations to express
5243 the iv. */
5244 op = fold_convert (ctype, cand->var_before);
5245 comp = fold_convert (utype,
5246 build2 (incr_code, ctype, op,
5247 unshare_expr (step)));
5249 else
5250 comp = get_computation (data->current_loop, use, cand);
5252 switch (TREE_CODE (use->stmt))
5254 case PHI_NODE:
5255 tgt = PHI_RESULT (use->stmt);
5257 /* If we should keep the biv, do not replace it. */
5258 if (name_info (data, tgt)->preserve_biv)
5259 return;
5261 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5262 while (!bsi_end_p (pbsi)
5263 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5265 bsi = pbsi;
5266 bsi_next (&pbsi);
5268 break;
5270 case MODIFY_EXPR:
5271 tgt = TREE_OPERAND (use->stmt, 0);
5272 bsi = bsi_for_stmt (use->stmt);
5273 break;
5275 default:
5276 gcc_unreachable ();
5279 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5281 if (TREE_CODE (use->stmt) == PHI_NODE)
5283 if (stmts)
5284 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5285 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5286 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5287 remove_statement (use->stmt, false);
5288 SSA_NAME_DEF_STMT (tgt) = ass;
5290 else
5292 if (stmts)
5293 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5294 TREE_OPERAND (use->stmt, 1) = op;
5298 /* Replaces ssa name in index IDX by its basic variable. Callback for
5299 for_each_index. */
5301 static bool
5302 idx_remove_ssa_names (tree base, tree *idx,
5303 void *data ATTRIBUTE_UNUSED)
5305 tree *op;
5307 if (TREE_CODE (*idx) == SSA_NAME)
5308 *idx = SSA_NAME_VAR (*idx);
5310 if (TREE_CODE (base) == ARRAY_REF)
5312 op = &TREE_OPERAND (base, 2);
5313 if (*op
5314 && TREE_CODE (*op) == SSA_NAME)
5315 *op = SSA_NAME_VAR (*op);
5316 op = &TREE_OPERAND (base, 3);
5317 if (*op
5318 && TREE_CODE (*op) == SSA_NAME)
5319 *op = SSA_NAME_VAR (*op);
5322 return true;
5325 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5327 static tree
5328 unshare_and_remove_ssa_names (tree ref)
5330 ref = unshare_expr (ref);
5331 for_each_index (&ref, idx_remove_ssa_names, NULL);
5333 return ref;
5336 /* Extract the alias analysis info for the memory reference REF.  There are
5337 several ways this information may be stored, and what precisely its
5338 semantics is depends on the type of the reference; but there is always
5339 one _DECL node hidden somewhere that is used to determine the set of
5340 virtual operands for the reference.  The code below deciphers this
5341 jungle and extracts that single useful piece of information.  */
5343 static tree
5344 get_ref_tag (tree ref, tree orig)
5346 tree var = get_base_address (ref);
5347 tree aref = NULL_TREE, tag, sv;
5348 HOST_WIDE_INT offset, size, maxsize;
5350 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5352 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5353 if (ref)
5354 break;
5357 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5358 return unshare_expr (sv);
5360 if (!var)
5361 return NULL_TREE;
5363 if (TREE_CODE (var) == INDIRECT_REF)
5365 /* In case the base is a dereference of a pointer, first check its name
5366 mem tag, and if it does not have one, use its type mem tag.  */
5367 var = TREE_OPERAND (var, 0);
5368 if (TREE_CODE (var) != SSA_NAME)
5369 return NULL_TREE;
5371 if (SSA_NAME_PTR_INFO (var))
5373 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5374 if (tag)
5375 return tag;
5378 var = SSA_NAME_VAR (var);
5379 tag = var_ann (var)->type_mem_tag;
5380 gcc_assert (tag != NULL_TREE);
5381 return tag;
5383 else
5385 if (!DECL_P (var))
5386 return NULL_TREE;
5388 tag = var_ann (var)->type_mem_tag;
5389 if (tag)
5390 return tag;
5392 return var;
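/* Three cases are distinguished above: for a reference based on a pointer
   dereference, the name mem tag of the pointer is used, falling back to its
   type mem tag; for a reference based on a decl, the decl's type mem tag is
   used if it has one; otherwise the decl itself serves as the tag.  */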
5396 /* Copies the reference information from OLD_REF to NEW_REF. */
5398 static void
5399 copy_ref_info (tree new_ref, tree old_ref)
5401 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5402 copy_mem_ref_info (new_ref, old_ref);
5403 else
5405 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5406 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5410 /* Rewrites USE (address that is an iv) using candidate CAND. */
5412 static void
5413 rewrite_use_address (struct ivopts_data *data,
5414 struct iv_use *use, struct iv_cand *cand)
5416 struct affine_tree_combination aff;
5417 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5418 tree ref;
5420 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5421 unshare_aff_combination (&aff);
5423 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5424 copy_ref_info (ref, *use->op_p);
5425 *use->op_p = ref;
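/* For illustration (hypothetical dump syntax): an address use such as a[i],
   once expressed by a candidate that steps by the element size, can be
   rewritten into a single TARGET_MEM_REF along the lines of

       MEM[symbol: a, index: ivtmp.5, step: 4]

   which create_mem_ref assembles from the affine combination so that the
   whole address fits one machine addressing mode.  */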
5428 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5429 candidate CAND.  */
5431 static void
5432 rewrite_use_compare (struct ivopts_data *data,
5433 struct iv_use *use, struct iv_cand *cand)
5435 tree comp;
5436 tree *op_p, cond, op, stmts, bound;
5437 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5438 enum tree_code compare;
5439 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5441 bound = cp->value;
5442 if (bound)
5444 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5445 tree var_type = TREE_TYPE (var);
5447 compare = iv_elimination_compare (data, use);
5448 bound = fold_convert (var_type, bound);
5449 op = force_gimple_operand (unshare_expr (bound), &stmts,
5450 true, NULL_TREE);
5452 if (stmts)
5453 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5455 *use->op_p = build2 (compare, boolean_type_node, var, op);
5456 update_stmt (use->stmt);
5457 return;
5460 /* The induction variable elimination failed; just express the original
5461 iv.  */
5462 comp = get_computation (data->current_loop, use, cand);
5464 cond = *use->op_p;
5465 op_p = &TREE_OPERAND (cond, 0);
5466 if (TREE_CODE (*op_p) != SSA_NAME
5467 || zero_p (get_iv (data, *op_p)->step))
5468 op_p = &TREE_OPERAND (cond, 1);
5470 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5471 if (stmts)
5472 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5474 *op_p = op;
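/* As a sketch of the elimination path (hypothetical source): in

       for (i = 0; i < 100; i++)
         *p++ = 0;

   the exit test on i can be replaced by a comparison of the pointer iv
   against its precomputed final value, e.g. p != base + 100; afterwards i
   may become unused and is then deleted by remove_unused_ivs below.  */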
5477 /* Ensure that operand *OP_P may be used at the end of EXIT without
5478 violating loop closed ssa form. */
5480 static void
5481 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5483 basic_block def_bb;
5484 struct loop *def_loop;
5485 tree phi, use;
5487 use = USE_FROM_PTR (op_p);
5488 if (TREE_CODE (use) != SSA_NAME)
5489 return;
5491 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5492 if (!def_bb)
5493 return;
5495 def_loop = def_bb->loop_father;
5496 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5497 return;
5499 /* Try finding a phi node that copies the value out of the loop. */
5500 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5501 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5502 break;
5504 if (!phi)
5506 /* Create such a phi node. */
5507 tree new_name = duplicate_ssa_name (use, NULL);
5509 phi = create_phi_node (new_name, exit->dest);
5510 SSA_NAME_DEF_STMT (new_name) = phi;
5511 add_phi_arg (phi, use, exit);
5514 SET_USE (op_p, PHI_RESULT (phi));
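/* Loop closed ssa form requires that an ssa name defined inside a loop is
   used outside of it only via a phi node on the exit; e.g. a use of x_1
   after the loop must go through some x_2 = PHI <x_1 (exit_bb)>.  The
   function above looks such a phi node up and creates it when missing.  */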
5517 /* Ensure that operands of STMT may be used at the end of EXIT without
5518 violating loop closed ssa form. */
5520 static void
5521 protect_loop_closed_ssa_form (edge exit, tree stmt)
5523 ssa_op_iter iter;
5524 use_operand_p use_p;
5526 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5527 protect_loop_closed_ssa_form_use (exit, use_p);
5530 /* STMTS compute the value of the phi argument OP on the EXIT edge of a
5531 loop.  Arrange things so that they are emitted in the correct place, and
5532 so that the loop closed ssa form is preserved.  */
5534 void
5535 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5537 tree_stmt_iterator tsi;
5538 block_stmt_iterator bsi;
5539 tree phi, stmt, def, next;
5541 if (!single_pred_p (exit->dest))
5542 split_loop_exit_edge (exit);
5544   /* Ensure there is a label in exit->dest, so that we can
5545 insert after it. */
5546 tree_block_label (exit->dest);
5547 bsi = bsi_after_labels (exit->dest);
5549 if (TREE_CODE (stmts) == STATEMENT_LIST)
5551 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5553 tree stmt = tsi_stmt (tsi);
5554 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5555 protect_loop_closed_ssa_form (exit, stmt);
5558 else
5560 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5561 protect_loop_closed_ssa_form (exit, stmts);
5564 if (!op)
5565 return;
5567 for (phi = phi_nodes (exit->dest); phi; phi = next)
5569 next = PHI_CHAIN (phi);
5571 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5573 def = PHI_RESULT (phi);
5574 remove_statement (phi, false);
5575 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5576 def, op);
5577 SSA_NAME_DEF_STMT (def) = stmt;
5578 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5583 /* Rewrites USE using candidate CAND. */
5585 static void
5586 rewrite_use (struct ivopts_data *data,
5587 struct iv_use *use, struct iv_cand *cand)
5589 switch (use->type)
5591 case USE_NONLINEAR_EXPR:
5592 rewrite_use_nonlinear_expr (data, use, cand);
5593 break;
5595 case USE_ADDRESS:
5596 rewrite_use_address (data, use, cand);
5597 break;
5599 case USE_COMPARE:
5600 rewrite_use_compare (data, use, cand);
5601 break;
5603 default:
5604 gcc_unreachable ();
5606 mark_new_vars_to_rename (use->stmt);
5609 /* Rewrite the uses using the selected induction variables. */
5611 static void
5612 rewrite_uses (struct ivopts_data *data)
5614 unsigned i;
5615 struct iv_cand *cand;
5616 struct iv_use *use;
5618 for (i = 0; i < n_iv_uses (data); i++)
5620 use = iv_use (data, i);
5621 cand = use->selected;
5622 gcc_assert (cand);
5624 rewrite_use (data, use, cand);
5628 /* Removes the ivs that are not used after rewriting. */
5630 static void
5631 remove_unused_ivs (struct ivopts_data *data)
5633 unsigned j;
5634 bitmap_iterator bi;
5636 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5638 struct version_info *info;
5640 info = ver_info (data, j);
5641 if (info->iv
5642 && !zero_p (info->iv->step)
5643 && !info->inv_id
5644 && !info->iv->have_use_for
5645 && !info->preserve_biv)
5646 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5650 /* Frees data allocated by the optimization of a single loop. */
5652 static void
5653 free_loop_data (struct ivopts_data *data)
5655 unsigned i, j;
5656 bitmap_iterator bi;
5657 tree obj;
5659 htab_empty (data->niters);
5661 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5663 struct version_info *info;
5665 info = ver_info (data, i);
5666 if (info->iv)
5667 free (info->iv);
5668 info->iv = NULL;
5669 info->has_nonlin_use = false;
5670 info->preserve_biv = false;
5671 info->inv_id = 0;
5673 bitmap_clear (data->relevant);
5674 bitmap_clear (data->important_candidates);
5676 for (i = 0; i < n_iv_uses (data); i++)
5678 struct iv_use *use = iv_use (data, i);
5680 free (use->iv);
5681 BITMAP_FREE (use->related_cands);
5682 for (j = 0; j < use->n_map_members; j++)
5683 if (use->cost_map[j].depends_on)
5684 BITMAP_FREE (use->cost_map[j].depends_on);
5685 free (use->cost_map);
5686 free (use);
5688 VEC_truncate (iv_use_p, data->iv_uses, 0);
5690 for (i = 0; i < n_iv_cands (data); i++)
5692 struct iv_cand *cand = iv_cand (data, i);
5694 if (cand->iv)
5695 free (cand->iv);
5696 if (cand->depends_on)
5697 BITMAP_FREE (cand->depends_on);
5698 free (cand);
5700 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5702 if (data->version_info_size < num_ssa_names)
5704 data->version_info_size = 2 * num_ssa_names;
5705 free (data->version_info);
5706 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5709 data->max_inv_id = 0;
5711 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5712 SET_DECL_RTL (obj, NULL_RTX);
5714 VEC_truncate (tree, decl_rtl_to_reset, 0);
5717 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5718 loop tree. */
5720 static void
5721 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5723 unsigned i;
5725 for (i = 1; i < loops->num; i++)
5726 if (loops->parray[i])
5728 free (loops->parray[i]->aux);
5729 loops->parray[i]->aux = NULL;
5732 free_loop_data (data);
5733 free (data->version_info);
5734 BITMAP_FREE (data->relevant);
5735 BITMAP_FREE (data->important_candidates);
5736 htab_delete (data->niters);
5738 VEC_free (tree, heap, decl_rtl_to_reset);
5739 VEC_free (iv_use_p, heap, data->iv_uses);
5740 VEC_free (iv_cand_p, heap, data->iv_candidates);
5743 /* Optimizes the LOOP. Returns true if anything changed. */
5745 static bool
5746 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5748 bool changed = false;
5749 struct iv_ca *iv_ca;
5750 edge exit;
5752 data->current_loop = loop;
5754 if (dump_file && (dump_flags & TDF_DETAILS))
5756 fprintf (dump_file, "Processing loop %d\n", loop->num);
5758 exit = single_dom_exit (loop);
5759 if (exit)
5761 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5762 exit->src->index, exit->dest->index);
5763 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5764 fprintf (dump_file, "\n");
5767 fprintf (dump_file, "\n");
5770   /* For each ssa name, determine whether it behaves as an induction variable
5771 in some loop. */
5772 if (!find_induction_variables (data))
5773 goto finish;
5775 /* Finds interesting uses (item 1). */
5776 find_interesting_uses (data);
5777 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5778 goto finish;
5780 /* Finds candidates for the induction variables (item 2). */
5781 find_iv_candidates (data);
5783 /* Calculates the costs (item 3, part 1). */
5784 determine_use_iv_costs (data);
5785 determine_iv_costs (data);
5786 determine_set_costs (data);
5788 /* Find the optimal set of induction variables (item 3, part 2). */
5789 iv_ca = find_optimal_iv_set (data);
5790 if (!iv_ca)
5791 goto finish;
5792 changed = true;
5794 /* Create the new induction variables (item 4, part 1). */
5795 create_new_ivs (data, iv_ca);
5796 iv_ca_free (&iv_ca);
5798 /* Rewrite the uses (item 4, part 2). */
5799 rewrite_uses (data);
5801 /* Remove the ivs that are unused after rewriting. */
5802 remove_unused_ivs (data);
5804 /* We have changed the structure of induction variables; it might happen
5805 that definitions in the scev database refer to some of them that were
5806 eliminated. */
5807 scev_reset ();
5809 finish:
5810 free_loop_data (data);
5812 return changed;
5815 /* Main entry point. Optimizes induction variables in LOOPS. */
5817 void
5818 tree_ssa_iv_optimize (struct loops *loops)
5820 struct loop *loop;
5821 struct ivopts_data data;
5823 tree_ssa_iv_optimize_init (loops, &data);
5825 /* Optimize the loops starting with the innermost ones. */
5826 loop = loops->tree_root;
5827 while (loop->inner)
5828 loop = loop->inner;
5830 /* Scan the loops, inner ones first. */
5831 while (loop != loops->tree_root)
5833 if (dump_file && (dump_flags & TDF_DETAILS))
5834 flow_loop_dump (loop, dump_file, NULL, 1);
5836 tree_ssa_iv_optimize_loop (&data, loop);
5838 if (loop->next)
5840 loop = loop->next;
5841 while (loop->inner)
5842 loop = loop->inner;
5844 else
5845 loop = loop->outer;
5848 tree_ssa_iv_optimize_finalize (loops, &data);