fix entries ordering
[official-gcc.git] / gcc / tree-ssa-loop-ivopts.c
blob8e1c53ad174a2f16a2aaa2fd534aab352d7b7b50
1 /* Induction variable optimizations.
2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 2, or (at your option) any
9 later version.
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING. If not, write to the Free
18 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
19 02110-1301, USA. */
21 /* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
28 1) The interesting uses of induction variables are found. This includes
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
34 2) Candidates for the induction variables are found. This includes
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
65 #include "config.h"
66 #include "system.h"
67 #include "coretypes.h"
68 #include "tm.h"
69 #include "tree.h"
70 #include "rtl.h"
71 #include "tm_p.h"
72 #include "hard-reg-set.h"
73 #include "basic-block.h"
74 #include "output.h"
75 #include "diagnostic.h"
76 #include "tree-flow.h"
77 #include "tree-dump.h"
78 #include "timevar.h"
79 #include "cfgloop.h"
80 #include "varray.h"
81 #include "expr.h"
82 #include "tree-pass.h"
83 #include "ggc.h"
84 #include "insn-config.h"
85 #include "recog.h"
86 #include "hashtab.h"
87 #include "tree-chrec.h"
88 #include "tree-scalar-evolution.h"
89 #include "cfgloop.h"
90 #include "params.h"
91 #include "langhooks.h"
93 /* The infinite cost. */
94 #define INFTY 10000000
96 /* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98 #define AVG_LOOP_NITER(LOOP) 5
101 /* Representation of the induction variable. */
102 struct iv
104 tree base; /* Initial value of the iv. */
105 tree base_object; /* A memory object to that the induction variable points. */
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
113 /* Per-ssa version information (induction variable descriptions, etc.). */
114 struct version_info
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
124 /* Information attached to loop. */
125 struct loop_data
127 unsigned regs_used; /* Number of registers used. */
130 /* Types of uses. */
131 enum use_type
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
139 /* The candidate - cost pair. */
140 struct cost_pair
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146 tree value; /* For final value elimination, the expression for
147 the final value of the iv. For iv elimination,
148 the new bound to compare with. */
151 /* Use. */
152 struct iv_use
154 unsigned id; /* The id of the use. */
155 enum use_type type; /* Type of the use. */
156 struct iv *iv; /* The induction variable it is based on. */
157 tree stmt; /* Statement in that it occurs. */
158 tree *op_p; /* The place where it occurs. */
159 bitmap related_cands; /* The set of "related" iv candidates, plus the common
160 important ones. */
162 unsigned n_map_members; /* Number of candidates in the cost_map list. */
163 struct cost_pair *cost_map;
164 /* The costs wrto the iv candidates. */
166 struct iv_cand *selected;
167 /* The selected candidate. */
170 /* The position where the iv is computed. */
171 enum iv_position
173 IP_NORMAL, /* At the end, just before the exit condition. */
174 IP_END, /* At the end of the latch block. */
175 IP_ORIGINAL /* The original biv. */
178 /* The induction variable candidate. */
179 struct iv_cand
181 unsigned id; /* The number of the candidate. */
182 bool important; /* Whether this is an "important" candidate, i.e. such
183 that it should be considered by all uses. */
184 enum iv_position pos; /* Where it is computed. */
185 tree incremented_at; /* For original biv, the statement where it is
186 incremented. */
187 tree var_before; /* The variable used for it before increment. */
188 tree var_after; /* The variable used for it after increment. */
189 struct iv *iv; /* The value of the candidate. NULL for
190 "pseudocandidate" used to indicate the possibility
191 to replace the final value of an iv by direct
192 computation of the value. */
193 unsigned cost; /* Cost of the candidate. */
194 bitmap depends_on; /* The list of invariants that are used in step of the
195 biv. */
198 /* The data used by the induction variable optimizations. */
200 typedef struct iv_use *iv_use_p;
201 DEF_VEC_P(iv_use_p);
202 DEF_VEC_ALLOC_P(iv_use_p,heap);
204 typedef struct iv_cand *iv_cand_p;
205 DEF_VEC_P(iv_cand_p);
206 DEF_VEC_ALLOC_P(iv_cand_p,heap);
208 struct ivopts_data
210 /* The currently optimized loop. */
211 struct loop *current_loop;
213 /* Numbers of iterations for all exits of the current loop. */
214 htab_t niters;
216 /* The size of version_info array allocated. */
217 unsigned version_info_size;
219 /* The array of information for the ssa names. */
220 struct version_info *version_info;
222 /* The bitmap of indices in version_info whose value was changed. */
223 bitmap relevant;
225 /* The maximum invariant id. */
226 unsigned max_inv_id;
228 /* The uses of induction variables. */
229 VEC(iv_use_p,heap) *iv_uses;
231 /* The candidates. */
232 VEC(iv_cand_p,heap) *iv_candidates;
234 /* A bitmap of important candidates. */
235 bitmap important_candidates;
237 /* Whether to consider just related and important candidates when replacing a
238 use. */
239 bool consider_all_candidates;
242 /* An assignment of iv candidates to uses. */
244 struct iv_ca
246 /* The number of uses covered by the assignment. */
247 unsigned upto;
249 /* Number of uses that cannot be expressed by the candidates in the set. */
250 unsigned bad_uses;
252 /* Candidate assigned to a use, together with the related costs. */
253 struct cost_pair **cand_for_use;
255 /* Number of times each candidate is used. */
256 unsigned *n_cand_uses;
258 /* The candidates used. */
259 bitmap cands;
261 /* The number of candidates in the set. */
262 unsigned n_cands;
264 /* Total number of registers needed. */
265 unsigned n_regs;
267 /* Total cost of expressing uses. */
268 unsigned cand_use_cost;
270 /* Total cost of candidates. */
271 unsigned cand_cost;
273 /* Number of times each invariant is used. */
274 unsigned *n_invariant_uses;
276 /* Total cost of the assignment. */
277 unsigned cost;
280 /* Difference of two iv candidate assignments. */
282 struct iv_ca_delta
284 /* Changed use. */
285 struct iv_use *use;
287 /* An old assignment (for rollback purposes). */
288 struct cost_pair *old_cp;
290 /* A new assignment. */
291 struct cost_pair *new_cp;
293 /* Next change in the list. */
294 struct iv_ca_delta *next_change;
297 /* Bound on number of candidates below that all candidates are considered. */
299 #define CONSIDER_ALL_CANDIDATES_BOUND \
300 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
302 /* If there are more iv occurrences, we just give up (it is quite unlikely that
303 optimizing such a loop would help, and it would take ages). */
305 #define MAX_CONSIDERED_USES \
306 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
308 /* If there are at most this number of ivs in the set, try removing unnecessary
309 ivs from the set always. */
311 #define ALWAYS_PRUNE_CAND_SET_BOUND \
312 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
314 /* The list of trees for that the decl_rtl field must be reset is stored
315 here. */
317 static VEC(tree,heap) *decl_rtl_to_reset;
319 /* Number of uses recorded in DATA. */
321 static inline unsigned
322 n_iv_uses (struct ivopts_data *data)
324 return VEC_length (iv_use_p, data->iv_uses);
327 /* Ith use recorded in DATA. */
329 static inline struct iv_use *
330 iv_use (struct ivopts_data *data, unsigned i)
332 return VEC_index (iv_use_p, data->iv_uses, i);
335 /* Number of candidates recorded in DATA. */
337 static inline unsigned
338 n_iv_cands (struct ivopts_data *data)
340 return VEC_length (iv_cand_p, data->iv_candidates);
343 /* Ith candidate recorded in DATA. */
345 static inline struct iv_cand *
346 iv_cand (struct ivopts_data *data, unsigned i)
348 return VEC_index (iv_cand_p, data->iv_candidates, i);
351 /* The data for LOOP. */
353 static inline struct loop_data *
354 loop_data (struct loop *loop)
356 return loop->aux;
359 /* The single loop exit if it dominates the latch, NULL otherwise. */
361 edge
362 single_dom_exit (struct loop *loop)
364 edge exit = loop->single_exit;
366 if (!exit)
367 return NULL;
369 if (!just_once_each_iteration_p (loop, exit->src))
370 return NULL;
372 return exit;
375 /* Dumps information about the induction variable IV to FILE. */
377 extern void dump_iv (FILE *, struct iv *);
378 void
379 dump_iv (FILE *file, struct iv *iv)
381 if (iv->ssa_name)
383 fprintf (file, "ssa name ");
384 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
385 fprintf (file, "\n");
388 fprintf (file, " type ");
389 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
390 fprintf (file, "\n");
392 if (iv->step)
394 fprintf (file, " base ");
395 print_generic_expr (file, iv->base, TDF_SLIM);
396 fprintf (file, "\n");
398 fprintf (file, " step ");
399 print_generic_expr (file, iv->step, TDF_SLIM);
400 fprintf (file, "\n");
402 else
404 fprintf (file, " invariant ");
405 print_generic_expr (file, iv->base, TDF_SLIM);
406 fprintf (file, "\n");
409 if (iv->base_object)
411 fprintf (file, " base object ");
412 print_generic_expr (file, iv->base_object, TDF_SLIM);
413 fprintf (file, "\n");
416 if (iv->biv_p)
417 fprintf (file, " is a biv\n");
420 /* Dumps information about the USE to FILE. */
422 extern void dump_use (FILE *, struct iv_use *);
423 void
424 dump_use (FILE *file, struct iv_use *use)
426 fprintf (file, "use %d\n", use->id);
428 switch (use->type)
430 case USE_NONLINEAR_EXPR:
431 fprintf (file, " generic\n");
432 break;
434 case USE_OUTER:
435 fprintf (file, " outside\n");
436 break;
438 case USE_ADDRESS:
439 fprintf (file, " address\n");
440 break;
442 case USE_COMPARE:
443 fprintf (file, " compare\n");
444 break;
446 default:
447 gcc_unreachable ();
450 fprintf (file, " in statement ");
451 print_generic_expr (file, use->stmt, TDF_SLIM);
452 fprintf (file, "\n");
454 fprintf (file, " at position ");
455 if (use->op_p)
456 print_generic_expr (file, *use->op_p, TDF_SLIM);
457 fprintf (file, "\n");
459 dump_iv (file, use->iv);
461 if (use->related_cands)
463 fprintf (file, " related candidates ");
464 dump_bitmap (file, use->related_cands);
468 /* Dumps information about the uses to FILE. */
470 extern void dump_uses (FILE *, struct ivopts_data *);
471 void
472 dump_uses (FILE *file, struct ivopts_data *data)
474 unsigned i;
475 struct iv_use *use;
477 for (i = 0; i < n_iv_uses (data); i++)
479 use = iv_use (data, i);
481 dump_use (file, use);
482 fprintf (file, "\n");
486 /* Dumps information about induction variable candidate CAND to FILE. */
488 extern void dump_cand (FILE *, struct iv_cand *);
489 void
490 dump_cand (FILE *file, struct iv_cand *cand)
492 struct iv *iv = cand->iv;
494 fprintf (file, "candidate %d%s\n",
495 cand->id, cand->important ? " (important)" : "");
497 if (cand->depends_on)
499 fprintf (file, " depends on ");
500 dump_bitmap (file, cand->depends_on);
503 if (!iv)
505 fprintf (file, " final value replacement\n");
506 return;
509 switch (cand->pos)
511 case IP_NORMAL:
512 fprintf (file, " incremented before exit test\n");
513 break;
515 case IP_END:
516 fprintf (file, " incremented at end\n");
517 break;
519 case IP_ORIGINAL:
520 fprintf (file, " original biv\n");
521 break;
524 dump_iv (file, iv);
527 /* Returns the info for ssa version VER. */
529 static inline struct version_info *
530 ver_info (struct ivopts_data *data, unsigned ver)
532 return data->version_info + ver;
535 /* Returns the info for ssa name NAME. */
537 static inline struct version_info *
538 name_info (struct ivopts_data *data, tree name)
540 return ver_info (data, SSA_NAME_VERSION (name));
543 /* Checks whether there exists number X such that X * B = A, counting modulo
544 2^BITS. */
546 static bool
547 divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
548 HOST_WIDE_INT *x)
550 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
551 unsigned HOST_WIDE_INT inv, ex, val;
552 unsigned i;
554 a &= mask;
555 b &= mask;
557 /* First divide the whole equation by 2 as long as possible. */
558 while (!(a & 1) && !(b & 1))
560 a >>= 1;
561 b >>= 1;
562 bits--;
563 mask >>= 1;
566 if (!(b & 1))
568 /* If b is still even, a is odd and there is no such x. */
569 return false;
572 /* Find the inverse of b. We compute it as
573 b^(2^(bits - 1) - 1) (mod 2^bits). */
574 inv = 1;
575 ex = b;
576 for (i = 0; i < bits - 1; i++)
578 inv = (inv * ex) & mask;
579 ex = (ex * ex) & mask;
582 val = (a * inv) & mask;
584 gcc_assert (((val * b) & mask) == a);
586 if ((val >> (bits - 1)) & 1)
587 val |= ~mask;
589 *x = val;
591 return true;
594 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
595 emitted in LOOP. */
597 static bool
598 stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
600 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
602 gcc_assert (bb);
604 if (sbb == loop->latch)
605 return true;
607 if (sbb != bb)
608 return false;
610 return stmt == last_stmt (bb);
613 /* Returns true if STMT if after the place where the original induction
614 variable CAND is incremented. */
616 static bool
617 stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
619 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
620 basic_block stmt_bb = bb_for_stmt (stmt);
621 block_stmt_iterator bsi;
623 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
624 return false;
626 if (stmt_bb != cand_bb)
627 return true;
629 /* Scan the block from the end, since the original ivs are usually
630 incremented at the end of the loop body. */
631 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
633 if (bsi_stmt (bsi) == cand->incremented_at)
634 return false;
635 if (bsi_stmt (bsi) == stmt)
636 return true;
640 /* Returns true if STMT if after the place where the induction variable
641 CAND is incremented in LOOP. */
643 static bool
644 stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
646 switch (cand->pos)
648 case IP_END:
649 return false;
651 case IP_NORMAL:
652 return stmt_after_ip_normal_pos (loop, stmt);
654 case IP_ORIGINAL:
655 return stmt_after_ip_original_pos (cand, stmt);
657 default:
658 gcc_unreachable ();
662 /* Element of the table in that we cache the numbers of iterations obtained
663 from exits of the loop. */
665 struct nfe_cache_elt
667 /* The edge for that the number of iterations is cached. */
668 edge exit;
670 /* True if the # of iterations was successfully determined. */
671 bool valid_p;
673 /* Description of # of iterations. */
674 struct tree_niter_desc niter;
677 /* Hash function for nfe_cache_elt E. */
679 static hashval_t
680 nfe_hash (const void *e)
682 const struct nfe_cache_elt *elt = e;
684 return htab_hash_pointer (elt->exit);
687 /* Equality function for nfe_cache_elt E1 and edge E2. */
689 static int
690 nfe_eq (const void *e1, const void *e2)
692 const struct nfe_cache_elt *elt1 = e1;
694 return elt1->exit == e2;
697 /* Returns structure describing number of iterations determined from
698 EXIT of DATA->current_loop, or NULL if something goes wrong. */
700 static struct tree_niter_desc *
701 niter_for_exit (struct ivopts_data *data, edge exit)
703 struct nfe_cache_elt *nfe_desc;
704 PTR *slot;
706 slot = htab_find_slot_with_hash (data->niters, exit,
707 htab_hash_pointer (exit),
708 INSERT);
710 if (!*slot)
712 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
713 nfe_desc->exit = exit;
714 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
715 exit, &nfe_desc->niter,
716 true);
717 *slot = nfe_desc;
719 else
720 nfe_desc = *slot;
722 if (!nfe_desc->valid_p)
723 return NULL;
725 return &nfe_desc->niter;
728 /* Returns structure describing number of iterations determined from
729 single dominating exit of DATA->current_loop, or NULL if something
730 goes wrong. */
732 static struct tree_niter_desc *
733 niter_for_single_dom_exit (struct ivopts_data *data)
735 edge exit = single_dom_exit (data->current_loop);
737 if (!exit)
738 return NULL;
740 return niter_for_exit (data, exit);
743 /* Initializes data structures used by the iv optimization pass, stored
744 in DATA. LOOPS is the loop tree. */
746 static void
747 tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
749 unsigned i;
751 data->version_info_size = 2 * num_ssa_names;
752 data->version_info = xcalloc (data->version_info_size,
753 sizeof (struct version_info));
754 data->relevant = BITMAP_ALLOC (NULL);
755 data->important_candidates = BITMAP_ALLOC (NULL);
756 data->max_inv_id = 0;
757 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
759 for (i = 1; i < loops->num; i++)
760 if (loops->parray[i])
761 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
763 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
764 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
765 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
768 /* Returns a memory object to that EXPR points. In case we are able to
769 determine that it does not point to any such object, NULL is returned. */
771 static tree
772 determine_base_object (tree expr)
774 enum tree_code code = TREE_CODE (expr);
775 tree base, obj, op0, op1;
777 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
778 return NULL_TREE;
780 switch (code)
782 case INTEGER_CST:
783 return NULL_TREE;
785 case ADDR_EXPR:
786 obj = TREE_OPERAND (expr, 0);
787 base = get_base_address (obj);
789 if (!base)
790 return expr;
792 if (TREE_CODE (base) == INDIRECT_REF)
793 return determine_base_object (TREE_OPERAND (base, 0));
795 return fold_convert (ptr_type_node,
796 build_fold_addr_expr (base));
798 case PLUS_EXPR:
799 case MINUS_EXPR:
800 op0 = determine_base_object (TREE_OPERAND (expr, 0));
801 op1 = determine_base_object (TREE_OPERAND (expr, 1));
803 if (!op1)
804 return op0;
806 if (!op0)
807 return (code == PLUS_EXPR
808 ? op1
809 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
811 return fold_build2 (code, ptr_type_node, op0, op1);
813 case NOP_EXPR:
814 case CONVERT_EXPR:
815 return determine_base_object (TREE_OPERAND (expr, 0));
817 default:
818 return fold_convert (ptr_type_node, expr);
822 /* Allocates an induction variable with given initial value BASE and step STEP
823 for loop LOOP. */
825 static struct iv *
826 alloc_iv (tree base, tree step)
828 struct iv *iv = xcalloc (1, sizeof (struct iv));
830 if (step && integer_zerop (step))
831 step = NULL_TREE;
833 iv->base = base;
834 iv->base_object = determine_base_object (base);
835 iv->step = step;
836 iv->biv_p = false;
837 iv->have_use_for = false;
838 iv->use_id = 0;
839 iv->ssa_name = NULL_TREE;
841 return iv;
844 /* Sets STEP and BASE for induction variable IV. */
846 static void
847 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
849 struct version_info *info = name_info (data, iv);
851 gcc_assert (!info->iv);
853 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
854 info->iv = alloc_iv (base, step);
855 info->iv->ssa_name = iv;
858 /* Finds induction variable declaration for VAR. */
860 static struct iv *
861 get_iv (struct ivopts_data *data, tree var)
863 basic_block bb;
865 if (!name_info (data, var)->iv)
867 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
869 if (!bb
870 || !flow_bb_inside_loop_p (data->current_loop, bb))
871 set_iv (data, var, var, NULL_TREE);
874 return name_info (data, var)->iv;
877 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
878 not define a simple affine biv with nonzero step. */
880 static tree
881 determine_biv_step (tree phi)
883 struct loop *loop = bb_for_stmt (phi)->loop_father;
884 tree name = PHI_RESULT (phi);
885 affine_iv iv;
887 if (!is_gimple_reg (name))
888 return NULL_TREE;
890 if (!simple_iv (loop, phi, name, &iv, true))
891 return NULL_TREE;
893 return (zero_p (iv.step) ? NULL_TREE : iv.step);
896 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
898 static bool
899 abnormal_ssa_name_p (tree exp)
901 if (!exp)
902 return false;
904 if (TREE_CODE (exp) != SSA_NAME)
905 return false;
907 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
910 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
911 abnormal phi node. Callback for for_each_index. */
913 static bool
914 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
915 void *data ATTRIBUTE_UNUSED)
917 if (TREE_CODE (base) == ARRAY_REF)
919 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
920 return false;
921 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
922 return false;
925 return !abnormal_ssa_name_p (*index);
928 /* Returns true if EXPR contains a ssa name that occurs in an
929 abnormal phi node. */
931 static bool
932 contains_abnormal_ssa_name_p (tree expr)
934 enum tree_code code;
935 enum tree_code_class class;
937 if (!expr)
938 return false;
940 code = TREE_CODE (expr);
941 class = TREE_CODE_CLASS (code);
943 if (code == SSA_NAME)
944 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
946 if (code == INTEGER_CST
947 || is_gimple_min_invariant (expr))
948 return false;
950 if (code == ADDR_EXPR)
951 return !for_each_index (&TREE_OPERAND (expr, 0),
952 idx_contains_abnormal_ssa_name_p,
953 NULL);
955 switch (class)
957 case tcc_binary:
958 case tcc_comparison:
959 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
960 return true;
962 /* Fallthru. */
963 case tcc_unary:
964 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
965 return true;
967 break;
969 default:
970 gcc_unreachable ();
973 return false;
976 /* Finds basic ivs. */
978 static bool
979 find_bivs (struct ivopts_data *data)
981 tree phi, step, type, base;
982 bool found = false;
983 struct loop *loop = data->current_loop;
985 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
987 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
988 continue;
990 step = determine_biv_step (phi);
991 if (!step)
992 continue;
994 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
995 base = expand_simple_operations (base);
996 if (contains_abnormal_ssa_name_p (base)
997 || contains_abnormal_ssa_name_p (step))
998 continue;
1000 type = TREE_TYPE (PHI_RESULT (phi));
1001 base = fold_convert (type, base);
1002 if (step)
1003 step = fold_convert (type, step);
1005 set_iv (data, PHI_RESULT (phi), base, step);
1006 found = true;
1009 return found;
1012 /* Marks basic ivs. */
1014 static void
1015 mark_bivs (struct ivopts_data *data)
1017 tree phi, var;
1018 struct iv *iv, *incr_iv;
1019 struct loop *loop = data->current_loop;
1020 basic_block incr_bb;
1022 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1024 iv = get_iv (data, PHI_RESULT (phi));
1025 if (!iv)
1026 continue;
1028 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1029 incr_iv = get_iv (data, var);
1030 if (!incr_iv)
1031 continue;
1033 /* If the increment is in the subloop, ignore it. */
1034 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1035 if (incr_bb->loop_father != data->current_loop
1036 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1037 continue;
1039 iv->biv_p = true;
1040 incr_iv->biv_p = true;
1044 /* Checks whether STMT defines a linear induction variable and stores its
1045 parameters to IV. */
1047 static bool
1048 find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
1050 tree lhs;
1051 struct loop *loop = data->current_loop;
1053 iv->base = NULL_TREE;
1054 iv->step = NULL_TREE;
1056 if (TREE_CODE (stmt) != MODIFY_EXPR)
1057 return false;
1059 lhs = TREE_OPERAND (stmt, 0);
1060 if (TREE_CODE (lhs) != SSA_NAME)
1061 return false;
1063 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
1064 return false;
1065 iv->base = expand_simple_operations (iv->base);
1067 if (contains_abnormal_ssa_name_p (iv->base)
1068 || contains_abnormal_ssa_name_p (iv->step))
1069 return false;
1071 return true;
1074 /* Finds general ivs in statement STMT. */
1076 static void
1077 find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1079 affine_iv iv;
1081 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1082 return;
1084 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
1087 /* Finds general ivs in basic block BB. */
1089 static void
1090 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1092 block_stmt_iterator bsi;
1094 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1095 find_givs_in_stmt (data, bsi_stmt (bsi));
1098 /* Finds general ivs. */
1100 static void
1101 find_givs (struct ivopts_data *data)
1103 struct loop *loop = data->current_loop;
1104 basic_block *body = get_loop_body_in_dom_order (loop);
1105 unsigned i;
1107 for (i = 0; i < loop->num_nodes; i++)
1108 find_givs_in_bb (data, body[i]);
1109 free (body);
1112 /* For each ssa name defined in LOOP determines whether it is an induction
1113 variable and if so, its initial value and step. */
1115 static bool
1116 find_induction_variables (struct ivopts_data *data)
1118 unsigned i;
1119 bitmap_iterator bi;
1121 if (!find_bivs (data))
1122 return false;
1124 find_givs (data);
1125 mark_bivs (data);
1127 if (dump_file && (dump_flags & TDF_DETAILS))
1129 struct tree_niter_desc *niter;
1131 niter = niter_for_single_dom_exit (data);
1133 if (niter)
1135 fprintf (dump_file, " number of iterations ");
1136 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1137 fprintf (dump_file, "\n");
1139 fprintf (dump_file, " may be zero if ");
1140 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1141 fprintf (dump_file, "\n");
1142 fprintf (dump_file, "\n");
1145 fprintf (dump_file, "Induction variables:\n\n");
1147 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1149 if (ver_info (data, i)->iv)
1150 dump_iv (dump_file, ver_info (data, i)->iv);
1154 return true;
1157 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1159 static struct iv_use *
1160 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1161 tree stmt, enum use_type use_type)
1163 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1165 use->id = n_iv_uses (data);
1166 use->type = use_type;
1167 use->iv = iv;
1168 use->stmt = stmt;
1169 use->op_p = use_p;
1170 use->related_cands = BITMAP_ALLOC (NULL);
1172 /* To avoid showing ssa name in the dumps, if it was not reset by the
1173 caller. */
1174 iv->ssa_name = NULL_TREE;
1176 if (dump_file && (dump_flags & TDF_DETAILS))
1177 dump_use (dump_file, use);
1179 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
1181 return use;
1184 /* Checks whether OP is a loop-level invariant and if so, records it.
1185 NONLINEAR_USE is true if the invariant is used in a way we do not
1186 handle specially. */
1188 static void
1189 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1191 basic_block bb;
1192 struct version_info *info;
1194 if (TREE_CODE (op) != SSA_NAME
1195 || !is_gimple_reg (op))
1196 return;
1198 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1199 if (bb
1200 && flow_bb_inside_loop_p (data->current_loop, bb))
1201 return;
1203 info = name_info (data, op);
1204 info->name = op;
1205 info->has_nonlin_use |= nonlinear_use;
1206 if (!info->inv_id)
1207 info->inv_id = ++data->max_inv_id;
1208 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1211 /* Checks whether the use OP is interesting and if so, records it
1212 as TYPE. */
1214 static struct iv_use *
1215 find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1216 enum use_type type)
1218 struct iv *iv;
1219 struct iv *civ;
1220 tree stmt;
1221 struct iv_use *use;
1223 if (TREE_CODE (op) != SSA_NAME)
1224 return NULL;
1226 iv = get_iv (data, op);
1227 if (!iv)
1228 return NULL;
1230 if (iv->have_use_for)
1232 use = iv_use (data, iv->use_id);
1234 gcc_assert (use->type == USE_NONLINEAR_EXPR
1235 || use->type == USE_OUTER);
1237 if (type == USE_NONLINEAR_EXPR)
1238 use->type = USE_NONLINEAR_EXPR;
1239 return use;
1242 if (zero_p (iv->step))
1244 record_invariant (data, op, true);
1245 return NULL;
1247 iv->have_use_for = true;
1249 civ = xmalloc (sizeof (struct iv));
1250 *civ = *iv;
1252 stmt = SSA_NAME_DEF_STMT (op);
1253 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1254 || TREE_CODE (stmt) == MODIFY_EXPR);
1256 use = record_use (data, NULL, civ, stmt, type);
1257 iv->use_id = use->id;
1259 return use;
1262 /* Checks whether the use OP is interesting and if so, records it. */
1264 static struct iv_use *
1265 find_interesting_uses_op (struct ivopts_data *data, tree op)
1267 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1270 /* Records a definition of induction variable OP that is used outside of the
1271 loop. */
1273 static struct iv_use *
1274 find_interesting_uses_outer (struct ivopts_data *data, tree op)
1276 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1279 /* Checks whether the condition *COND_P in STMT is interesting
1280 and if so, records it. */
1282 static void
1283 find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1285 tree *op0_p;
1286 tree *op1_p;
1287 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1288 struct iv const_iv;
1289 tree zero = integer_zero_node;
1291 const_iv.step = NULL_TREE;
1293 if (TREE_CODE (*cond_p) != SSA_NAME
1294 && !COMPARISON_CLASS_P (*cond_p))
1295 return;
1297 if (TREE_CODE (*cond_p) == SSA_NAME)
1299 op0_p = cond_p;
1300 op1_p = &zero;
1302 else
1304 op0_p = &TREE_OPERAND (*cond_p, 0);
1305 op1_p = &TREE_OPERAND (*cond_p, 1);
1308 if (TREE_CODE (*op0_p) == SSA_NAME)
1309 iv0 = get_iv (data, *op0_p);
1310 else
1311 iv0 = &const_iv;
1313 if (TREE_CODE (*op1_p) == SSA_NAME)
1314 iv1 = get_iv (data, *op1_p);
1315 else
1316 iv1 = &const_iv;
1318 if (/* When comparing with non-invariant value, we may not do any senseful
1319 induction variable elimination. */
1320 (!iv0 || !iv1)
1321 /* Eliminating condition based on two ivs would be nontrivial.
1322 ??? TODO -- it is not really important to handle this case. */
1323 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1325 find_interesting_uses_op (data, *op0_p);
1326 find_interesting_uses_op (data, *op1_p);
1327 return;
1330 if (zero_p (iv0->step) && zero_p (iv1->step))
1332 /* If both are invariants, this is a work for unswitching. */
1333 return;
1336 civ = xmalloc (sizeof (struct iv));
1337 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1338 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1341 /* Returns true if expression EXPR is obviously invariant in LOOP,
1342 i.e. if all its operands are defined outside of the LOOP. */
1344 bool
1345 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1347 basic_block def_bb;
1348 unsigned i, len;
1350 if (is_gimple_min_invariant (expr))
1351 return true;
1353 if (TREE_CODE (expr) == SSA_NAME)
1355 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1356 if (def_bb
1357 && flow_bb_inside_loop_p (loop, def_bb))
1358 return false;
1360 return true;
1363 if (!EXPR_P (expr))
1364 return false;
1366 len = TREE_CODE_LENGTH (TREE_CODE (expr));
1367 for (i = 0; i < len; i++)
1368 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1369 return false;
1371 return true;
1374 /* Cumulates the steps of indices into DATA and replaces their values with the
1375 initial ones. Returns false when the value of the index cannot be determined.
1376 Callback for for_each_index. */
1378 struct ifs_ivopts_data
1380 struct ivopts_data *ivopts_data;
1381 tree stmt;
1382 tree *step_p;
1385 static bool
1386 idx_find_step (tree base, tree *idx, void *data)
1388 struct ifs_ivopts_data *dta = data;
1389 struct iv *iv;
1390 tree step, iv_step, lbound, off;
1391 struct loop *loop = dta->ivopts_data->current_loop;
1393 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1394 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1395 return false;
1397 /* If base is a component ref, require that the offset of the reference
1398 be invariant. */
1399 if (TREE_CODE (base) == COMPONENT_REF)
1401 off = component_ref_field_offset (base);
1402 return expr_invariant_in_loop_p (loop, off);
1405 /* If base is array, first check whether we will be able to move the
1406 reference out of the loop (in order to take its address in strength
1407 reduction). In order for this to work we need both lower bound
1408 and step to be loop invariants. */
1409 if (TREE_CODE (base) == ARRAY_REF)
1411 step = array_ref_element_size (base);
1412 lbound = array_ref_low_bound (base);
1414 if (!expr_invariant_in_loop_p (loop, step)
1415 || !expr_invariant_in_loop_p (loop, lbound))
1416 return false;
1419 if (TREE_CODE (*idx) != SSA_NAME)
1420 return true;
1422 iv = get_iv (dta->ivopts_data, *idx);
1423 if (!iv)
1424 return false;
1426 *idx = iv->base;
1428 if (!iv->step)
1429 return true;
1431 if (TREE_CODE (base) == ARRAY_REF)
1433 step = array_ref_element_size (base);
1435 /* We only handle addresses whose step is an integer constant. */
1436 if (TREE_CODE (step) != INTEGER_CST)
1437 return false;
1439 else
1440 /* The step for pointer arithmetics already is 1 byte. */
1441 step = build_int_cst (sizetype, 1);
1443 /* FIXME: convert_step should not be used outside chrec_convert: fix
1444 this by calling chrec_convert. */
1445 iv_step = convert_step (dta->ivopts_data->current_loop,
1446 sizetype, iv->base, iv->step, dta->stmt);
1448 if (!iv_step)
1450 /* The index might wrap. */
1451 return false;
1454 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1456 if (!*dta->step_p)
1457 *dta->step_p = step;
1458 else
1459 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
1461 return true;
1464 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1465 object is passed to it in DATA. */
1467 static bool
1468 idx_record_use (tree base, tree *idx,
1469 void *data)
1471 find_interesting_uses_op (data, *idx);
1472 if (TREE_CODE (base) == ARRAY_REF)
1474 find_interesting_uses_op (data, array_ref_element_size (base));
1475 find_interesting_uses_op (data, array_ref_low_bound (base));
1477 return true;
1480 /* Returns true if memory reference REF may be unaligned. */
1482 static bool
1483 may_be_unaligned_p (tree ref)
1485 tree base;
1486 tree base_type;
1487 HOST_WIDE_INT bitsize;
1488 HOST_WIDE_INT bitpos;
1489 tree toffset;
1490 enum machine_mode mode;
1491 int unsignedp, volatilep;
1492 unsigned base_align;
1494 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1495 thus they are not misaligned. */
1496 if (TREE_CODE (ref) == TARGET_MEM_REF)
1497 return false;
1499 /* The test below is basically copy of what expr.c:normal_inner_ref
1500 does to check whether the object must be loaded by parts when
1501 STRICT_ALIGNMENT is true. */
1502 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1503 &unsignedp, &volatilep, true);
1504 base_type = TREE_TYPE (base);
1505 base_align = TYPE_ALIGN (base_type);
1507 if (mode != BLKmode
1508 && (base_align < GET_MODE_ALIGNMENT (mode)
1509 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1510 || bitpos % BITS_PER_UNIT != 0))
1511 return true;
1513 return false;
1516 /* Finds addresses in *OP_P inside STMT. */
1518 static void
1519 find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1521 tree base = *op_p, step = NULL;
1522 struct iv *civ;
1523 struct ifs_ivopts_data ifs_ivopts_data;
1525 /* Do not play with volatile memory references. A bit too conservative,
1526 perhaps, but safe. */
1527 if (stmt_ann (stmt)->has_volatile_ops)
1528 goto fail;
1530 /* Ignore bitfields for now. Not really something terribly complicated
1531 to handle. TODO. */
1532 if (TREE_CODE (base) == COMPONENT_REF
1533 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1534 goto fail;
1536 if (STRICT_ALIGNMENT
1537 && may_be_unaligned_p (base))
1538 goto fail;
1540 base = unshare_expr (base);
1542 if (TREE_CODE (base) == TARGET_MEM_REF)
1544 tree type = build_pointer_type (TREE_TYPE (base));
1545 tree astep;
1547 if (TMR_BASE (base)
1548 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1550 civ = get_iv (data, TMR_BASE (base));
1551 if (!civ)
1552 goto fail;
1554 TMR_BASE (base) = civ->base;
1555 step = civ->step;
1557 if (TMR_INDEX (base)
1558 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1560 civ = get_iv (data, TMR_INDEX (base));
1561 if (!civ)
1562 goto fail;
1564 TMR_INDEX (base) = civ->base;
1565 astep = civ->step;
1567 if (astep)
1569 if (TMR_STEP (base))
1570 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1572 if (step)
1573 step = fold_build2 (PLUS_EXPR, type, step, astep);
1574 else
1575 step = astep;
1579 if (zero_p (step))
1580 goto fail;
1581 base = tree_mem_ref_addr (type, base);
1583 else
1585 ifs_ivopts_data.ivopts_data = data;
1586 ifs_ivopts_data.stmt = stmt;
1587 ifs_ivopts_data.step_p = &step;
1588 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1589 || zero_p (step))
1590 goto fail;
1592 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1593 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1595 base = build_fold_addr_expr (base);
1598 civ = alloc_iv (base, step);
1599 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1600 return;
1602 fail:
1603 for_each_index (op_p, idx_record_use, data);
1606 /* Finds and records invariants used in STMT. */
1608 static void
1609 find_invariants_stmt (struct ivopts_data *data, tree stmt)
1611 ssa_op_iter iter;
1612 use_operand_p use_p;
1613 tree op;
1615 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1617 op = USE_FROM_PTR (use_p);
1618 record_invariant (data, op, false);
1622 /* Finds interesting uses of induction variables in the statement STMT. */
1624 static void
1625 find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1627 struct iv *iv;
1628 tree op, lhs, rhs;
1629 ssa_op_iter iter;
1630 use_operand_p use_p;
1632 find_invariants_stmt (data, stmt);
1634 if (TREE_CODE (stmt) == COND_EXPR)
1636 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1637 return;
1640 if (TREE_CODE (stmt) == MODIFY_EXPR)
1642 lhs = TREE_OPERAND (stmt, 0);
1643 rhs = TREE_OPERAND (stmt, 1);
1645 if (TREE_CODE (lhs) == SSA_NAME)
1647 /* If the statement defines an induction variable, the uses are not
1648 interesting by themselves. */
1650 iv = get_iv (data, lhs);
1652 if (iv && !zero_p (iv->step))
1653 return;
1656 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1658 case tcc_comparison:
1659 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1660 return;
1662 case tcc_reference:
1663 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
1664 if (REFERENCE_CLASS_P (lhs))
1665 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1666 return;
1668 default: ;
1671 if (REFERENCE_CLASS_P (lhs)
1672 && is_gimple_val (rhs))
1674 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1675 find_interesting_uses_op (data, rhs);
1676 return;
1679 /* TODO -- we should also handle address uses of type
1681 memory = call (whatever);
1685 call (memory). */
1688 if (TREE_CODE (stmt) == PHI_NODE
1689 && bb_for_stmt (stmt) == data->current_loop->header)
1691 lhs = PHI_RESULT (stmt);
1692 iv = get_iv (data, lhs);
1694 if (iv && !zero_p (iv->step))
1695 return;
1698 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1700 op = USE_FROM_PTR (use_p);
1702 if (TREE_CODE (op) != SSA_NAME)
1703 continue;
1705 iv = get_iv (data, op);
1706 if (!iv)
1707 continue;
1709 find_interesting_uses_op (data, op);
1713 /* Finds interesting uses of induction variables outside of loops
1714 on loop exit edge EXIT. */
1716 static void
1717 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1719 tree phi, def;
1721 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
1723 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1724 find_interesting_uses_outer (data, def);
1728 /* Finds uses of the induction variables that are interesting. */
1730 static void
1731 find_interesting_uses (struct ivopts_data *data)
1733 basic_block bb;
1734 block_stmt_iterator bsi;
1735 tree phi;
1736 basic_block *body = get_loop_body (data->current_loop);
1737 unsigned i;
1738 struct version_info *info;
1739 edge e;
1741 if (dump_file && (dump_flags & TDF_DETAILS))
1742 fprintf (dump_file, "Uses:\n\n");
1744 for (i = 0; i < data->current_loop->num_nodes; i++)
1746 edge_iterator ei;
1747 bb = body[i];
1749 FOR_EACH_EDGE (e, ei, bb->succs)
1750 if (e->dest != EXIT_BLOCK_PTR
1751 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1752 find_interesting_uses_outside (data, e);
1754 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1755 find_interesting_uses_stmt (data, phi);
1756 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1757 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1760 if (dump_file && (dump_flags & TDF_DETAILS))
1762 bitmap_iterator bi;
1764 fprintf (dump_file, "\n");
1766 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1768 info = ver_info (data, i);
1769 if (info->inv_id)
1771 fprintf (dump_file, " ");
1772 print_generic_expr (dump_file, info->name, TDF_SLIM);
1773 fprintf (dump_file, " is invariant (%d)%s\n",
1774 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1778 fprintf (dump_file, "\n");
1781 free (body);
1784 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1785 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1786 we are at the top-level of the processed address. */
1788 static tree
1789 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1790 unsigned HOST_WIDE_INT *offset)
1792 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1793 enum tree_code code;
1794 tree type, orig_type = TREE_TYPE (expr);
1795 unsigned HOST_WIDE_INT off0, off1, st;
1796 tree orig_expr = expr;
1798 STRIP_NOPS (expr);
1800 type = TREE_TYPE (expr);
1801 code = TREE_CODE (expr);
1802 *offset = 0;
1804 switch (code)
1806 case INTEGER_CST:
1807 if (!cst_and_fits_in_hwi (expr)
1808 || zero_p (expr))
1809 return orig_expr;
1811 *offset = int_cst_value (expr);
1812 return build_int_cst_type (orig_type, 0);
1814 case PLUS_EXPR:
1815 case MINUS_EXPR:
1816 op0 = TREE_OPERAND (expr, 0);
1817 op1 = TREE_OPERAND (expr, 1);
1819 op0 = strip_offset_1 (op0, false, false, &off0);
1820 op1 = strip_offset_1 (op1, false, false, &off1);
1822 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1823 if (op0 == TREE_OPERAND (expr, 0)
1824 && op1 == TREE_OPERAND (expr, 1))
1825 return orig_expr;
1827 if (zero_p (op1))
1828 expr = op0;
1829 else if (zero_p (op0))
1831 if (code == PLUS_EXPR)
1832 expr = op1;
1833 else
1834 expr = fold_build1 (NEGATE_EXPR, type, op1);
1836 else
1837 expr = fold_build2 (code, type, op0, op1);
1839 return fold_convert (orig_type, expr);
1841 case ARRAY_REF:
1842 if (!inside_addr)
1843 return orig_expr;
1845 step = array_ref_element_size (expr);
1846 if (!cst_and_fits_in_hwi (step))
1847 break;
1849 st = int_cst_value (step);
1850 op1 = TREE_OPERAND (expr, 1);
1851 op1 = strip_offset_1 (op1, false, false, &off1);
1852 *offset = off1 * st;
1854 if (top_compref
1855 && zero_p (op1))
1857 /* Strip the component reference completely. */
1858 op0 = TREE_OPERAND (expr, 0);
1859 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1860 *offset += off0;
1861 return op0;
1863 break;
1865 case COMPONENT_REF:
1866 if (!inside_addr)
1867 return orig_expr;
1869 tmp = component_ref_field_offset (expr);
1870 if (top_compref
1871 && cst_and_fits_in_hwi (tmp))
1873 /* Strip the component reference completely. */
1874 op0 = TREE_OPERAND (expr, 0);
1875 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1876 *offset = off0 + int_cst_value (tmp);
1877 return op0;
1879 break;
1881 case ADDR_EXPR:
1882 op0 = TREE_OPERAND (expr, 0);
1883 op0 = strip_offset_1 (op0, true, true, &off0);
1884 *offset += off0;
1886 if (op0 == TREE_OPERAND (expr, 0))
1887 return orig_expr;
1889 expr = build_fold_addr_expr (op0);
1890 return fold_convert (orig_type, expr);
1892 case INDIRECT_REF:
1893 inside_addr = false;
1894 break;
1896 default:
1897 return orig_expr;
1900 /* Default handling of expressions for that we want to recurse into
1901 the first operand. */
1902 op0 = TREE_OPERAND (expr, 0);
1903 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
1904 *offset += off0;
1906 if (op0 == TREE_OPERAND (expr, 0)
1907 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1908 return orig_expr;
1910 expr = copy_node (expr);
1911 TREE_OPERAND (expr, 0) = op0;
1912 if (op1)
1913 TREE_OPERAND (expr, 1) = op1;
1915 /* Inside address, we might strip the top level component references,
1916 thus changing type of the expression. Handling of ADDR_EXPR
1917 will fix that. */
1918 expr = fold_convert (orig_type, expr);
1920 return expr;
1923 /* Strips constant offsets from EXPR and stores them to OFFSET. */
1925 static tree
1926 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1928 return strip_offset_1 (expr, false, false, offset);
1931 /* Returns variant of TYPE that can be used as base for different uses.
1932 For integer types, we return unsigned variant of the type, which
1933 avoids problems with overflows. For pointer types, we return void *. */
1935 static tree
1936 generic_type_for (tree type)
1938 if (POINTER_TYPE_P (type))
1939 return ptr_type_node;
1941 if (TYPE_UNSIGNED (type))
1942 return type;
1944 return unsigned_type_for (type);
1947 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1948 the bitmap to that we should store it. */
1950 static struct ivopts_data *fd_ivopts_data;
1951 static tree
1952 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1954 bitmap *depends_on = data;
1955 struct version_info *info;
1957 if (TREE_CODE (*expr_p) != SSA_NAME)
1958 return NULL_TREE;
1959 info = name_info (fd_ivopts_data, *expr_p);
1961 if (!info->inv_id || info->has_nonlin_use)
1962 return NULL_TREE;
1964 if (!*depends_on)
1965 *depends_on = BITMAP_ALLOC (NULL);
1966 bitmap_set_bit (*depends_on, info->inv_id);
1968 return NULL_TREE;
1971 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1972 position to POS. If USE is not NULL, the candidate is set as related to
1973 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1974 replacement of the final value of the iv by a direct computation. */
1976 static struct iv_cand *
1977 add_candidate_1 (struct ivopts_data *data,
1978 tree base, tree step, bool important, enum iv_position pos,
1979 struct iv_use *use, tree incremented_at)
1981 unsigned i;
1982 struct iv_cand *cand = NULL;
1983 tree type, orig_type;
1985 if (base)
1987 orig_type = TREE_TYPE (base);
1988 type = generic_type_for (orig_type);
1989 if (type != orig_type)
1991 base = fold_convert (type, base);
1992 if (step)
1993 step = fold_convert (type, step);
1997 for (i = 0; i < n_iv_cands (data); i++)
1999 cand = iv_cand (data, i);
2001 if (cand->pos != pos)
2002 continue;
2004 if (cand->incremented_at != incremented_at)
2005 continue;
2007 if (!cand->iv)
2009 if (!base && !step)
2010 break;
2012 continue;
2015 if (!base && !step)
2016 continue;
2018 if (!operand_equal_p (base, cand->iv->base, 0))
2019 continue;
2021 if (zero_p (cand->iv->step))
2023 if (zero_p (step))
2024 break;
2026 else
2028 if (step && operand_equal_p (step, cand->iv->step, 0))
2029 break;
2033 if (i == n_iv_cands (data))
2035 cand = xcalloc (1, sizeof (struct iv_cand));
2036 cand->id = i;
2038 if (!base && !step)
2039 cand->iv = NULL;
2040 else
2041 cand->iv = alloc_iv (base, step);
2043 cand->pos = pos;
2044 if (pos != IP_ORIGINAL && cand->iv)
2046 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2047 cand->var_after = cand->var_before;
2049 cand->important = important;
2050 cand->incremented_at = incremented_at;
2051 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2053 if (step
2054 && TREE_CODE (step) != INTEGER_CST)
2056 fd_ivopts_data = data;
2057 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2060 if (dump_file && (dump_flags & TDF_DETAILS))
2061 dump_cand (dump_file, cand);
2064 if (important && !cand->important)
2066 cand->important = true;
2067 if (dump_file && (dump_flags & TDF_DETAILS))
2068 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2071 if (use)
2073 bitmap_set_bit (use->related_cands, i);
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2075 fprintf (dump_file, "Candidate %d is related to use %d\n",
2076 cand->id, use->id);
2079 return cand;
2082 /* Returns true if incrementing the induction variable at the end of the LOOP
2083 is allowed.
2085 The purpose is to avoid splitting latch edge with a biv increment, thus
2086 creating a jump, possibly confusing other optimization passes and leaving
2087 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2088 is not available (so we do not have a better alternative), or if the latch
2089 edge is already nonempty. */
2091 static bool
2092 allow_ip_end_pos_p (struct loop *loop)
2094 if (!ip_normal_pos (loop))
2095 return true;
2097 if (!empty_block_p (ip_end_pos (loop)))
2098 return true;
2100 return false;
2103 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2104 position to POS. If USE is not NULL, the candidate is set as related to
2105 it. The candidate computation is scheduled on all available positions. */
2107 static void
2108 add_candidate (struct ivopts_data *data,
2109 tree base, tree step, bool important, struct iv_use *use)
2111 if (ip_normal_pos (data->current_loop))
2112 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
2113 if (ip_end_pos (data->current_loop)
2114 && allow_ip_end_pos_p (data->current_loop))
2115 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2118 /* Add a standard "0 + 1 * iteration" iv candidate for a
2119 type with SIZE bits. */
2121 static void
2122 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2123 unsigned int size)
2125 tree type = lang_hooks.types.type_for_size (size, true);
2126 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2127 true, NULL);
2130 /* Adds standard iv candidates. */
2132 static void
2133 add_standard_iv_candidates (struct ivopts_data *data)
2135 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2137 /* The same for a double-integer type if it is still fast enough. */
2138 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2139 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2143 /* Adds candidates bases on the old induction variable IV. */
2145 static void
2146 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2148 tree phi, def;
2149 struct iv_cand *cand;
2151 add_candidate (data, iv->base, iv->step, true, NULL);
2153 /* The same, but with initial value zero. */
2154 add_candidate (data,
2155 build_int_cst (TREE_TYPE (iv->base), 0),
2156 iv->step, true, NULL);
2158 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2159 if (TREE_CODE (phi) == PHI_NODE)
2161 /* Additionally record the possibility of leaving the original iv
2162 untouched. */
2163 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2164 cand = add_candidate_1 (data,
2165 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2166 SSA_NAME_DEF_STMT (def));
2167 cand->var_before = iv->ssa_name;
2168 cand->var_after = def;
2172 /* Adds candidates based on the old induction variables. */
2174 static void
2175 add_old_ivs_candidates (struct ivopts_data *data)
2177 unsigned i;
2178 struct iv *iv;
2179 bitmap_iterator bi;
2181 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2183 iv = ver_info (data, i)->iv;
2184 if (iv && iv->biv_p && !zero_p (iv->step))
2185 add_old_iv_candidates (data, iv);
2189 /* Adds candidates based on the value of the induction variable IV and USE. */
2191 static void
2192 add_iv_value_candidates (struct ivopts_data *data,
2193 struct iv *iv, struct iv_use *use)
2195 unsigned HOST_WIDE_INT offset;
2196 tree base;
2198 add_candidate (data, iv->base, iv->step, false, use);
2200 /* The same, but with initial value zero. Make such variable important,
2201 since it is generic enough so that possibly many uses may be based
2202 on it. */
2203 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2204 iv->step, true, use);
2206 /* Third, try removing the constant offset. */
2207 base = strip_offset (iv->base, &offset);
2208 if (offset)
2209 add_candidate (data, base, iv->step, false, use);
2212 /* Possibly adds pseudocandidate for replacing the final value of USE by
2213 a direct computation. */
2215 static void
2216 add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2218 struct tree_niter_desc *niter;
2220 /* We must know where we exit the loop and how many times does it roll. */
2221 niter = niter_for_single_dom_exit (data);
2222 if (!niter
2223 || !zero_p (niter->may_be_zero))
2224 return;
2226 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2229 /* Adds candidates based on the uses. */
2231 static void
2232 add_derived_ivs_candidates (struct ivopts_data *data)
2234 unsigned i;
2236 for (i = 0; i < n_iv_uses (data); i++)
2238 struct iv_use *use = iv_use (data, i);
2240 if (!use)
2241 continue;
2243 switch (use->type)
2245 case USE_NONLINEAR_EXPR:
2246 case USE_COMPARE:
2247 case USE_ADDRESS:
2248 /* Just add the ivs based on the value of the iv used here. */
2249 add_iv_value_candidates (data, use->iv, use);
2250 break;
2252 case USE_OUTER:
2253 add_iv_value_candidates (data, use->iv, use);
2255 /* Additionally, add the pseudocandidate for the possibility to
2256 replace the final value by a direct computation. */
2257 add_iv_outer_candidates (data, use);
2258 break;
2260 default:
2261 gcc_unreachable ();
2266 /* Record important candidates and add them to related_cands bitmaps
2267 if needed. */
2269 static void
2270 record_important_candidates (struct ivopts_data *data)
2272 unsigned i;
2273 struct iv_use *use;
2275 for (i = 0; i < n_iv_cands (data); i++)
2277 struct iv_cand *cand = iv_cand (data, i);
2279 if (cand->important)
2280 bitmap_set_bit (data->important_candidates, i);
2283 data->consider_all_candidates = (n_iv_cands (data)
2284 <= CONSIDER_ALL_CANDIDATES_BOUND);
2286 if (data->consider_all_candidates)
2288 /* We will not need "related_cands" bitmaps in this case,
2289 so release them to decrease peak memory consumption. */
2290 for (i = 0; i < n_iv_uses (data); i++)
2292 use = iv_use (data, i);
2293 BITMAP_FREE (use->related_cands);
2296 else
2298 /* Add important candidates to the related_cands bitmaps. */
2299 for (i = 0; i < n_iv_uses (data); i++)
2300 bitmap_ior_into (iv_use (data, i)->related_cands,
2301 data->important_candidates);
2305 /* Finds the candidates for the induction variables. */
2307 static void
2308 find_iv_candidates (struct ivopts_data *data)
2310 /* Add commonly used ivs. */
2311 add_standard_iv_candidates (data);
2313 /* Add old induction variables. */
2314 add_old_ivs_candidates (data);
2316 /* Add induction variables derived from uses. */
2317 add_derived_ivs_candidates (data);
2319 /* Record the important candidates. */
2320 record_important_candidates (data);
2323 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2324 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2325 we allocate a simple list to every use. */
2327 static void
2328 alloc_use_cost_map (struct ivopts_data *data)
2330 unsigned i, size, s, j;
2332 for (i = 0; i < n_iv_uses (data); i++)
2334 struct iv_use *use = iv_use (data, i);
2335 bitmap_iterator bi;
2337 if (data->consider_all_candidates)
2338 size = n_iv_cands (data);
2339 else
2341 s = 0;
2342 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2344 s++;
2347 /* Round up to the power of two, so that moduling by it is fast. */
2348 for (size = 1; size < s; size <<= 1)
2349 continue;
2352 use->n_map_members = size;
2353 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2357 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2358 on invariants DEPENDS_ON and that the value used in expressing it
2359 is VALUE.*/
2361 static void
2362 set_use_iv_cost (struct ivopts_data *data,
2363 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2364 bitmap depends_on, tree value)
2366 unsigned i, s;
2368 if (cost == INFTY)
2370 BITMAP_FREE (depends_on);
2371 return;
2374 if (data->consider_all_candidates)
2376 use->cost_map[cand->id].cand = cand;
2377 use->cost_map[cand->id].cost = cost;
2378 use->cost_map[cand->id].depends_on = depends_on;
2379 use->cost_map[cand->id].value = value;
2380 return;
2383 /* n_map_members is a power of two, so this computes modulo. */
2384 s = cand->id & (use->n_map_members - 1);
2385 for (i = s; i < use->n_map_members; i++)
2386 if (!use->cost_map[i].cand)
2387 goto found;
2388 for (i = 0; i < s; i++)
2389 if (!use->cost_map[i].cand)
2390 goto found;
2392 gcc_unreachable ();
2394 found:
2395 use->cost_map[i].cand = cand;
2396 use->cost_map[i].cost = cost;
2397 use->cost_map[i].depends_on = depends_on;
2398 use->cost_map[i].value = value;
2401 /* Gets cost of (USE, CANDIDATE) pair. */
2403 static struct cost_pair *
2404 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2405 struct iv_cand *cand)
2407 unsigned i, s;
2408 struct cost_pair *ret;
2410 if (!cand)
2411 return NULL;
2413 if (data->consider_all_candidates)
2415 ret = use->cost_map + cand->id;
2416 if (!ret->cand)
2417 return NULL;
2419 return ret;
2422 /* n_map_members is a power of two, so this computes modulo. */
2423 s = cand->id & (use->n_map_members - 1);
2424 for (i = s; i < use->n_map_members; i++)
2425 if (use->cost_map[i].cand == cand)
2426 return use->cost_map + i;
2428 for (i = 0; i < s; i++)
2429 if (use->cost_map[i].cand == cand)
2430 return use->cost_map + i;
2432 return NULL;
2435 /* Returns estimate on cost of computing SEQ. */
2437 static unsigned
2438 seq_cost (rtx seq)
2440 unsigned cost = 0;
2441 rtx set;
2443 for (; seq; seq = NEXT_INSN (seq))
2445 set = single_set (seq);
2446 if (set)
2447 cost += rtx_cost (set, SET);
2448 else
2449 cost++;
2452 return cost;
2455 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2456 static rtx
2457 produce_memory_decl_rtl (tree obj, int *regno)
2459 rtx x;
2461 gcc_assert (obj);
2462 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2464 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2465 x = gen_rtx_SYMBOL_REF (Pmode, name);
2467 else
2468 x = gen_raw_REG (Pmode, (*regno)++);
2470 return gen_rtx_MEM (DECL_MODE (obj), x);
2473 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2474 walk_tree. DATA contains the actual fake register number. */
2476 static tree
2477 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2479 tree obj = NULL_TREE;
2480 rtx x = NULL_RTX;
2481 int *regno = data;
2483 switch (TREE_CODE (*expr_p))
2485 case ADDR_EXPR:
2486 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2487 handled_component_p (*expr_p);
2488 expr_p = &TREE_OPERAND (*expr_p, 0))
2489 continue;
2490 obj = *expr_p;
2491 if (DECL_P (obj))
2492 x = produce_memory_decl_rtl (obj, regno);
2493 break;
2495 case SSA_NAME:
2496 *ws = 0;
2497 obj = SSA_NAME_VAR (*expr_p);
2498 if (!DECL_RTL_SET_P (obj))
2499 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2500 break;
2502 case VAR_DECL:
2503 case PARM_DECL:
2504 case RESULT_DECL:
2505 *ws = 0;
2506 obj = *expr_p;
2508 if (DECL_RTL_SET_P (obj))
2509 break;
2511 if (DECL_MODE (obj) == BLKmode)
2512 x = produce_memory_decl_rtl (obj, regno);
2513 else
2514 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2516 break;
2518 default:
2519 break;
2522 if (x)
2524 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2525 SET_DECL_RTL (obj, x);
2528 return NULL_TREE;
2531 /* Determines cost of the computation of EXPR. */
2533 static unsigned
2534 computation_cost (tree expr)
2536 rtx seq, rslt;
2537 tree type = TREE_TYPE (expr);
2538 unsigned cost;
2539 /* Avoid using hard regs in ways which may be unsupported. */
2540 int regno = LAST_VIRTUAL_REGISTER + 1;
2542 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2543 start_sequence ();
2544 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2545 seq = get_insns ();
2546 end_sequence ();
2548 cost = seq_cost (seq);
2549 if (MEM_P (rslt))
2550 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2552 return cost;
2555 /* Returns variable containing the value of candidate CAND at statement AT. */
2557 static tree
2558 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2560 if (stmt_after_increment (loop, cand, stmt))
2561 return cand->var_after;
2562 else
2563 return cand->var_before;
2566 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2567 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2570 tree_int_cst_sign_bit (tree t)
2572 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2573 unsigned HOST_WIDE_INT w;
2575 if (bitno < HOST_BITS_PER_WIDE_INT)
2576 w = TREE_INT_CST_LOW (t);
2577 else
2579 w = TREE_INT_CST_HIGH (t);
2580 bitno -= HOST_BITS_PER_WIDE_INT;
2583 return (w >> bitno) & 1;
2586 /* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2587 return cst. Otherwise return NULL_TREE. */
2589 static tree
2590 constant_multiple_of (tree type, tree top, tree bot)
2592 tree res, mby, p0, p1;
2593 enum tree_code code;
2594 bool negate;
2596 STRIP_NOPS (top);
2597 STRIP_NOPS (bot);
2599 if (operand_equal_p (top, bot, 0))
2600 return build_int_cst (type, 1);
2602 code = TREE_CODE (top);
2603 switch (code)
2605 case MULT_EXPR:
2606 mby = TREE_OPERAND (top, 1);
2607 if (TREE_CODE (mby) != INTEGER_CST)
2608 return NULL_TREE;
2610 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2611 if (!res)
2612 return NULL_TREE;
2614 return fold_binary_to_constant (MULT_EXPR, type, res,
2615 fold_convert (type, mby));
2617 case PLUS_EXPR:
2618 case MINUS_EXPR:
2619 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2620 if (!p0)
2621 return NULL_TREE;
2622 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2623 if (!p1)
2624 return NULL_TREE;
2626 return fold_binary_to_constant (code, type, p0, p1);
2628 case INTEGER_CST:
2629 if (TREE_CODE (bot) != INTEGER_CST)
2630 return NULL_TREE;
2632 bot = fold_convert (type, bot);
2633 top = fold_convert (type, top);
2635 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2636 the result afterwards. */
2637 if (tree_int_cst_sign_bit (bot))
2639 negate = true;
2640 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2642 else
2643 negate = false;
2645 /* Ditto for TOP. */
2646 if (tree_int_cst_sign_bit (top))
2648 negate = !negate;
2649 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2652 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2653 return NULL_TREE;
2655 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2656 if (negate)
2657 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2658 return res;
2660 default:
2661 return NULL_TREE;
2665 /* Sets COMB to CST. */
2667 static void
2668 aff_combination_const (struct affine_tree_combination *comb, tree type,
2669 unsigned HOST_WIDE_INT cst)
2671 unsigned prec = TYPE_PRECISION (type);
2673 comb->type = type;
2674 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2676 comb->n = 0;
2677 comb->rest = NULL_TREE;
2678 comb->offset = cst & comb->mask;
2681 /* Sets COMB to single element ELT. */
2683 static void
2684 aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2686 unsigned prec = TYPE_PRECISION (type);
2688 comb->type = type;
2689 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2691 comb->n = 1;
2692 comb->elts[0] = elt;
2693 comb->coefs[0] = 1;
2694 comb->rest = NULL_TREE;
2695 comb->offset = 0;
2698 /* Scales COMB by SCALE. */
2700 static void
2701 aff_combination_scale (struct affine_tree_combination *comb,
2702 unsigned HOST_WIDE_INT scale)
2704 unsigned i, j;
2706 if (scale == 1)
2707 return;
2709 if (scale == 0)
2711 aff_combination_const (comb, comb->type, 0);
2712 return;
2715 comb->offset = (scale * comb->offset) & comb->mask;
2716 for (i = 0, j = 0; i < comb->n; i++)
2718 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2719 comb->elts[j] = comb->elts[i];
2720 if (comb->coefs[j] != 0)
2721 j++;
2723 comb->n = j;
2725 if (comb->rest)
2727 if (comb->n < MAX_AFF_ELTS)
2729 comb->coefs[comb->n] = scale;
2730 comb->elts[comb->n] = comb->rest;
2731 comb->rest = NULL_TREE;
2732 comb->n++;
2734 else
2735 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2736 build_int_cst_type (comb->type, scale));
2740 /* Adds ELT * SCALE to COMB. */
2742 static void
2743 aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2744 unsigned HOST_WIDE_INT scale)
2746 unsigned i;
2748 if (scale == 0)
2749 return;
2751 for (i = 0; i < comb->n; i++)
2752 if (operand_equal_p (comb->elts[i], elt, 0))
2754 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2755 if (comb->coefs[i])
2756 return;
2758 comb->n--;
2759 comb->coefs[i] = comb->coefs[comb->n];
2760 comb->elts[i] = comb->elts[comb->n];
2762 if (comb->rest)
2764 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2765 comb->coefs[comb->n] = 1;
2766 comb->elts[comb->n] = comb->rest;
2767 comb->rest = NULL_TREE;
2768 comb->n++;
2770 return;
2772 if (comb->n < MAX_AFF_ELTS)
2774 comb->coefs[comb->n] = scale;
2775 comb->elts[comb->n] = elt;
2776 comb->n++;
2777 return;
2780 if (scale == 1)
2781 elt = fold_convert (comb->type, elt);
2782 else
2783 elt = fold_build2 (MULT_EXPR, comb->type,
2784 fold_convert (comb->type, elt),
2785 build_int_cst_type (comb->type, scale));
2787 if (comb->rest)
2788 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2789 else
2790 comb->rest = elt;
2793 /* Adds COMB2 to COMB1. */
2795 static void
2796 aff_combination_add (struct affine_tree_combination *comb1,
2797 struct affine_tree_combination *comb2)
2799 unsigned i;
2801 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
2802 for (i = 0; i < comb2->n; i++)
2803 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2804 if (comb2->rest)
2805 aff_combination_add_elt (comb1, comb2->rest, 1);
2808 /* Splits EXPR into an affine combination of parts. */
2810 static void
2811 tree_to_aff_combination (tree expr, tree type,
2812 struct affine_tree_combination *comb)
2814 struct affine_tree_combination tmp;
2815 enum tree_code code;
2816 tree cst, core, toffset;
2817 HOST_WIDE_INT bitpos, bitsize;
2818 enum machine_mode mode;
2819 int unsignedp, volatilep;
2821 STRIP_NOPS (expr);
2823 code = TREE_CODE (expr);
2824 switch (code)
2826 case INTEGER_CST:
2827 aff_combination_const (comb, type, int_cst_value (expr));
2828 return;
2830 case PLUS_EXPR:
2831 case MINUS_EXPR:
2832 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2833 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2834 if (code == MINUS_EXPR)
2835 aff_combination_scale (&tmp, -1);
2836 aff_combination_add (comb, &tmp);
2837 return;
2839 case MULT_EXPR:
2840 cst = TREE_OPERAND (expr, 1);
2841 if (TREE_CODE (cst) != INTEGER_CST)
2842 break;
2843 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2844 aff_combination_scale (comb, int_cst_value (cst));
2845 return;
2847 case NEGATE_EXPR:
2848 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2849 aff_combination_scale (comb, -1);
2850 return;
2852 case ADDR_EXPR:
2853 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2854 &toffset, &mode, &unsignedp, &volatilep,
2855 false);
2856 if (bitpos % BITS_PER_UNIT != 0)
2857 break;
2858 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
2859 core = build_fold_addr_expr (core);
2860 if (TREE_CODE (core) == ADDR_EXPR)
2861 aff_combination_add_elt (comb, core, 1);
2862 else
2864 tree_to_aff_combination (core, type, &tmp);
2865 aff_combination_add (comb, &tmp);
2867 if (toffset)
2869 tree_to_aff_combination (toffset, type, &tmp);
2870 aff_combination_add (comb, &tmp);
2872 return;
2874 default:
2875 break;
2878 aff_combination_elt (comb, type, expr);
2881 /* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2883 static tree
2884 add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2885 unsigned HOST_WIDE_INT mask)
2887 enum tree_code code;
2889 scale &= mask;
2890 elt = fold_convert (type, elt);
2892 if (scale == 1)
2894 if (!expr)
2895 return elt;
2897 return fold_build2 (PLUS_EXPR, type, expr, elt);
2900 if (scale == mask)
2902 if (!expr)
2903 return fold_build1 (NEGATE_EXPR, type, elt);
2905 return fold_build2 (MINUS_EXPR, type, expr, elt);
2908 if (!expr)
2909 return fold_build2 (MULT_EXPR, type, elt,
2910 build_int_cst_type (type, scale));
2912 if ((scale | (mask >> 1)) == mask)
2914 /* Scale is negative. */
2915 code = MINUS_EXPR;
2916 scale = (-scale) & mask;
2918 else
2919 code = PLUS_EXPR;
2921 elt = fold_build2 (MULT_EXPR, type, elt,
2922 build_int_cst_type (type, scale));
2923 return fold_build2 (code, type, expr, elt);
2926 /* Copies the tree elements of COMB to ensure that they are not shared. */
2928 static void
2929 unshare_aff_combination (struct affine_tree_combination *comb)
2931 unsigned i;
2933 for (i = 0; i < comb->n; i++)
2934 comb->elts[i] = unshare_expr (comb->elts[i]);
2935 if (comb->rest)
2936 comb->rest = unshare_expr (comb->rest);
2939 /* Makes tree from the affine combination COMB. */
2941 static tree
2942 aff_combination_to_tree (struct affine_tree_combination *comb)
2944 tree type = comb->type;
2945 tree expr = comb->rest;
2946 unsigned i;
2947 unsigned HOST_WIDE_INT off, sgn;
2949 /* Handle the special case produced by get_computation_aff when
2950 the type does not fit in HOST_WIDE_INT. */
2951 if (comb->n == 0 && comb->offset == 0)
2952 return fold_convert (type, expr);
2954 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2956 for (i = 0; i < comb->n; i++)
2957 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2958 comb->mask);
2960 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2962 /* Offset is negative. */
2963 off = (-comb->offset) & comb->mask;
2964 sgn = comb->mask;
2966 else
2968 off = comb->offset;
2969 sgn = 1;
2971 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2972 comb->mask);
2975 /* Determines the expression by that USE is expressed from induction variable
2976 CAND at statement AT in LOOP. The expression is stored in a decomposed
2977 form into AFF. Returns false if USE cannot be expressed using CAND. */
2979 static bool
2980 get_computation_aff (struct loop *loop,
2981 struct iv_use *use, struct iv_cand *cand, tree at,
2982 struct affine_tree_combination *aff)
2984 tree ubase = use->iv->base;
2985 tree ustep = use->iv->step;
2986 tree cbase = cand->iv->base;
2987 tree cstep = cand->iv->step;
2988 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2989 tree uutype;
2990 tree expr, delta;
2991 tree ratio;
2992 unsigned HOST_WIDE_INT ustepi, cstepi;
2993 HOST_WIDE_INT ratioi;
2994 struct affine_tree_combination cbase_aff, expr_aff;
2995 tree cstep_orig = cstep, ustep_orig = ustep;
2997 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2999 /* We do not have a precision to express the values of use. */
3000 return false;
3003 expr = var_at_stmt (loop, cand, at);
3005 if (TREE_TYPE (expr) != ctype)
3007 /* This may happen with the original ivs. */
3008 expr = fold_convert (ctype, expr);
3011 if (TYPE_UNSIGNED (utype))
3012 uutype = utype;
3013 else
3015 uutype = unsigned_type_for (utype);
3016 ubase = fold_convert (uutype, ubase);
3017 ustep = fold_convert (uutype, ustep);
3020 if (uutype != ctype)
3022 expr = fold_convert (uutype, expr);
3023 cbase = fold_convert (uutype, cbase);
3024 cstep = fold_convert (uutype, cstep);
3026 /* If the conversion is not noop, we must take it into account when
3027 considering the value of the step. */
3028 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3029 cstep_orig = cstep;
3032 if (cst_and_fits_in_hwi (cstep_orig)
3033 && cst_and_fits_in_hwi (ustep_orig))
3035 ustepi = int_cst_value (ustep_orig);
3036 cstepi = int_cst_value (cstep_orig);
3038 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
3040 /* TODO maybe consider case when ustep divides cstep and the ratio is
3041 a power of 2 (so that the division is fast to execute)? We would
3042 need to be much more careful with overflows etc. then. */
3043 return false;
3046 ratio = build_int_cst_type (uutype, ratioi);
3048 else
3050 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
3051 if (!ratio)
3052 return false;
3054 /* Ratioi is only used to detect special cases when the multiplicative
3055 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
3056 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
3057 to integer_onep/integer_all_onesp, since the former ignores
3058 TREE_OVERFLOW. */
3059 if (cst_and_fits_in_hwi (ratio))
3060 ratioi = int_cst_value (ratio);
3061 else if (integer_onep (ratio))
3062 ratioi = 1;
3063 else if (integer_all_onesp (ratio))
3064 ratioi = -1;
3065 else
3066 ratioi = 0;
3069 /* We may need to shift the value if we are after the increment. */
3070 if (stmt_after_increment (loop, cand, at))
3071 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
3073 /* use = ubase - ratio * cbase + ratio * var.
3075 In general case ubase + ratio * (var - cbase) could be better (one less
3076 multiplication), but often it is possible to eliminate redundant parts
3077 of computations from (ubase - ratio * cbase) term, and if it does not
3078 happen, fold is able to apply the distributive law to obtain this form
3079 anyway. */
3081 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
3083 /* Let's compute in trees and just return the result in AFF. This case
3084 should not be very common, and fold itself is not that bad either,
3085 so making the aff. functions more complicated to handle this case
3086 is not that urgent. */
3087 if (ratioi == 1)
3089 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3090 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3092 else if (ratioi == -1)
3094 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3095 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3097 else
3099 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3100 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3101 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3102 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
3105 aff->type = uutype;
3106 aff->n = 0;
3107 aff->offset = 0;
3108 aff->mask = 0;
3109 aff->rest = expr;
3110 return true;
3113 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3114 possible to compute ratioi. */
3115 gcc_assert (ratioi);
3117 tree_to_aff_combination (ubase, uutype, aff);
3118 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3119 tree_to_aff_combination (expr, uutype, &expr_aff);
3120 aff_combination_scale (&cbase_aff, -ratioi);
3121 aff_combination_scale (&expr_aff, ratioi);
3122 aff_combination_add (aff, &cbase_aff);
3123 aff_combination_add (aff, &expr_aff);
3125 return true;
3128 /* Determines the expression by that USE is expressed from induction variable
3129 CAND at statement AT in LOOP. The computation is unshared. */
3131 static tree
3132 get_computation_at (struct loop *loop,
3133 struct iv_use *use, struct iv_cand *cand, tree at)
3135 struct affine_tree_combination aff;
3136 tree type = TREE_TYPE (use->iv->base);
3138 if (!get_computation_aff (loop, use, cand, at, &aff))
3139 return NULL_TREE;
3140 unshare_aff_combination (&aff);
3141 return fold_convert (type, aff_combination_to_tree (&aff));
3144 /* Determines the expression by that USE is expressed from induction variable
3145 CAND in LOOP. The computation is unshared. */
3147 static tree
3148 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3150 return get_computation_at (loop, use, cand, use->stmt);
3153 /* Returns cost of addition in MODE. */
3155 static unsigned
3156 add_cost (enum machine_mode mode)
3158 static unsigned costs[NUM_MACHINE_MODES];
3159 rtx seq;
3160 unsigned cost;
3162 if (costs[mode])
3163 return costs[mode];
3165 start_sequence ();
3166 force_operand (gen_rtx_fmt_ee (PLUS, mode,
3167 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3168 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
3169 NULL_RTX);
3170 seq = get_insns ();
3171 end_sequence ();
3173 cost = seq_cost (seq);
3174 if (!cost)
3175 cost = 1;
3177 costs[mode] = cost;
3179 if (dump_file && (dump_flags & TDF_DETAILS))
3180 fprintf (dump_file, "Addition in %s costs %d\n",
3181 GET_MODE_NAME (mode), cost);
3182 return cost;
3185 /* Entry in a hashtable of already known costs for multiplication. */
3186 struct mbc_entry
3188 HOST_WIDE_INT cst; /* The constant to multiply by. */
3189 enum machine_mode mode; /* In mode. */
3190 unsigned cost; /* The cost. */
3193 /* Counts hash value for the ENTRY. */
3195 static hashval_t
3196 mbc_entry_hash (const void *entry)
3198 const struct mbc_entry *e = entry;
3200 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3203 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3205 static int
3206 mbc_entry_eq (const void *entry1, const void *entry2)
3208 const struct mbc_entry *e1 = entry1;
3209 const struct mbc_entry *e2 = entry2;
3211 return (e1->mode == e2->mode
3212 && e1->cst == e2->cst);
3215 /* Returns cost of multiplication by constant CST in MODE. */
3217 unsigned
3218 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3220 static htab_t costs;
3221 struct mbc_entry **cached, act;
3222 rtx seq;
3223 unsigned cost;
3225 if (!costs)
3226 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3228 act.mode = mode;
3229 act.cst = cst;
3230 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3231 if (*cached)
3232 return (*cached)->cost;
3234 *cached = xmalloc (sizeof (struct mbc_entry));
3235 (*cached)->mode = mode;
3236 (*cached)->cst = cst;
3238 start_sequence ();
3239 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3240 gen_int_mode (cst, mode), NULL_RTX, 0);
3241 seq = get_insns ();
3242 end_sequence ();
3244 cost = seq_cost (seq);
3246 if (dump_file && (dump_flags & TDF_DETAILS))
3247 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3248 (int) cst, GET_MODE_NAME (mode), cost);
3250 (*cached)->cost = cost;
3252 return cost;
3255 /* Returns true if multiplying by RATIO is allowed in address. */
3257 bool
3258 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3260 #define MAX_RATIO 128
3261 static sbitmap valid_mult;
3263 if (!valid_mult)
3265 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3266 rtx addr;
3267 HOST_WIDE_INT i;
3269 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3270 sbitmap_zero (valid_mult);
3271 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3272 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3274 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3275 if (memory_address_p (Pmode, addr))
3276 SET_BIT (valid_mult, i + MAX_RATIO);
3279 if (dump_file && (dump_flags & TDF_DETAILS))
3281 fprintf (dump_file, " allowed multipliers:");
3282 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3283 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3284 fprintf (dump_file, " %d", (int) i);
3285 fprintf (dump_file, "\n");
3286 fprintf (dump_file, "\n");
3290 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3291 return false;
3293 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3296 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3297 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3298 variable is omitted. The created memory accesses MODE.
3300 TODO -- there must be some better way. This all is quite crude. */
3302 static unsigned
3303 get_address_cost (bool symbol_present, bool var_present,
3304 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3306 static bool initialized = false;
3307 static HOST_WIDE_INT rat, off;
3308 static HOST_WIDE_INT min_offset, max_offset;
3309 static unsigned costs[2][2][2][2];
3310 unsigned cost, acost;
3311 rtx seq, addr, base;
3312 bool offset_p, ratio_p;
3313 rtx reg1;
3314 HOST_WIDE_INT s_offset;
3315 unsigned HOST_WIDE_INT mask;
3316 unsigned bits;
3318 if (!initialized)
3320 HOST_WIDE_INT i;
3321 initialized = true;
3323 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3325 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3326 for (i = 1; i <= 1 << 20; i <<= 1)
3328 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3329 if (!memory_address_p (Pmode, addr))
3330 break;
3332 max_offset = i >> 1;
3333 off = max_offset;
3335 for (i = 1; i <= 1 << 20; i <<= 1)
3337 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
3338 if (!memory_address_p (Pmode, addr))
3339 break;
3341 min_offset = -(i >> 1);
3343 if (dump_file && (dump_flags & TDF_DETAILS))
3345 fprintf (dump_file, "get_address_cost:\n");
3346 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3347 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3350 rat = 1;
3351 for (i = 2; i <= MAX_RATIO; i++)
3352 if (multiplier_allowed_in_address_p (i))
3354 rat = i;
3355 break;
3359 bits = GET_MODE_BITSIZE (Pmode);
3360 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3361 offset &= mask;
3362 if ((offset >> (bits - 1) & 1))
3363 offset |= ~mask;
3364 s_offset = offset;
3366 cost = 0;
3367 offset_p = (s_offset != 0
3368 && min_offset <= s_offset && s_offset <= max_offset);
3369 ratio_p = (ratio != 1
3370 && multiplier_allowed_in_address_p (ratio));
3372 if (ratio != 1 && !ratio_p)
3373 cost += multiply_by_cost (ratio, Pmode);
3375 if (s_offset && !offset_p && !symbol_present)
3377 cost += add_cost (Pmode);
3378 var_present = true;
3381 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3382 if (!acost)
3384 int old_cse_not_expected;
3385 acost = 0;
3387 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3388 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3389 if (ratio_p)
3390 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
3392 if (var_present)
3393 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3395 if (symbol_present)
3397 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3398 if (offset_p)
3399 base = gen_rtx_fmt_e (CONST, Pmode,
3400 gen_rtx_fmt_ee (PLUS, Pmode,
3401 base,
3402 gen_int_mode (off, Pmode)));
3404 else if (offset_p)
3405 base = gen_int_mode (off, Pmode);
3406 else
3407 base = NULL_RTX;
3409 if (base)
3410 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3412 start_sequence ();
3413 /* To avoid splitting addressing modes, pretend that no cse will
3414 follow. */
3415 old_cse_not_expected = cse_not_expected;
3416 cse_not_expected = true;
3417 addr = memory_address (Pmode, addr);
3418 cse_not_expected = old_cse_not_expected;
3419 seq = get_insns ();
3420 end_sequence ();
3422 acost = seq_cost (seq);
3423 acost += address_cost (addr, Pmode);
3425 if (!acost)
3426 acost = 1;
3427 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3430 return cost + acost;
3433 /* Estimates cost of forcing expression EXPR into a variable. */
3435 unsigned
3436 force_expr_to_var_cost (tree expr)
3438 static bool costs_initialized = false;
3439 static unsigned integer_cost;
3440 static unsigned symbol_cost;
3441 static unsigned address_cost;
3442 tree op0, op1;
3443 unsigned cost0, cost1, cost;
3444 enum machine_mode mode;
3446 if (!costs_initialized)
3448 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3449 rtx x = gen_rtx_MEM (DECL_MODE (var),
3450 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3451 tree addr;
3452 tree type = build_pointer_type (integer_type_node);
3454 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
3455 2000));
3457 SET_DECL_RTL (var, x);
3458 TREE_STATIC (var) = 1;
3459 addr = build1 (ADDR_EXPR, type, var);
3460 symbol_cost = computation_cost (addr) + 1;
3462 address_cost
3463 = computation_cost (build2 (PLUS_EXPR, type,
3464 addr,
3465 build_int_cst_type (type, 2000))) + 1;
3466 if (dump_file && (dump_flags & TDF_DETAILS))
3468 fprintf (dump_file, "force_expr_to_var_cost:\n");
3469 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3470 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3471 fprintf (dump_file, " address %d\n", (int) address_cost);
3472 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3473 fprintf (dump_file, "\n");
3476 costs_initialized = true;
3479 STRIP_NOPS (expr);
3481 if (SSA_VAR_P (expr))
3482 return 0;
3484 if (TREE_INVARIANT (expr))
3486 if (TREE_CODE (expr) == INTEGER_CST)
3487 return integer_cost;
3489 if (TREE_CODE (expr) == ADDR_EXPR)
3491 tree obj = TREE_OPERAND (expr, 0);
3493 if (TREE_CODE (obj) == VAR_DECL
3494 || TREE_CODE (obj) == PARM_DECL
3495 || TREE_CODE (obj) == RESULT_DECL)
3496 return symbol_cost;
3499 return address_cost;
3502 switch (TREE_CODE (expr))
3504 case PLUS_EXPR:
3505 case MINUS_EXPR:
3506 case MULT_EXPR:
3507 op0 = TREE_OPERAND (expr, 0);
3508 op1 = TREE_OPERAND (expr, 1);
3509 STRIP_NOPS (op0);
3510 STRIP_NOPS (op1);
3512 if (is_gimple_val (op0))
3513 cost0 = 0;
3514 else
3515 cost0 = force_expr_to_var_cost (op0);
3517 if (is_gimple_val (op1))
3518 cost1 = 0;
3519 else
3520 cost1 = force_expr_to_var_cost (op1);
3522 break;
3524 default:
3525 /* Just an arbitrary value, FIXME. */
3526 return target_spill_cost;
3529 mode = TYPE_MODE (TREE_TYPE (expr));
3530 switch (TREE_CODE (expr))
3532 case PLUS_EXPR:
3533 case MINUS_EXPR:
3534 cost = add_cost (mode);
3535 break;
3537 case MULT_EXPR:
3538 if (cst_and_fits_in_hwi (op0))
3539 cost = multiply_by_cost (int_cst_value (op0), mode);
3540 else if (cst_and_fits_in_hwi (op1))
3541 cost = multiply_by_cost (int_cst_value (op1), mode);
3542 else
3543 return target_spill_cost;
3544 break;
3546 default:
3547 gcc_unreachable ();
3550 cost += cost0;
3551 cost += cost1;
3553 /* Bound the cost by target_spill_cost. The parts of complicated
3554 computations often are either loop invariant or at least can
3555 be shared between several iv uses, so letting this grow without
3556 limits would not give reasonable results. */
3557 return cost < target_spill_cost ? cost : target_spill_cost;
3560 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3561 invariants the computation depends on. */
3563 static unsigned
3564 force_var_cost (struct ivopts_data *data,
3565 tree expr, bitmap *depends_on)
3567 if (depends_on)
3569 fd_ivopts_data = data;
3570 walk_tree (&expr, find_depends, depends_on, NULL);
3573 return force_expr_to_var_cost (expr);
3576 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3577 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3578 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3579 invariants the computation depends on. */
3581 static unsigned
3582 split_address_cost (struct ivopts_data *data,
3583 tree addr, bool *symbol_present, bool *var_present,
3584 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3586 tree core;
3587 HOST_WIDE_INT bitsize;
3588 HOST_WIDE_INT bitpos;
3589 tree toffset;
3590 enum machine_mode mode;
3591 int unsignedp, volatilep;
3593 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3594 &unsignedp, &volatilep, false);
3596 if (toffset != 0
3597 || bitpos % BITS_PER_UNIT != 0
3598 || TREE_CODE (core) != VAR_DECL)
3600 *symbol_present = false;
3601 *var_present = true;
3602 fd_ivopts_data = data;
3603 walk_tree (&addr, find_depends, depends_on, NULL);
3604 return target_spill_cost;
3607 *offset += bitpos / BITS_PER_UNIT;
3608 if (TREE_STATIC (core)
3609 || DECL_EXTERNAL (core))
3611 *symbol_present = true;
3612 *var_present = false;
3613 return 0;
3616 *symbol_present = false;
3617 *var_present = true;
3618 return 0;
3621 /* Estimates cost of expressing difference of addresses E1 - E2 as
3622 var + symbol + offset. The value of offset is added to OFFSET,
3623 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3624 part is missing. DEPENDS_ON is a set of the invariants the computation
3625 depends on. */
3627 static unsigned
3628 ptr_difference_cost (struct ivopts_data *data,
3629 tree e1, tree e2, bool *symbol_present, bool *var_present,
3630 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3632 HOST_WIDE_INT diff = 0;
3633 unsigned cost;
3635 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3637 if (ptr_difference_const (e1, e2, &diff))
3639 *offset += diff;
3640 *symbol_present = false;
3641 *var_present = false;
3642 return 0;
3645 if (e2 == integer_zero_node)
3646 return split_address_cost (data, TREE_OPERAND (e1, 0),
3647 symbol_present, var_present, offset, depends_on);
3649 *symbol_present = false;
3650 *var_present = true;
3652 cost = force_var_cost (data, e1, depends_on);
3653 cost += force_var_cost (data, e2, depends_on);
3654 cost += add_cost (Pmode);
3656 return cost;
3659 /* Estimates cost of expressing difference E1 - E2 as
3660 var + symbol + offset. The value of offset is added to OFFSET,
3661 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3662 part is missing. DEPENDS_ON is a set of the invariants the computation
3663 depends on. */
3665 static unsigned
3666 difference_cost (struct ivopts_data *data,
3667 tree e1, tree e2, bool *symbol_present, bool *var_present,
3668 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3670 unsigned cost;
3671 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3672 unsigned HOST_WIDE_INT off1, off2;
3674 e1 = strip_offset (e1, &off1);
3675 e2 = strip_offset (e2, &off2);
3676 *offset += off1 - off2;
3678 STRIP_NOPS (e1);
3679 STRIP_NOPS (e2);
3681 if (TREE_CODE (e1) == ADDR_EXPR)
3682 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3683 depends_on);
3684 *symbol_present = false;
3686 if (operand_equal_p (e1, e2, 0))
3688 *var_present = false;
3689 return 0;
3691 *var_present = true;
3692 if (zero_p (e2))
3693 return force_var_cost (data, e1, depends_on);
3695 if (zero_p (e1))
3697 cost = force_var_cost (data, e2, depends_on);
3698 cost += multiply_by_cost (-1, mode);
3700 return cost;
3703 cost = force_var_cost (data, e1, depends_on);
3704 cost += force_var_cost (data, e2, depends_on);
3705 cost += add_cost (mode);
3707 return cost;
3710 /* Determines the cost of the computation by that USE is expressed
3711 from induction variable CAND. If ADDRESS_P is true, we just need
3712 to create an address from it, otherwise we want to get it into
3713 register. A set of invariants we depend on is stored in
3714 DEPENDS_ON. AT is the statement at that the value is computed. */
3716 static unsigned
3717 get_computation_cost_at (struct ivopts_data *data,
3718 struct iv_use *use, struct iv_cand *cand,
3719 bool address_p, bitmap *depends_on, tree at)
3721 tree ubase = use->iv->base, ustep = use->iv->step;
3722 tree cbase, cstep;
3723 tree utype = TREE_TYPE (ubase), ctype;
3724 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3725 HOST_WIDE_INT ratio, aratio;
3726 bool var_present, symbol_present;
3727 unsigned cost = 0, n_sums;
3729 *depends_on = NULL;
3731 /* Only consider real candidates. */
3732 if (!cand->iv)
3733 return INFTY;
3735 cbase = cand->iv->base;
3736 cstep = cand->iv->step;
3737 ctype = TREE_TYPE (cbase);
3739 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3741 /* We do not have a precision to express the values of use. */
3742 return INFTY;
3745 if (address_p)
3747 /* Do not try to express address of an object with computation based
3748 on address of a different object. This may cause problems in rtl
3749 level alias analysis (that does not expect this to be happening,
3750 as this is illegal in C), and would be unlikely to be useful
3751 anyway. */
3752 if (use->iv->base_object
3753 && cand->iv->base_object
3754 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3755 return INFTY;
3758 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3760 /* TODO -- add direct handling of this case. */
3761 goto fallback;
3764 /* CSTEPI is removed from the offset in case statement is after the
3765 increment. If the step is not constant, we use zero instead.
3766 This is a bit imprecise (there is the extra addition), but
3767 redundancy elimination is likely to transform the code so that
3768 it uses value of the variable before increment anyway,
3769 so it is not that much unrealistic. */
3770 if (cst_and_fits_in_hwi (cstep))
3771 cstepi = int_cst_value (cstep);
3772 else
3773 cstepi = 0;
3775 if (cst_and_fits_in_hwi (ustep)
3776 && cst_and_fits_in_hwi (cstep))
3778 ustepi = int_cst_value (ustep);
3780 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3781 return INFTY;
3783 else
3785 tree rat;
3787 rat = constant_multiple_of (utype, ustep, cstep);
3789 if (!rat)
3790 return INFTY;
3792 if (cst_and_fits_in_hwi (rat))
3793 ratio = int_cst_value (rat);
3794 else if (integer_onep (rat))
3795 ratio = 1;
3796 else if (integer_all_onesp (rat))
3797 ratio = -1;
3798 else
3799 return INFTY;
3802 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3803 or ratio == 1, it is better to handle this like
3805 ubase - ratio * cbase + ratio * var
3807 (also holds in the case ratio == -1, TODO. */
3809 if (cst_and_fits_in_hwi (cbase))
3811 offset = - ratio * int_cst_value (cbase);
3812 cost += difference_cost (data,
3813 ubase, integer_zero_node,
3814 &symbol_present, &var_present, &offset,
3815 depends_on);
3817 else if (ratio == 1)
3819 cost += difference_cost (data,
3820 ubase, cbase,
3821 &symbol_present, &var_present, &offset,
3822 depends_on);
3824 else
3826 cost += force_var_cost (data, cbase, depends_on);
3827 cost += add_cost (TYPE_MODE (ctype));
3828 cost += difference_cost (data,
3829 ubase, integer_zero_node,
3830 &symbol_present, &var_present, &offset,
3831 depends_on);
3834 /* If we are after the increment, the value of the candidate is higher by
3835 one iteration. */
3836 if (stmt_after_increment (data->current_loop, cand, at))
3837 offset -= ratio * cstepi;
3839 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3840 (symbol/var/const parts may be omitted). If we are looking for an address,
3841 find the cost of addressing this. */
3842 if (address_p)
3843 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
3845 /* Otherwise estimate the costs for computing the expression. */
3846 aratio = ratio > 0 ? ratio : -ratio;
3847 if (!symbol_present && !var_present && !offset)
3849 if (ratio != 1)
3850 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3852 return cost;
3855 if (aratio != 1)
3856 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3858 n_sums = 1;
3859 if (var_present
3860 /* Symbol + offset should be compile-time computable. */
3861 && (symbol_present || offset))
3862 n_sums++;
3864 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3866 fallback:
3868 /* Just get the expression, expand it and measure the cost. */
3869 tree comp = get_computation_at (data->current_loop, use, cand, at);
3871 if (!comp)
3872 return INFTY;
3874 if (address_p)
3875 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3877 return computation_cost (comp);
3881 /* Determines the cost of the computation by that USE is expressed
3882 from induction variable CAND. If ADDRESS_P is true, we just need
3883 to create an address from it, otherwise we want to get it into
3884 register. A set of invariants we depend on is stored in
3885 DEPENDS_ON. */
3887 static unsigned
3888 get_computation_cost (struct ivopts_data *data,
3889 struct iv_use *use, struct iv_cand *cand,
3890 bool address_p, bitmap *depends_on)
3892 return get_computation_cost_at (data,
3893 use, cand, address_p, depends_on, use->stmt);
3896 /* Determines cost of basing replacement of USE on CAND in a generic
3897 expression. */
3899 static bool
3900 determine_use_iv_cost_generic (struct ivopts_data *data,
3901 struct iv_use *use, struct iv_cand *cand)
3903 bitmap depends_on;
3904 unsigned cost;
3906 /* The simple case first -- if we need to express value of the preserved
3907 original biv, the cost is 0. This also prevents us from counting the
3908 cost of increment twice -- once at this use and once in the cost of
3909 the candidate. */
3910 if (cand->pos == IP_ORIGINAL
3911 && cand->incremented_at == use->stmt)
3913 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3914 return true;
3917 cost = get_computation_cost (data, use, cand, false, &depends_on);
3918 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3920 return cost != INFTY;
3923 /* Determines cost of basing replacement of USE on CAND in an address. */
3925 static bool
3926 determine_use_iv_cost_address (struct ivopts_data *data,
3927 struct iv_use *use, struct iv_cand *cand)
3929 bitmap depends_on;
3930 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3932 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3934 return cost != INFTY;
3937 /* Computes value of induction variable IV in iteration NITER. */
3939 static tree
3940 iv_value (struct iv *iv, tree niter)
3942 tree val;
3943 tree type = TREE_TYPE (iv->base);
3945 niter = fold_convert (type, niter);
3946 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
3948 return fold_build2 (PLUS_EXPR, type, iv->base, val);
3951 /* Computes value of candidate CAND at position AT in iteration NITER. */
3953 static tree
3954 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3956 tree val = iv_value (cand->iv, niter);
3957 tree type = TREE_TYPE (cand->iv->base);
3959 if (stmt_after_increment (loop, cand, at))
3960 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
3962 return val;
3965 /* Returns period of induction variable iv. */
3967 static tree
3968 iv_period (struct iv *iv)
3970 tree step = iv->step, period, type;
3971 tree pow2div;
3973 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3975 /* Period of the iv is gcd (step, type range). Since type range is power
3976 of two, it suffices to determine the maximum power of two that divides
3977 step. */
3978 pow2div = num_ending_zeros (step);
3979 type = unsigned_type_for (TREE_TYPE (step));
3981 period = build_low_bits_mask (type,
3982 (TYPE_PRECISION (type)
3983 - tree_low_cst (pow2div, 1)));
3985 return period;
3988 /* Returns the comparison operator used when eliminating the iv USE. */
3990 static enum tree_code
3991 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3993 struct loop *loop = data->current_loop;
3994 basic_block ex_bb;
3995 edge exit;
3997 ex_bb = bb_for_stmt (use->stmt);
3998 exit = EDGE_SUCC (ex_bb, 0);
3999 if (flow_bb_inside_loop_p (loop, exit->dest))
4000 exit = EDGE_SUCC (ex_bb, 1);
4002 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4005 /* Check whether it is possible to express the condition in USE by comparison
4006 of candidate CAND. If so, store the value compared with to BOUND. */
4008 static bool
4009 may_eliminate_iv (struct ivopts_data *data,
4010 struct iv_use *use, struct iv_cand *cand, tree *bound)
4012 basic_block ex_bb;
4013 edge exit;
4014 struct tree_niter_desc *niter;
4015 tree nit, nit_type;
4016 tree wider_type, period, per_type;
4017 struct loop *loop = data->current_loop;
4019 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4020 return false;
4022 /* For now works only for exits that dominate the loop latch. TODO -- extend
4023 for other conditions inside loop body. */
4024 ex_bb = bb_for_stmt (use->stmt);
4025 if (use->stmt != last_stmt (ex_bb)
4026 || TREE_CODE (use->stmt) != COND_EXPR)
4027 return false;
4028 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4029 return false;
4031 exit = EDGE_SUCC (ex_bb, 0);
4032 if (flow_bb_inside_loop_p (loop, exit->dest))
4033 exit = EDGE_SUCC (ex_bb, 1);
4034 if (flow_bb_inside_loop_p (loop, exit->dest))
4035 return false;
4037 niter = niter_for_exit (data, exit);
4038 if (!niter
4039 || !zero_p (niter->may_be_zero))
4040 return false;
4042 nit = niter->niter;
4043 nit_type = TREE_TYPE (nit);
4045 /* Determine whether we may use the variable to test whether niter iterations
4046 elapsed. This is the case iff the period of the induction variable is
4047 greater than the number of iterations. */
4048 period = iv_period (cand->iv);
4049 if (!period)
4050 return false;
4051 per_type = TREE_TYPE (period);
4053 wider_type = TREE_TYPE (period);
4054 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
4055 wider_type = per_type;
4056 else
4057 wider_type = nit_type;
4059 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
4060 fold_convert (wider_type, period),
4061 fold_convert (wider_type, nit))))
4062 return false;
4064 *bound = cand_value_at (loop, cand, use->stmt, nit);
4065 return true;
4068 /* Determines cost of basing replacement of USE on CAND in a condition. */
4070 static bool
4071 determine_use_iv_cost_condition (struct ivopts_data *data,
4072 struct iv_use *use, struct iv_cand *cand)
4074 tree bound = NULL_TREE, op, cond;
4075 bitmap depends_on = NULL;
4076 unsigned cost;
4078 /* Only consider real candidates. */
4079 if (!cand->iv)
4081 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4082 return false;
4085 if (may_eliminate_iv (data, use, cand, &bound))
4087 cost = force_var_cost (data, bound, &depends_on);
4089 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4090 return cost != INFTY;
4093 /* The induction variable elimination failed; just express the original
4094 giv. If it is compared with an invariant, note that we cannot get
4095 rid of it. */
4096 cost = get_computation_cost (data, use, cand, false, &depends_on);
4098 cond = *use->op_p;
4099 if (TREE_CODE (cond) != SSA_NAME)
4101 op = TREE_OPERAND (cond, 0);
4102 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4103 op = TREE_OPERAND (cond, 1);
4104 if (TREE_CODE (op) == SSA_NAME)
4106 op = get_iv (data, op)->base;
4107 fd_ivopts_data = data;
4108 walk_tree (&op, find_depends, &depends_on, NULL);
4112 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4113 return cost != INFTY;
4116 /* Checks whether it is possible to replace the final value of USE by
4117 a direct computation. If so, the formula is stored to *VALUE. */
4119 static bool
4120 may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
4121 tree *value)
4123 struct loop *loop = data->current_loop;
4124 edge exit;
4125 struct tree_niter_desc *niter;
4127 exit = single_dom_exit (loop);
4128 if (!exit)
4129 return false;
4131 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
4132 bb_for_stmt (use->stmt)));
4134 niter = niter_for_single_dom_exit (data);
4135 if (!niter
4136 || !zero_p (niter->may_be_zero))
4137 return false;
4139 *value = iv_value (use->iv, niter->niter);
4141 return true;
4144 /* Determines cost of replacing final value of USE using CAND. */
4146 static bool
4147 determine_use_iv_cost_outer (struct ivopts_data *data,
4148 struct iv_use *use, struct iv_cand *cand)
4150 bitmap depends_on;
4151 unsigned cost;
4152 edge exit;
4153 tree value = NULL_TREE;
4154 struct loop *loop = data->current_loop;
4156 /* The simple case first -- if we need to express value of the preserved
4157 original biv, the cost is 0. This also prevents us from counting the
4158 cost of increment twice -- once at this use and once in the cost of
4159 the candidate. */
4160 if (cand->pos == IP_ORIGINAL
4161 && cand->incremented_at == use->stmt)
4163 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
4164 return true;
4167 if (!cand->iv)
4169 if (!may_replace_final_value (data, use, &value))
4171 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
4172 return false;
4175 depends_on = NULL;
4176 cost = force_var_cost (data, value, &depends_on);
4178 cost /= AVG_LOOP_NITER (loop);
4180 set_use_iv_cost (data, use, cand, cost, depends_on, value);
4181 return cost != INFTY;
4184 exit = single_dom_exit (loop);
4185 if (exit)
4187 /* If there is just a single exit, we may use value of the candidate
4188 after we take it to determine the value of use. */
4189 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
4190 last_stmt (exit->src));
4191 if (cost != INFTY)
4192 cost /= AVG_LOOP_NITER (loop);
4194 else
4196 /* Otherwise we just need to compute the iv. */
4197 cost = get_computation_cost (data, use, cand, false, &depends_on);
4200 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
4202 return cost != INFTY;
4205 /* Determines cost of basing replacement of USE on CAND. Returns false
4206 if USE cannot be based on CAND. */
4208 static bool
4209 determine_use_iv_cost (struct ivopts_data *data,
4210 struct iv_use *use, struct iv_cand *cand)
4212 switch (use->type)
4214 case USE_NONLINEAR_EXPR:
4215 return determine_use_iv_cost_generic (data, use, cand);
4217 case USE_OUTER:
4218 return determine_use_iv_cost_outer (data, use, cand);
4220 case USE_ADDRESS:
4221 return determine_use_iv_cost_address (data, use, cand);
4223 case USE_COMPARE:
4224 return determine_use_iv_cost_condition (data, use, cand);
4226 default:
4227 gcc_unreachable ();
4231 /* Determines costs of basing the use of the iv on an iv candidate. */
4233 static void
4234 determine_use_iv_costs (struct ivopts_data *data)
4236 unsigned i, j;
4237 struct iv_use *use;
4238 struct iv_cand *cand;
4239 bitmap to_clear = BITMAP_ALLOC (NULL);
4241 alloc_use_cost_map (data);
4243 for (i = 0; i < n_iv_uses (data); i++)
4245 use = iv_use (data, i);
4247 if (data->consider_all_candidates)
4249 for (j = 0; j < n_iv_cands (data); j++)
4251 cand = iv_cand (data, j);
4252 determine_use_iv_cost (data, use, cand);
4255 else
4257 bitmap_iterator bi;
4259 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4261 cand = iv_cand (data, j);
4262 if (!determine_use_iv_cost (data, use, cand))
4263 bitmap_set_bit (to_clear, j);
4266 /* Remove the candidates for that the cost is infinite from
4267 the list of related candidates. */
4268 bitmap_and_compl_into (use->related_cands, to_clear);
4269 bitmap_clear (to_clear);
4273 BITMAP_FREE (to_clear);
4275 if (dump_file && (dump_flags & TDF_DETAILS))
4277 fprintf (dump_file, "Use-candidate costs:\n");
4279 for (i = 0; i < n_iv_uses (data); i++)
4281 use = iv_use (data, i);
4283 fprintf (dump_file, "Use %d:\n", i);
4284 fprintf (dump_file, " cand\tcost\tdepends on\n");
4285 for (j = 0; j < use->n_map_members; j++)
4287 if (!use->cost_map[j].cand
4288 || use->cost_map[j].cost == INFTY)
4289 continue;
4291 fprintf (dump_file, " %d\t%d\t",
4292 use->cost_map[j].cand->id,
4293 use->cost_map[j].cost);
4294 if (use->cost_map[j].depends_on)
4295 bitmap_print (dump_file,
4296 use->cost_map[j].depends_on, "","");
4297 fprintf (dump_file, "\n");
4300 fprintf (dump_file, "\n");
4302 fprintf (dump_file, "\n");
4306 /* Determines cost of the candidate CAND. */
4308 static void
4309 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4311 unsigned cost_base, cost_step;
4312 tree base;
4314 if (!cand->iv)
4316 cand->cost = 0;
4317 return;
4320 /* There are two costs associated with the candidate -- its increment
4321 and its initialization. The second is almost negligible for any loop
4322 that rolls enough, so we take it just very little into account. */
4324 base = cand->iv->base;
4325 cost_base = force_var_cost (data, base, NULL);
4326 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4328 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4330 /* Prefer the original iv unless we may gain something by replacing it;
4331 this is not really relevant for artificial ivs created by other
4332 passes. */
4333 if (cand->pos == IP_ORIGINAL
4334 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4335 cand->cost--;
4337 /* Prefer not to insert statements into latch unless there are some
4338 already (so that we do not create unnecessary jumps). */
4339 if (cand->pos == IP_END
4340 && empty_block_p (ip_end_pos (data->current_loop)))
4341 cand->cost++;
4344 /* Determines costs of computation of the candidates. */
4346 static void
4347 determine_iv_costs (struct ivopts_data *data)
4349 unsigned i;
4351 if (dump_file && (dump_flags & TDF_DETAILS))
4353 fprintf (dump_file, "Candidate costs:\n");
4354 fprintf (dump_file, " cand\tcost\n");
4357 for (i = 0; i < n_iv_cands (data); i++)
4359 struct iv_cand *cand = iv_cand (data, i);
4361 determine_iv_cost (data, cand);
4363 if (dump_file && (dump_flags & TDF_DETAILS))
4364 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4367 if (dump_file && (dump_flags & TDF_DETAILS))
4368 fprintf (dump_file, "\n");
4371 /* Calculates cost for having SIZE induction variables. */
4373 static unsigned
4374 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4376 return global_cost_for_size (size,
4377 loop_data (data->current_loop)->regs_used,
4378 n_iv_uses (data));
4381 /* For each size of the induction variable set determine the penalty. */
4383 static void
4384 determine_set_costs (struct ivopts_data *data)
4386 unsigned j, n;
4387 tree phi, op;
4388 struct loop *loop = data->current_loop;
4389 bitmap_iterator bi;
4391 /* We use the following model (definitely improvable, especially the
4392 cost function -- TODO):
4394 We estimate the number of registers available (using MD data), name it A.
4396 We estimate the number of registers used by the loop, name it U. This
4397 number is obtained as the number of loop phi nodes (not counting virtual
4398 registers and bivs) + the number of variables from outside of the loop.
4400 We set a reserve R (free regs that are used for temporary computations,
4401 etc.). For now the reserve is a constant 3.
4403 Let I be the number of induction variables.
4405 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4406 make a lot of ivs without a reason).
4407 -- if A - R < U + I <= A, the cost is I * PRES_COST
4408 -- if U + I > A, the cost is I * PRES_COST and
4409 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4411 if (dump_file && (dump_flags & TDF_DETAILS))
4413 fprintf (dump_file, "Global costs:\n");
4414 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4415 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4416 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4417 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4420 n = 0;
4421 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
4423 op = PHI_RESULT (phi);
4425 if (!is_gimple_reg (op))
4426 continue;
4428 if (get_iv (data, op))
4429 continue;
4431 n++;
4434 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4436 struct version_info *info = ver_info (data, j);
4438 if (info->inv_id && info->has_nonlin_use)
4439 n++;
4442 loop_data (loop)->regs_used = n;
4443 if (dump_file && (dump_flags & TDF_DETAILS))
4444 fprintf (dump_file, " regs_used %d\n", n);
4446 if (dump_file && (dump_flags & TDF_DETAILS))
4448 fprintf (dump_file, " cost for size:\n");
4449 fprintf (dump_file, " ivs\tcost\n");
4450 for (j = 0; j <= 2 * target_avail_regs; j++)
4451 fprintf (dump_file, " %d\t%d\n", j,
4452 ivopts_global_cost_for_size (data, j));
4453 fprintf (dump_file, "\n");
4457 /* Returns true if A is a cheaper cost pair than B. */
4459 static bool
4460 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4462 if (!a)
4463 return false;
4465 if (!b)
4466 return true;
4468 if (a->cost < b->cost)
4469 return true;
4471 if (a->cost > b->cost)
4472 return false;
4474 /* In case the costs are the same, prefer the cheaper candidate. */
4475 if (a->cand->cost < b->cand->cost)
4476 return true;
4478 return false;
4481 /* Computes the cost field of IVS structure. */
4483 static void
4484 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4486 unsigned cost = 0;
4488 cost += ivs->cand_use_cost;
4489 cost += ivs->cand_cost;
4490 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4492 ivs->cost = cost;
4495 /* Remove invariants in set INVS to set IVS. */
4497 static void
4498 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4500 bitmap_iterator bi;
4501 unsigned iid;
4503 if (!invs)
4504 return;
4506 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4508 ivs->n_invariant_uses[iid]--;
4509 if (ivs->n_invariant_uses[iid] == 0)
4510 ivs->n_regs--;
4514 /* Set USE not to be expressed by any candidate in IVS. */
4516 static void
4517 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4518 struct iv_use *use)
4520 unsigned uid = use->id, cid;
4521 struct cost_pair *cp;
4523 cp = ivs->cand_for_use[uid];
4524 if (!cp)
4525 return;
4526 cid = cp->cand->id;
4528 ivs->bad_uses++;
4529 ivs->cand_for_use[uid] = NULL;
4530 ivs->n_cand_uses[cid]--;
4532 if (ivs->n_cand_uses[cid] == 0)
4534 bitmap_clear_bit (ivs->cands, cid);
4535 /* Do not count the pseudocandidates. */
4536 if (cp->cand->iv)
4537 ivs->n_regs--;
4538 ivs->n_cands--;
4539 ivs->cand_cost -= cp->cand->cost;
4541 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4544 ivs->cand_use_cost -= cp->cost;
4546 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4547 iv_ca_recount_cost (data, ivs);
4550 /* Add invariants in set INVS to set IVS. */
4552 static void
4553 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4555 bitmap_iterator bi;
4556 unsigned iid;
4558 if (!invs)
4559 return;
4561 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4563 ivs->n_invariant_uses[iid]++;
4564 if (ivs->n_invariant_uses[iid] == 1)
4565 ivs->n_regs++;
4569 /* Set cost pair for USE in set IVS to CP. */
4571 static void
4572 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4573 struct iv_use *use, struct cost_pair *cp)
4575 unsigned uid = use->id, cid;
4577 if (ivs->cand_for_use[uid] == cp)
4578 return;
4580 if (ivs->cand_for_use[uid])
4581 iv_ca_set_no_cp (data, ivs, use);
4583 if (cp)
4585 cid = cp->cand->id;
4587 ivs->bad_uses--;
4588 ivs->cand_for_use[uid] = cp;
4589 ivs->n_cand_uses[cid]++;
4590 if (ivs->n_cand_uses[cid] == 1)
4592 bitmap_set_bit (ivs->cands, cid);
4593 /* Do not count the pseudocandidates. */
4594 if (cp->cand->iv)
4595 ivs->n_regs++;
4596 ivs->n_cands++;
4597 ivs->cand_cost += cp->cand->cost;
4599 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4602 ivs->cand_use_cost += cp->cost;
4603 iv_ca_set_add_invariants (ivs, cp->depends_on);
4604 iv_ca_recount_cost (data, ivs);
4608 /* Extend set IVS by expressing USE by some of the candidates in it
4609 if possible. */
4611 static void
4612 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4613 struct iv_use *use)
4615 struct cost_pair *best_cp = NULL, *cp;
4616 bitmap_iterator bi;
4617 unsigned i;
4619 gcc_assert (ivs->upto >= use->id);
4621 if (ivs->upto == use->id)
4623 ivs->upto++;
4624 ivs->bad_uses++;
4627 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4629 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4631 if (cheaper_cost_pair (cp, best_cp))
4632 best_cp = cp;
4635 iv_ca_set_cp (data, ivs, use, best_cp);
4638 /* Get cost for assignment IVS. */
4640 static unsigned
4641 iv_ca_cost (struct iv_ca *ivs)
4643 return (ivs->bad_uses ? INFTY : ivs->cost);
4646 /* Returns true if all dependences of CP are among invariants in IVS. */
4648 static bool
4649 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4651 unsigned i;
4652 bitmap_iterator bi;
4654 if (!cp->depends_on)
4655 return true;
4657 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4659 if (ivs->n_invariant_uses[i] == 0)
4660 return false;
4663 return true;
4666 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4667 it before NEXT_CHANGE. */
4669 static struct iv_ca_delta *
4670 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4671 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4673 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4675 change->use = use;
4676 change->old_cp = old_cp;
4677 change->new_cp = new_cp;
4678 change->next_change = next_change;
4680 return change;
4683 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4684 are rewritten. */
4686 static struct iv_ca_delta *
4687 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4689 struct iv_ca_delta *last;
4691 if (!l2)
4692 return l1;
4694 if (!l1)
4695 return l2;
4697 for (last = l1; last->next_change; last = last->next_change)
4698 continue;
4699 last->next_change = l2;
4701 return l1;
4704 /* Returns candidate by that USE is expressed in IVS. */
4706 static struct cost_pair *
4707 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4709 return ivs->cand_for_use[use->id];
4712 /* Reverse the list of changes DELTA, forming the inverse to it. */
4714 static struct iv_ca_delta *
4715 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4717 struct iv_ca_delta *act, *next, *prev = NULL;
4718 struct cost_pair *tmp;
4720 for (act = delta; act; act = next)
4722 next = act->next_change;
4723 act->next_change = prev;
4724 prev = act;
4726 tmp = act->old_cp;
4727 act->old_cp = act->new_cp;
4728 act->new_cp = tmp;
4731 return prev;
4734 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4735 reverted instead. */
4737 static void
4738 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4739 struct iv_ca_delta *delta, bool forward)
4741 struct cost_pair *from, *to;
4742 struct iv_ca_delta *act;
4744 if (!forward)
4745 delta = iv_ca_delta_reverse (delta);
4747 for (act = delta; act; act = act->next_change)
4749 from = act->old_cp;
4750 to = act->new_cp;
4751 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4752 iv_ca_set_cp (data, ivs, act->use, to);
4755 if (!forward)
4756 iv_ca_delta_reverse (delta);
4759 /* Returns true if CAND is used in IVS. */
4761 static bool
4762 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4764 return ivs->n_cand_uses[cand->id] > 0;
4767 /* Returns number of induction variable candidates in the set IVS. */
4769 static unsigned
4770 iv_ca_n_cands (struct iv_ca *ivs)
4772 return ivs->n_cands;
4775 /* Free the list of changes DELTA. */
4777 static void
4778 iv_ca_delta_free (struct iv_ca_delta **delta)
4780 struct iv_ca_delta *act, *next;
4782 for (act = *delta; act; act = next)
4784 next = act->next_change;
4785 free (act);
4788 *delta = NULL;
4791 /* Allocates new iv candidates assignment. */
4793 static struct iv_ca *
4794 iv_ca_new (struct ivopts_data *data)
4796 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
4798 nw->upto = 0;
4799 nw->bad_uses = 0;
4800 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4801 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
4802 nw->cands = BITMAP_ALLOC (NULL);
4803 nw->n_cands = 0;
4804 nw->n_regs = 0;
4805 nw->cand_use_cost = 0;
4806 nw->cand_cost = 0;
4807 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4808 nw->cost = 0;
4810 return nw;
4813 /* Free memory occupied by the set IVS. */
4815 static void
4816 iv_ca_free (struct iv_ca **ivs)
4818 free ((*ivs)->cand_for_use);
4819 free ((*ivs)->n_cand_uses);
4820 BITMAP_FREE ((*ivs)->cands);
4821 free ((*ivs)->n_invariant_uses);
4822 free (*ivs);
4823 *ivs = NULL;
4826 /* Dumps IVS to FILE. */
4828 static void
4829 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4831 const char *pref = " invariants ";
4832 unsigned i;
4834 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4835 bitmap_print (file, ivs->cands, " candidates ","\n");
4837 for (i = 1; i <= data->max_inv_id; i++)
4838 if (ivs->n_invariant_uses[i])
4840 fprintf (file, "%s%d", pref, i);
4841 pref = ", ";
4843 fprintf (file, "\n");
4846 /* Try changing candidate in IVS to CAND for each use. Return cost of the
4847 new set, and store differences in DELTA. Number of induction variables
4848 in the new set is stored to N_IVS. */
4850 static unsigned
4851 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4852 struct iv_cand *cand, struct iv_ca_delta **delta,
4853 unsigned *n_ivs)
4855 unsigned i, cost;
4856 struct iv_use *use;
4857 struct cost_pair *old_cp, *new_cp;
4859 *delta = NULL;
4860 for (i = 0; i < ivs->upto; i++)
4862 use = iv_use (data, i);
4863 old_cp = iv_ca_cand_for_use (ivs, use);
4865 if (old_cp
4866 && old_cp->cand == cand)
4867 continue;
4869 new_cp = get_use_iv_cost (data, use, cand);
4870 if (!new_cp)
4871 continue;
4873 if (!iv_ca_has_deps (ivs, new_cp))
4874 continue;
4876 if (!cheaper_cost_pair (new_cp, old_cp))
4877 continue;
4879 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4882 iv_ca_delta_commit (data, ivs, *delta, true);
4883 cost = iv_ca_cost (ivs);
4884 if (n_ivs)
4885 *n_ivs = iv_ca_n_cands (ivs);
4886 iv_ca_delta_commit (data, ivs, *delta, false);
4888 return cost;
4891 /* Try narrowing set IVS by removing CAND. Return the cost of
4892 the new set and store the differences in DELTA. */
4894 static unsigned
4895 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4896 struct iv_cand *cand, struct iv_ca_delta **delta)
4898 unsigned i, ci;
4899 struct iv_use *use;
4900 struct cost_pair *old_cp, *new_cp, *cp;
4901 bitmap_iterator bi;
4902 struct iv_cand *cnd;
4903 unsigned cost;
4905 *delta = NULL;
4906 for (i = 0; i < n_iv_uses (data); i++)
4908 use = iv_use (data, i);
4910 old_cp = iv_ca_cand_for_use (ivs, use);
4911 if (old_cp->cand != cand)
4912 continue;
4914 new_cp = NULL;
4916 if (data->consider_all_candidates)
4918 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4920 if (ci == cand->id)
4921 continue;
4923 cnd = iv_cand (data, ci);
4925 cp = get_use_iv_cost (data, use, cnd);
4926 if (!cp)
4927 continue;
4928 if (!iv_ca_has_deps (ivs, cp))
4929 continue;
4931 if (!cheaper_cost_pair (cp, new_cp))
4932 continue;
4934 new_cp = cp;
4937 else
4939 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4941 if (ci == cand->id)
4942 continue;
4944 cnd = iv_cand (data, ci);
4946 cp = get_use_iv_cost (data, use, cnd);
4947 if (!cp)
4948 continue;
4949 if (!iv_ca_has_deps (ivs, cp))
4950 continue;
4952 if (!cheaper_cost_pair (cp, new_cp))
4953 continue;
4955 new_cp = cp;
4959 if (!new_cp)
4961 iv_ca_delta_free (delta);
4962 return INFTY;
4965 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4968 iv_ca_delta_commit (data, ivs, *delta, true);
4969 cost = iv_ca_cost (ivs);
4970 iv_ca_delta_commit (data, ivs, *delta, false);
4972 return cost;
4975 /* Try optimizing the set of candidates IVS by removing candidates different
4976 from to EXCEPT_CAND from it. Return cost of the new set, and store
4977 differences in DELTA. */
4979 static unsigned
4980 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4981 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4983 bitmap_iterator bi;
4984 struct iv_ca_delta *act_delta, *best_delta;
4985 unsigned i, best_cost, acost;
4986 struct iv_cand *cand;
4988 best_delta = NULL;
4989 best_cost = iv_ca_cost (ivs);
4991 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4993 cand = iv_cand (data, i);
4995 if (cand == except_cand)
4996 continue;
4998 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5000 if (acost < best_cost)
5002 best_cost = acost;
5003 iv_ca_delta_free (&best_delta);
5004 best_delta = act_delta;
5006 else
5007 iv_ca_delta_free (&act_delta);
5010 if (!best_delta)
5012 *delta = NULL;
5013 return best_cost;
5016 /* Recurse to possibly remove other unnecessary ivs. */
5017 iv_ca_delta_commit (data, ivs, best_delta, true);
5018 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5019 iv_ca_delta_commit (data, ivs, best_delta, false);
5020 *delta = iv_ca_delta_join (best_delta, *delta);
5021 return best_cost;
5024 /* Tries to extend the sets IVS in the best possible way in order
5025 to express the USE. */
5027 static bool
5028 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5029 struct iv_use *use)
5031 unsigned best_cost, act_cost;
5032 unsigned i;
5033 bitmap_iterator bi;
5034 struct iv_cand *cand;
5035 struct iv_ca_delta *best_delta = NULL, *act_delta;
5036 struct cost_pair *cp;
5038 iv_ca_add_use (data, ivs, use);
5039 best_cost = iv_ca_cost (ivs);
5041 cp = iv_ca_cand_for_use (ivs, use);
5042 if (cp)
5044 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5045 iv_ca_set_no_cp (data, ivs, use);
5048 /* First try important candidates. Only if it fails, try the specific ones.
5049 Rationale -- in loops with many variables the best choice often is to use
5050 just one generic biv. If we added here many ivs specific to the uses,
5051 the optimization algorithm later would be likely to get stuck in a local
5052 minimum, thus causing us to create too many ivs. The approach from
5053 few ivs to more seems more likely to be successful -- starting from few
5054 ivs, replacing an expensive use by a specific iv should always be a
5055 win. */
5056 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5058 cand = iv_cand (data, i);
5060 if (iv_ca_cand_used_p (ivs, cand))
5061 continue;
5063 cp = get_use_iv_cost (data, use, cand);
5064 if (!cp)
5065 continue;
5067 iv_ca_set_cp (data, ivs, use, cp);
5068 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5069 iv_ca_set_no_cp (data, ivs, use);
5070 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5072 if (act_cost < best_cost)
5074 best_cost = act_cost;
5076 iv_ca_delta_free (&best_delta);
5077 best_delta = act_delta;
5079 else
5080 iv_ca_delta_free (&act_delta);
5083 if (best_cost == INFTY)
5085 for (i = 0; i < use->n_map_members; i++)
5087 cp = use->cost_map + i;
5088 cand = cp->cand;
5089 if (!cand)
5090 continue;
5092 /* Already tried this. */
5093 if (cand->important)
5094 continue;
5096 if (iv_ca_cand_used_p (ivs, cand))
5097 continue;
5099 act_delta = NULL;
5100 iv_ca_set_cp (data, ivs, use, cp);
5101 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5102 iv_ca_set_no_cp (data, ivs, use);
5103 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5104 cp, act_delta);
5106 if (act_cost < best_cost)
5108 best_cost = act_cost;
5110 if (best_delta)
5111 iv_ca_delta_free (&best_delta);
5112 best_delta = act_delta;
5114 else
5115 iv_ca_delta_free (&act_delta);
5119 iv_ca_delta_commit (data, ivs, best_delta, true);
5120 iv_ca_delta_free (&best_delta);
5122 return (best_cost != INFTY);
5125 /* Finds an initial assignment of candidates to uses. */
5127 static struct iv_ca *
5128 get_initial_solution (struct ivopts_data *data)
5130 struct iv_ca *ivs = iv_ca_new (data);
5131 unsigned i;
5133 for (i = 0; i < n_iv_uses (data); i++)
5134 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5136 iv_ca_free (&ivs);
5137 return NULL;
5140 return ivs;
5143 /* Tries to improve set of induction variables IVS. */
5145 static bool
5146 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5148 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
5149 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5150 struct iv_cand *cand;
5152 /* Try extending the set of induction variables by one. */
5153 for (i = 0; i < n_iv_cands (data); i++)
5155 cand = iv_cand (data, i);
5157 if (iv_ca_cand_used_p (ivs, cand))
5158 continue;
5160 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5161 if (!act_delta)
5162 continue;
5164 /* If we successfully added the candidate and the set is small enough,
5165 try optimizing it by removing other candidates. */
5166 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5168 iv_ca_delta_commit (data, ivs, act_delta, true);
5169 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5170 iv_ca_delta_commit (data, ivs, act_delta, false);
5171 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5174 if (acost < best_cost)
5176 best_cost = acost;
5177 iv_ca_delta_free (&best_delta);
5178 best_delta = act_delta;
5180 else
5181 iv_ca_delta_free (&act_delta);
5184 if (!best_delta)
5186 /* Try removing the candidates from the set instead. */
5187 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5189 /* Nothing more we can do. */
5190 if (!best_delta)
5191 return false;
5194 iv_ca_delta_commit (data, ivs, best_delta, true);
5195 gcc_assert (best_cost == iv_ca_cost (ivs));
5196 iv_ca_delta_free (&best_delta);
5197 return true;
5200 /* Attempts to find the optimal set of induction variables. We do simple
5201 greedy heuristic -- we try to replace at most one candidate in the selected
5202 solution and remove the unused ivs while this improves the cost. */
5204 static struct iv_ca *
5205 find_optimal_iv_set (struct ivopts_data *data)
5207 unsigned i;
5208 struct iv_ca *set;
5209 struct iv_use *use;
5211 /* Get the initial solution. */
5212 set = get_initial_solution (data);
5213 if (!set)
5215 if (dump_file && (dump_flags & TDF_DETAILS))
5216 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5217 return NULL;
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, "Initial set of candidates:\n");
5223 iv_ca_dump (data, dump_file, set);
5226 while (try_improve_iv_set (data, set))
5228 if (dump_file && (dump_flags & TDF_DETAILS))
5230 fprintf (dump_file, "Improved to:\n");
5231 iv_ca_dump (data, dump_file, set);
5235 if (dump_file && (dump_flags & TDF_DETAILS))
5236 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
5238 for (i = 0; i < n_iv_uses (data); i++)
5240 use = iv_use (data, i);
5241 use->selected = iv_ca_cand_for_use (set, use)->cand;
5244 return set;
5247 /* Creates a new induction variable corresponding to CAND. */
5249 static void
5250 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5252 block_stmt_iterator incr_pos;
5253 tree base;
5254 bool after = false;
5256 if (!cand->iv)
5257 return;
5259 switch (cand->pos)
5261 case IP_NORMAL:
5262 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5263 break;
5265 case IP_END:
5266 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5267 after = true;
5268 break;
5270 case IP_ORIGINAL:
5271 /* Mark that the iv is preserved. */
5272 name_info (data, cand->var_before)->preserve_biv = true;
5273 name_info (data, cand->var_after)->preserve_biv = true;
5275 /* Rewrite the increment so that it uses var_before directly. */
5276 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5278 return;
5281 gimple_add_tmp_var (cand->var_before);
5282 add_referenced_tmp_var (cand->var_before);
5284 base = unshare_expr (cand->iv->base);
5286 create_iv (base, unshare_expr (cand->iv->step),
5287 cand->var_before, data->current_loop,
5288 &incr_pos, after, &cand->var_before, &cand->var_after);
5291 /* Creates new induction variables described in SET. */
5293 static void
5294 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5296 unsigned i;
5297 struct iv_cand *cand;
5298 bitmap_iterator bi;
5300 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5302 cand = iv_cand (data, i);
5303 create_new_iv (data, cand);
5307 /* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5308 is true, remove also the ssa name defined by the statement. */
5310 static void
5311 remove_statement (tree stmt, bool including_defined_name)
5313 if (TREE_CODE (stmt) == PHI_NODE)
5315 if (!including_defined_name)
5317 /* Prevent the ssa name defined by the statement from being removed. */
5318 SET_PHI_RESULT (stmt, NULL);
5320 remove_phi_node (stmt, NULL_TREE);
5322 else
5324 block_stmt_iterator bsi = bsi_for_stmt (stmt);
5326 bsi_remove (&bsi, true);
5330 /* Rewrites USE (definition of iv used in a nonlinear expression)
5331 using candidate CAND. */
5333 static void
5334 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5335 struct iv_use *use, struct iv_cand *cand)
5337 tree comp;
5338 tree op, stmts, tgt, ass;
5339 block_stmt_iterator bsi, pbsi;
5341 /* An important special case -- if we are asked to express value of
5342 the original iv by itself, just exit; there is no need to
5343 introduce a new computation (that might also need casting the
5344 variable to unsigned and back). */
5345 if (cand->pos == IP_ORIGINAL
5346 && cand->incremented_at == use->stmt)
5348 tree step, ctype, utype;
5349 enum tree_code incr_code = PLUS_EXPR;
5351 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5352 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5354 step = cand->iv->step;
5355 ctype = TREE_TYPE (step);
5356 utype = TREE_TYPE (cand->var_after);
5357 if (TREE_CODE (step) == NEGATE_EXPR)
5359 incr_code = MINUS_EXPR;
5360 step = TREE_OPERAND (step, 0);
5363 /* Check whether we may leave the computation unchanged.
5364 This is the case only if it does not rely on other
5365 computations in the loop -- otherwise, the computation
5366 we rely upon may be removed in remove_unused_ivs,
5367 thus leading to ICE. */
5368 op = TREE_OPERAND (use->stmt, 1);
5369 if (TREE_CODE (op) == PLUS_EXPR
5370 || TREE_CODE (op) == MINUS_EXPR)
5372 if (TREE_OPERAND (op, 0) == cand->var_before)
5373 op = TREE_OPERAND (op, 1);
5374 else if (TREE_CODE (op) == PLUS_EXPR
5375 && TREE_OPERAND (op, 1) == cand->var_before)
5376 op = TREE_OPERAND (op, 0);
5377 else
5378 op = NULL_TREE;
5380 else
5381 op = NULL_TREE;
5383 if (op
5384 && (TREE_CODE (op) == INTEGER_CST
5385 || operand_equal_p (op, step, 0)))
5386 return;
5388 /* Otherwise, add the necessary computations to express
5389 the iv. */
5390 op = fold_convert (ctype, cand->var_before);
5391 comp = fold_convert (utype,
5392 build2 (incr_code, ctype, op,
5393 unshare_expr (step)));
5395 else
5396 comp = get_computation (data->current_loop, use, cand);
5398 switch (TREE_CODE (use->stmt))
5400 case PHI_NODE:
5401 tgt = PHI_RESULT (use->stmt);
5403 /* If we should keep the biv, do not replace it. */
5404 if (name_info (data, tgt)->preserve_biv)
5405 return;
5407 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5408 while (!bsi_end_p (pbsi)
5409 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5411 bsi = pbsi;
5412 bsi_next (&pbsi);
5414 break;
5416 case MODIFY_EXPR:
5417 tgt = TREE_OPERAND (use->stmt, 0);
5418 bsi = bsi_for_stmt (use->stmt);
5419 break;
5421 default:
5422 gcc_unreachable ();
5425 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5427 if (TREE_CODE (use->stmt) == PHI_NODE)
5429 if (stmts)
5430 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5431 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5432 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5433 remove_statement (use->stmt, false);
5434 SSA_NAME_DEF_STMT (tgt) = ass;
5436 else
5438 if (stmts)
5439 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5440 TREE_OPERAND (use->stmt, 1) = op;
5444 /* Replaces ssa name in index IDX by its basic variable. Callback for
5445 for_each_index. */
5447 static bool
5448 idx_remove_ssa_names (tree base, tree *idx,
5449 void *data ATTRIBUTE_UNUSED)
5451 tree *op;
5453 if (TREE_CODE (*idx) == SSA_NAME)
5454 *idx = SSA_NAME_VAR (*idx);
5456 if (TREE_CODE (base) == ARRAY_REF)
5458 op = &TREE_OPERAND (base, 2);
5459 if (*op
5460 && TREE_CODE (*op) == SSA_NAME)
5461 *op = SSA_NAME_VAR (*op);
5462 op = &TREE_OPERAND (base, 3);
5463 if (*op
5464 && TREE_CODE (*op) == SSA_NAME)
5465 *op = SSA_NAME_VAR (*op);
5468 return true;
5471 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5473 static tree
5474 unshare_and_remove_ssa_names (tree ref)
5476 ref = unshare_expr (ref);
5477 for_each_index (&ref, idx_remove_ssa_names, NULL);
5479 return ref;
5482 /* Extract the alias analysis info for the memory reference REF. There are
5483 several ways how this information may be stored and what precisely is
5484 its semantics depending on the type of the reference, but there always is
5485 somewhere hidden one _DECL node that is used to determine the set of
5486 virtual operands for the reference. The code below deciphers this jungle
5487 and extracts this single useful piece of information. */
5489 static tree
5490 get_ref_tag (tree ref, tree orig)
5492 tree var = get_base_address (ref);
5493 tree aref = NULL_TREE, tag, sv;
5494 HOST_WIDE_INT offset, size, maxsize;
5496 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5498 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5499 if (ref)
5500 break;
5503 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5504 return unshare_expr (sv);
5506 if (!var)
5507 return NULL_TREE;
5509 if (TREE_CODE (var) == INDIRECT_REF)
5511 /* In case the base is a dereference of a pointer, first check its name
5512 mem tag, and if it does not have one, use type mem tag. */
5513 var = TREE_OPERAND (var, 0);
5514 if (TREE_CODE (var) != SSA_NAME)
5515 return NULL_TREE;
5517 if (SSA_NAME_PTR_INFO (var))
5519 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5520 if (tag)
5521 return tag;
5524 var = SSA_NAME_VAR (var);
5525 tag = var_ann (var)->type_mem_tag;
5526 gcc_assert (tag != NULL_TREE);
5527 return tag;
5529 else
5531 if (!DECL_P (var))
5532 return NULL_TREE;
5534 tag = var_ann (var)->type_mem_tag;
5535 if (tag)
5536 return tag;
5538 return var;
5542 /* Copies the reference information from OLD_REF to NEW_REF. */
5544 static void
5545 copy_ref_info (tree new_ref, tree old_ref)
5547 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5548 copy_mem_ref_info (new_ref, old_ref);
5549 else
5551 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5552 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
5556 /* Rewrites USE (address that is an iv) using candidate CAND. */
5558 static void
5559 rewrite_use_address (struct ivopts_data *data,
5560 struct iv_use *use, struct iv_cand *cand)
5562 struct affine_tree_combination aff;
5563 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5564 tree ref;
5566 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5567 unshare_aff_combination (&aff);
5569 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5570 copy_ref_info (ref, *use->op_p);
5571 *use->op_p = ref;
5574 /* Rewrites USE (the condition such that one of the arguments is an iv) using
5575 candidate CAND. */
5577 static void
5578 rewrite_use_compare (struct ivopts_data *data,
5579 struct iv_use *use, struct iv_cand *cand)
5581 tree comp;
5582 tree *op_p, cond, op, stmts, bound;
5583 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
5584 enum tree_code compare;
5585 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5587 bound = cp->value;
5588 if (bound)
5590 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5591 tree var_type = TREE_TYPE (var);
5593 compare = iv_elimination_compare (data, use);
5594 bound = fold_convert (var_type, bound);
5595 op = force_gimple_operand (unshare_expr (bound), &stmts,
5596 true, NULL_TREE);
5598 if (stmts)
5599 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5601 *use->op_p = build2 (compare, boolean_type_node, var, op);
5602 update_stmt (use->stmt);
5603 return;
5606 /* The induction variable elimination failed; just express the original
5607 giv. */
5608 comp = get_computation (data->current_loop, use, cand);
5610 cond = *use->op_p;
5611 op_p = &TREE_OPERAND (cond, 0);
5612 if (TREE_CODE (*op_p) != SSA_NAME
5613 || zero_p (get_iv (data, *op_p)->step))
5614 op_p = &TREE_OPERAND (cond, 1);
5616 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5617 if (stmts)
5618 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5620 *op_p = op;
5623 /* Ensure that operand *OP_P may be used at the end of EXIT without
5624 violating loop closed ssa form. */
5626 static void
5627 protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5629 basic_block def_bb;
5630 struct loop *def_loop;
5631 tree phi, use;
5633 use = USE_FROM_PTR (op_p);
5634 if (TREE_CODE (use) != SSA_NAME)
5635 return;
5637 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5638 if (!def_bb)
5639 return;
5641 def_loop = def_bb->loop_father;
5642 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5643 return;
5645 /* Try finding a phi node that copies the value out of the loop. */
5646 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5647 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5648 break;
5650 if (!phi)
5652 /* Create such a phi node. */
5653 tree new_name = duplicate_ssa_name (use, NULL);
5655 phi = create_phi_node (new_name, exit->dest);
5656 SSA_NAME_DEF_STMT (new_name) = phi;
5657 add_phi_arg (phi, use, exit);
5660 SET_USE (op_p, PHI_RESULT (phi));
5663 /* Ensure that operands of STMT may be used at the end of EXIT without
5664 violating loop closed ssa form. */
5666 static void
5667 protect_loop_closed_ssa_form (edge exit, tree stmt)
5669 ssa_op_iter iter;
5670 use_operand_p use_p;
5672 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5673 protect_loop_closed_ssa_form_use (exit, use_p);
5676 /* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5677 so that they are emitted on the correct place, and so that the loop closed
5678 ssa form is preserved. */
5680 void
5681 compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5683 tree_stmt_iterator tsi;
5684 block_stmt_iterator bsi;
5685 tree phi, stmt, def, next;
5687 if (!single_pred_p (exit->dest))
5688 split_loop_exit_edge (exit);
5690 /* Ensure there is label in exit->dest, so that we can
5691 insert after it. */
5692 tree_block_label (exit->dest);
5693 bsi = bsi_after_labels (exit->dest);
5695 if (TREE_CODE (stmts) == STATEMENT_LIST)
5697 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
5699 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5700 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5703 else
5705 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5706 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5709 if (!op)
5710 return;
5712 for (phi = phi_nodes (exit->dest); phi; phi = next)
5714 next = PHI_CHAIN (phi);
5716 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5718 def = PHI_RESULT (phi);
5719 remove_statement (phi, false);
5720 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5721 def, op);
5722 SSA_NAME_DEF_STMT (def) = stmt;
5723 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5728 /* Rewrites the final value of USE (that is only needed outside of the loop)
5729 using candidate CAND. */
5731 static void
5732 rewrite_use_outer (struct ivopts_data *data,
5733 struct iv_use *use, struct iv_cand *cand)
5735 edge exit;
5736 tree value, op, stmts, tgt;
5737 tree phi;
5739 switch (TREE_CODE (use->stmt))
5741 case PHI_NODE:
5742 tgt = PHI_RESULT (use->stmt);
5743 break;
5744 case MODIFY_EXPR:
5745 tgt = TREE_OPERAND (use->stmt, 0);
5746 break;
5747 default:
5748 gcc_unreachable ();
5751 exit = single_dom_exit (data->current_loop);
5753 if (exit)
5755 if (!cand->iv)
5757 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5758 value = unshare_expr (cp->value);
5760 else
5761 value = get_computation_at (data->current_loop,
5762 use, cand, last_stmt (exit->src));
5764 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5766 /* If we will preserve the iv anyway and we would need to perform
5767 some computation to replace the final value, do nothing. */
5768 if (stmts && name_info (data, tgt)->preserve_biv)
5769 return;
5771 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
5773 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5775 if (USE_FROM_PTR (use_p) == tgt)
5776 SET_USE (use_p, op);
5779 if (stmts)
5780 compute_phi_arg_on_exit (exit, stmts, op);
5782 /* Enable removal of the statement. We cannot remove it directly,
5783 since we may still need the aliasing information attached to the
5784 ssa name defined by it. */
5785 name_info (data, tgt)->iv->have_use_for = false;
5786 return;
5789 /* If the variable is going to be preserved anyway, there is nothing to
5790 do. */
5791 if (name_info (data, tgt)->preserve_biv)
5792 return;
5794 /* Otherwise we just need to compute the iv. */
5795 rewrite_use_nonlinear_expr (data, use, cand);
5798 /* Rewrites USE using candidate CAND. */
5800 static void
5801 rewrite_use (struct ivopts_data *data,
5802 struct iv_use *use, struct iv_cand *cand)
5804 switch (use->type)
5806 case USE_NONLINEAR_EXPR:
5807 rewrite_use_nonlinear_expr (data, use, cand);
5808 break;
5810 case USE_OUTER:
5811 rewrite_use_outer (data, use, cand);
5812 break;
5814 case USE_ADDRESS:
5815 rewrite_use_address (data, use, cand);
5816 break;
5818 case USE_COMPARE:
5819 rewrite_use_compare (data, use, cand);
5820 break;
5822 default:
5823 gcc_unreachable ();
5825 mark_new_vars_to_rename (use->stmt);
5828 /* Rewrite the uses using the selected induction variables. */
5830 static void
5831 rewrite_uses (struct ivopts_data *data)
5833 unsigned i;
5834 struct iv_cand *cand;
5835 struct iv_use *use;
5837 for (i = 0; i < n_iv_uses (data); i++)
5839 use = iv_use (data, i);
5840 cand = use->selected;
5841 gcc_assert (cand);
5843 rewrite_use (data, use, cand);
5847 /* Removes the ivs that are not used after rewriting. */
5849 static void
5850 remove_unused_ivs (struct ivopts_data *data)
5852 unsigned j;
5853 bitmap_iterator bi;
5855 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5857 struct version_info *info;
5859 info = ver_info (data, j);
5860 if (info->iv
5861 && !zero_p (info->iv->step)
5862 && !info->inv_id
5863 && !info->iv->have_use_for
5864 && !info->preserve_biv)
5865 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
5869 /* Frees data allocated by the optimization of a single loop. */
5871 static void
5872 free_loop_data (struct ivopts_data *data)
5874 unsigned i, j;
5875 bitmap_iterator bi;
5876 tree obj;
5878 htab_empty (data->niters);
5880 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5882 struct version_info *info;
5884 info = ver_info (data, i);
5885 if (info->iv)
5886 free (info->iv);
5887 info->iv = NULL;
5888 info->has_nonlin_use = false;
5889 info->preserve_biv = false;
5890 info->inv_id = 0;
5892 bitmap_clear (data->relevant);
5893 bitmap_clear (data->important_candidates);
5895 for (i = 0; i < n_iv_uses (data); i++)
5897 struct iv_use *use = iv_use (data, i);
5899 free (use->iv);
5900 BITMAP_FREE (use->related_cands);
5901 for (j = 0; j < use->n_map_members; j++)
5902 if (use->cost_map[j].depends_on)
5903 BITMAP_FREE (use->cost_map[j].depends_on);
5904 free (use->cost_map);
5905 free (use);
5907 VEC_truncate (iv_use_p, data->iv_uses, 0);
5909 for (i = 0; i < n_iv_cands (data); i++)
5911 struct iv_cand *cand = iv_cand (data, i);
5913 if (cand->iv)
5914 free (cand->iv);
5915 if (cand->depends_on)
5916 BITMAP_FREE (cand->depends_on);
5917 free (cand);
5919 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5921 if (data->version_info_size < num_ssa_names)
5923 data->version_info_size = 2 * num_ssa_names;
5924 free (data->version_info);
5925 data->version_info = xcalloc (data->version_info_size,
5926 sizeof (struct version_info));
5929 data->max_inv_id = 0;
5931 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5932 SET_DECL_RTL (obj, NULL_RTX);
5934 VEC_truncate (tree, decl_rtl_to_reset, 0);
5937 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
5938 loop tree. */
5940 static void
5941 tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5943 unsigned i;
5945 for (i = 1; i < loops->num; i++)
5946 if (loops->parray[i])
5948 free (loops->parray[i]->aux);
5949 loops->parray[i]->aux = NULL;
5952 free_loop_data (data);
5953 free (data->version_info);
5954 BITMAP_FREE (data->relevant);
5955 BITMAP_FREE (data->important_candidates);
5956 htab_delete (data->niters);
5958 VEC_free (tree, heap, decl_rtl_to_reset);
5959 VEC_free (iv_use_p, heap, data->iv_uses);
5960 VEC_free (iv_cand_p, heap, data->iv_candidates);
5963 /* Optimizes the LOOP. Returns true if anything changed. */
5965 static bool
5966 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5968 bool changed = false;
5969 struct iv_ca *iv_ca;
5970 edge exit;
5972 data->current_loop = loop;
5974 if (dump_file && (dump_flags & TDF_DETAILS))
5976 fprintf (dump_file, "Processing loop %d\n", loop->num);
5978 exit = single_dom_exit (loop);
5979 if (exit)
5981 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5982 exit->src->index, exit->dest->index);
5983 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5984 fprintf (dump_file, "\n");
5987 fprintf (dump_file, "\n");
5990 /* For each ssa name determines whether it behaves as an induction variable
5991 in some loop. */
5992 if (!find_induction_variables (data))
5993 goto finish;
5995 /* Finds interesting uses (item 1). */
5996 find_interesting_uses (data);
5997 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5998 goto finish;
6000 /* Finds candidates for the induction variables (item 2). */
6001 find_iv_candidates (data);
6003 /* Calculates the costs (item 3, part 1). */
6004 determine_use_iv_costs (data);
6005 determine_iv_costs (data);
6006 determine_set_costs (data);
6008 /* Find the optimal set of induction variables (item 3, part 2). */
6009 iv_ca = find_optimal_iv_set (data);
6010 if (!iv_ca)
6011 goto finish;
6012 changed = true;
6014 /* Create the new induction variables (item 4, part 1). */
6015 create_new_ivs (data, iv_ca);
6016 iv_ca_free (&iv_ca);
6018 /* Rewrite the uses (item 4, part 2). */
6019 rewrite_uses (data);
6021 /* Remove the ivs that are unused after rewriting. */
6022 remove_unused_ivs (data);
6024 /* We have changed the structure of induction variables; it might happen
6025 that definitions in the scev database refer to some of them that were
6026 eliminated. */
6027 scev_reset ();
6029 finish:
6030 free_loop_data (data);
6032 return changed;
6035 /* Main entry point. Optimizes induction variables in LOOPS. */
6037 void
6038 tree_ssa_iv_optimize (struct loops *loops)
6040 struct loop *loop;
6041 struct ivopts_data data;
6043 tree_ssa_iv_optimize_init (loops, &data);
6045 /* Optimize the loops starting with the innermost ones. */
6046 loop = loops->tree_root;
6047 while (loop->inner)
6048 loop = loop->inner;
6050 /* Scan the loops, inner ones first. */
6051 while (loop != loops->tree_root)
6053 if (dump_file && (dump_flags & TDF_DETAILS))
6054 flow_loop_dump (loop, dump_file, NULL, 1);
6056 tree_ssa_iv_optimize_loop (&data, loop);
6058 if (loop->next)
6060 loop = loop->next;
6061 while (loop->inner)
6062 loop = loop->inner;
6064 else
6065 loop = loop->outer;
6068 tree_ssa_iv_optimize_finalize (loops, &data);